{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 32628, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.0648522741203875e-05, "grad_norm": 96.86287002828071, "learning_rate": 1.0214504596527069e-08, "loss": 2.0634, "step": 1 }, { "epoch": 6.129704548240775e-05, "grad_norm": 59.708437651788095, "learning_rate": 2.0429009193054138e-08, "loss": 1.8793, "step": 2 }, { "epoch": 9.194556822361162e-05, "grad_norm": 5.1830553615654775, "learning_rate": 3.0643513789581204e-08, "loss": 0.7622, "step": 3 }, { "epoch": 0.0001225940909648155, "grad_norm": 80.08499892229985, "learning_rate": 4.0858018386108276e-08, "loss": 2.0761, "step": 4 }, { "epoch": 0.00015324261370601937, "grad_norm": 86.69354824720264, "learning_rate": 5.107252298263535e-08, "loss": 2.1178, "step": 5 }, { "epoch": 0.00018389113644722325, "grad_norm": 65.74291260537014, "learning_rate": 6.128702757916241e-08, "loss": 2.0249, "step": 6 }, { "epoch": 0.00021453965918842712, "grad_norm": 97.35274500380119, "learning_rate": 7.150153217568949e-08, "loss": 1.9961, "step": 7 }, { "epoch": 0.000245188181929631, "grad_norm": 88.52033606110444, "learning_rate": 8.171603677221655e-08, "loss": 2.0264, "step": 8 }, { "epoch": 0.0002758367046708349, "grad_norm": 75.03540526070398, "learning_rate": 9.193054136874362e-08, "loss": 2.1207, "step": 9 }, { "epoch": 0.00030648522741203875, "grad_norm": 102.33467990176264, "learning_rate": 1.021450459652707e-07, "loss": 2.1849, "step": 10 }, { "epoch": 0.0003371337501532426, "grad_norm": 86.14647733853003, "learning_rate": 1.1235955056179776e-07, "loss": 2.0712, "step": 11 }, { "epoch": 0.0003677822728944465, "grad_norm": 95.62521899962503, "learning_rate": 1.2257405515832481e-07, "loss": 1.9079, "step": 12 }, { "epoch": 0.0003984307956356504, "grad_norm": 154.59820060030992, "learning_rate": 1.327885597548519e-07, "loss": 2.8042, "step": 13 }, { "epoch": 0.00042907931837685425, "grad_norm": 4.745288689553833, "learning_rate": 1.4300306435137899e-07, "loss": 0.697, "step": 14 }, { "epoch": 0.0004597278411180581, "grad_norm": 62.713650534016416, "learning_rate": 1.5321756894790606e-07, "loss": 1.9836, "step": 15 }, { "epoch": 0.000490376363859262, "grad_norm": 5.226945078730183, "learning_rate": 1.634320735444331e-07, "loss": 0.7346, "step": 16 }, { "epoch": 0.0005210248866004658, "grad_norm": 5.124798357538041, "learning_rate": 1.7364657814096015e-07, "loss": 0.755, "step": 17 }, { "epoch": 0.0005516734093416697, "grad_norm": 5.054457342601462, "learning_rate": 1.8386108273748725e-07, "loss": 0.7529, "step": 18 }, { "epoch": 0.0005823219320828736, "grad_norm": 73.51491426246093, "learning_rate": 1.9407558733401432e-07, "loss": 2.0104, "step": 19 }, { "epoch": 0.0006129704548240775, "grad_norm": 66.39635878366511, "learning_rate": 2.042900919305414e-07, "loss": 1.7826, "step": 20 }, { "epoch": 0.0006436189775652813, "grad_norm": 117.01074644368109, "learning_rate": 2.1450459652706847e-07, "loss": 2.065, "step": 21 }, { "epoch": 0.0006742675003064852, "grad_norm": 94.72477653825386, "learning_rate": 2.247191011235955e-07, "loss": 2.0982, "step": 22 }, { "epoch": 0.0007049160230476891, "grad_norm": 83.12728959176015, "learning_rate": 2.349336057201226e-07, "loss": 1.772, "step": 23 }, { "epoch": 0.000735564545788893, "grad_norm": 59.53424961054868, "learning_rate": 2.4514811031664963e-07, "loss": 1.8873, "step": 24 }, { "epoch": 0.0007662130685300968, "grad_norm": 53.31192919179236, "learning_rate": 2.5536261491317673e-07, "loss": 1.8939, "step": 25 }, { "epoch": 0.0007968615912713007, "grad_norm": 5.1515018867911335, "learning_rate": 2.655771195097038e-07, "loss": 0.7243, "step": 26 }, { "epoch": 0.0008275101140125046, "grad_norm": 51.2813101917181, "learning_rate": 2.7579162410623087e-07, "loss": 1.8275, "step": 27 }, { "epoch": 0.0008581586367537085, "grad_norm": 40.67360847641765, "learning_rate": 2.8600612870275797e-07, "loss": 1.7995, "step": 28 }, { "epoch": 0.0008888071594949123, "grad_norm": 35.39637621968793, "learning_rate": 2.96220633299285e-07, "loss": 1.7693, "step": 29 }, { "epoch": 0.0009194556822361162, "grad_norm": 34.074730044053844, "learning_rate": 3.064351378958121e-07, "loss": 1.8612, "step": 30 }, { "epoch": 0.0009501042049773201, "grad_norm": 49.116912491495626, "learning_rate": 3.1664964249233916e-07, "loss": 1.7973, "step": 31 }, { "epoch": 0.000980752727718524, "grad_norm": 24.619864522115805, "learning_rate": 3.268641470888662e-07, "loss": 1.667, "step": 32 }, { "epoch": 0.001011401250459728, "grad_norm": 4.71145205110844, "learning_rate": 3.3707865168539325e-07, "loss": 0.7207, "step": 33 }, { "epoch": 0.0010420497732009316, "grad_norm": 5.1283369788991955, "learning_rate": 3.472931562819203e-07, "loss": 0.7446, "step": 34 }, { "epoch": 0.0010726982959421356, "grad_norm": 41.16793781732342, "learning_rate": 3.5750766087844745e-07, "loss": 1.9237, "step": 35 }, { "epoch": 0.0011033468186833395, "grad_norm": 15.347142252442719, "learning_rate": 3.677221654749745e-07, "loss": 1.5764, "step": 36 }, { "epoch": 0.0011339953414245434, "grad_norm": 14.579834699743872, "learning_rate": 3.7793667007150154e-07, "loss": 1.658, "step": 37 }, { "epoch": 0.0011646438641657471, "grad_norm": 5.134173708029878, "learning_rate": 3.8815117466802864e-07, "loss": 0.7675, "step": 38 }, { "epoch": 0.001195292386906951, "grad_norm": 19.549080153448532, "learning_rate": 3.9836567926455574e-07, "loss": 1.5274, "step": 39 }, { "epoch": 0.001225940909648155, "grad_norm": 17.431763721645307, "learning_rate": 4.085801838610828e-07, "loss": 1.5975, "step": 40 }, { "epoch": 0.001256589432389359, "grad_norm": 13.709106809188052, "learning_rate": 4.1879468845760983e-07, "loss": 1.5637, "step": 41 }, { "epoch": 0.0012872379551305626, "grad_norm": 13.414891020430295, "learning_rate": 4.2900919305413693e-07, "loss": 1.6072, "step": 42 }, { "epoch": 0.0013178864778717666, "grad_norm": 14.784965008729689, "learning_rate": 4.39223697650664e-07, "loss": 1.5069, "step": 43 }, { "epoch": 0.0013485350006129705, "grad_norm": 11.450298979173635, "learning_rate": 4.49438202247191e-07, "loss": 1.5218, "step": 44 }, { "epoch": 0.0013791835233541744, "grad_norm": 19.311617829847275, "learning_rate": 4.5965270684371807e-07, "loss": 1.5502, "step": 45 }, { "epoch": 0.0014098320460953781, "grad_norm": 9.94715589003306, "learning_rate": 4.698672114402452e-07, "loss": 1.4789, "step": 46 }, { "epoch": 0.001440480568836582, "grad_norm": 9.252828336876396, "learning_rate": 4.800817160367723e-07, "loss": 1.3833, "step": 47 }, { "epoch": 0.001471129091577786, "grad_norm": 8.56171353894738, "learning_rate": 4.902962206332993e-07, "loss": 1.3665, "step": 48 }, { "epoch": 0.00150177761431899, "grad_norm": 5.055970265019034, "learning_rate": 5.005107252298265e-07, "loss": 0.6828, "step": 49 }, { "epoch": 0.0015324261370601936, "grad_norm": 13.362493158267242, "learning_rate": 5.107252298263535e-07, "loss": 1.3583, "step": 50 }, { "epoch": 0.0015630746598013976, "grad_norm": 8.043214531069923, "learning_rate": 5.209397344228806e-07, "loss": 1.3316, "step": 51 }, { "epoch": 0.0015937231825426015, "grad_norm": 6.085093982822141, "learning_rate": 5.311542390194075e-07, "loss": 1.3459, "step": 52 }, { "epoch": 0.0016243717052838054, "grad_norm": 6.006949417693448, "learning_rate": 5.413687436159346e-07, "loss": 1.3275, "step": 53 }, { "epoch": 0.0016550202280250091, "grad_norm": 6.260057380736392, "learning_rate": 5.515832482124617e-07, "loss": 1.3393, "step": 54 }, { "epoch": 0.001685668750766213, "grad_norm": 5.9872304994030605, "learning_rate": 5.617977528089888e-07, "loss": 1.3155, "step": 55 }, { "epoch": 0.001716317273507417, "grad_norm": 6.030312374625876, "learning_rate": 5.720122574055159e-07, "loss": 1.2517, "step": 56 }, { "epoch": 0.001746965796248621, "grad_norm": 4.128222716562009, "learning_rate": 5.822267620020429e-07, "loss": 0.7113, "step": 57 }, { "epoch": 0.0017776143189898246, "grad_norm": 6.924480975698453, "learning_rate": 5.9244126659857e-07, "loss": 1.25, "step": 58 }, { "epoch": 0.0018082628417310286, "grad_norm": 5.041892119213802, "learning_rate": 6.02655771195097e-07, "loss": 1.1592, "step": 59 }, { "epoch": 0.0018389113644722325, "grad_norm": 4.975862129391129, "learning_rate": 6.128702757916242e-07, "loss": 1.3076, "step": 60 }, { "epoch": 0.0018695598872134364, "grad_norm": 4.084598864230345, "learning_rate": 6.230847803881512e-07, "loss": 0.6829, "step": 61 }, { "epoch": 0.0019002084099546401, "grad_norm": 5.122150387431001, "learning_rate": 6.332992849846783e-07, "loss": 1.2402, "step": 62 }, { "epoch": 0.001930856932695844, "grad_norm": 6.424456791913945, "learning_rate": 6.435137895812053e-07, "loss": 1.3165, "step": 63 }, { "epoch": 0.001961505455437048, "grad_norm": 4.66251436126004, "learning_rate": 6.537282941777324e-07, "loss": 1.3413, "step": 64 }, { "epoch": 0.001992153978178252, "grad_norm": 3.6012840554222554, "learning_rate": 6.639427987742594e-07, "loss": 1.1122, "step": 65 }, { "epoch": 0.002022802500919456, "grad_norm": 3.4194693343194458, "learning_rate": 6.741573033707865e-07, "loss": 1.1524, "step": 66 }, { "epoch": 0.0020534510236606593, "grad_norm": 3.9177473704710115, "learning_rate": 6.843718079673137e-07, "loss": 0.6838, "step": 67 }, { "epoch": 0.0020840995464018633, "grad_norm": 3.65196319900171, "learning_rate": 6.945863125638406e-07, "loss": 1.1558, "step": 68 }, { "epoch": 0.002114748069143067, "grad_norm": 4.132068093350224, "learning_rate": 7.048008171603678e-07, "loss": 1.1835, "step": 69 }, { "epoch": 0.002145396591884271, "grad_norm": 3.714881504395904, "learning_rate": 7.150153217568949e-07, "loss": 1.1979, "step": 70 }, { "epoch": 0.002176045114625475, "grad_norm": 3.054239463554113, "learning_rate": 7.252298263534219e-07, "loss": 1.1227, "step": 71 }, { "epoch": 0.002206693637366679, "grad_norm": 3.335210078190918, "learning_rate": 7.35444330949949e-07, "loss": 1.0897, "step": 72 }, { "epoch": 0.002237342160107883, "grad_norm": 5.718509333409534, "learning_rate": 7.456588355464761e-07, "loss": 1.2162, "step": 73 }, { "epoch": 0.002267990682849087, "grad_norm": 3.120674506307499, "learning_rate": 7.558733401430031e-07, "loss": 1.0349, "step": 74 }, { "epoch": 0.0022986392055902903, "grad_norm": 3.6365794796158015, "learning_rate": 7.660878447395302e-07, "loss": 1.0492, "step": 75 }, { "epoch": 0.0023292877283314943, "grad_norm": 2.778037881949042, "learning_rate": 7.763023493360573e-07, "loss": 1.105, "step": 76 }, { "epoch": 0.002359936251072698, "grad_norm": 2.9368528943694776, "learning_rate": 7.865168539325843e-07, "loss": 1.0768, "step": 77 }, { "epoch": 0.002390584773813902, "grad_norm": 3.3100043233909964, "learning_rate": 7.967313585291115e-07, "loss": 1.1991, "step": 78 }, { "epoch": 0.002421233296555106, "grad_norm": 2.96138063399134, "learning_rate": 8.069458631256384e-07, "loss": 1.1582, "step": 79 }, { "epoch": 0.00245188181929631, "grad_norm": 3.0952700552496912, "learning_rate": 8.171603677221656e-07, "loss": 1.1743, "step": 80 }, { "epoch": 0.002482530342037514, "grad_norm": 3.63804402842874, "learning_rate": 8.273748723186927e-07, "loss": 0.6362, "step": 81 }, { "epoch": 0.002513178864778718, "grad_norm": 3.4983644992203, "learning_rate": 8.375893769152197e-07, "loss": 0.663, "step": 82 }, { "epoch": 0.0025438273875199213, "grad_norm": 2.8398661530385674, "learning_rate": 8.478038815117468e-07, "loss": 1.0997, "step": 83 }, { "epoch": 0.0025744759102611253, "grad_norm": 2.6593352295866843, "learning_rate": 8.580183861082739e-07, "loss": 1.1592, "step": 84 }, { "epoch": 0.002605124433002329, "grad_norm": 3.142037814951282, "learning_rate": 8.682328907048009e-07, "loss": 1.1877, "step": 85 }, { "epoch": 0.002635772955743533, "grad_norm": 2.585685494012471, "learning_rate": 8.78447395301328e-07, "loss": 1.0635, "step": 86 }, { "epoch": 0.002666421478484737, "grad_norm": 3.203469321444957, "learning_rate": 8.886618998978551e-07, "loss": 1.113, "step": 87 }, { "epoch": 0.002697070001225941, "grad_norm": 3.192536509911456, "learning_rate": 8.98876404494382e-07, "loss": 0.6342, "step": 88 }, { "epoch": 0.002727718523967145, "grad_norm": 2.558422931334429, "learning_rate": 9.090909090909091e-07, "loss": 1.0639, "step": 89 }, { "epoch": 0.002758367046708349, "grad_norm": 2.501090604282394, "learning_rate": 9.193054136874361e-07, "loss": 1.0322, "step": 90 }, { "epoch": 0.0027890155694495523, "grad_norm": 2.613240891711274, "learning_rate": 9.295199182839632e-07, "loss": 1.1768, "step": 91 }, { "epoch": 0.0028196640921907563, "grad_norm": 2.268956085444914, "learning_rate": 9.397344228804904e-07, "loss": 1.061, "step": 92 }, { "epoch": 0.00285031261493196, "grad_norm": 3.25018056466684, "learning_rate": 9.499489274770174e-07, "loss": 0.6488, "step": 93 }, { "epoch": 0.002880961137673164, "grad_norm": 2.3916177375068925, "learning_rate": 9.601634320735445e-07, "loss": 1.0628, "step": 94 }, { "epoch": 0.002911609660414368, "grad_norm": 2.345002730787723, "learning_rate": 9.703779366700715e-07, "loss": 1.1053, "step": 95 }, { "epoch": 0.002942258183155572, "grad_norm": 2.7549937121242043, "learning_rate": 9.805924412665985e-07, "loss": 1.1369, "step": 96 }, { "epoch": 0.002972906705896776, "grad_norm": 2.506360958008795, "learning_rate": 9.908069458631257e-07, "loss": 1.1012, "step": 97 }, { "epoch": 0.00300355522863798, "grad_norm": 2.1899499629366384, "learning_rate": 1.001021450459653e-06, "loss": 1.108, "step": 98 }, { "epoch": 0.0030342037513791833, "grad_norm": 2.5550824400904895, "learning_rate": 1.01123595505618e-06, "loss": 1.1466, "step": 99 }, { "epoch": 0.0030648522741203873, "grad_norm": 2.772345713511711, "learning_rate": 1.021450459652707e-06, "loss": 1.1245, "step": 100 }, { "epoch": 0.003095500796861591, "grad_norm": 2.6901152702125106, "learning_rate": 1.031664964249234e-06, "loss": 1.1747, "step": 101 }, { "epoch": 0.003126149319602795, "grad_norm": 2.45227689577317, "learning_rate": 1.0418794688457611e-06, "loss": 1.0941, "step": 102 }, { "epoch": 0.003156797842343999, "grad_norm": 2.6252037436883366, "learning_rate": 1.052093973442288e-06, "loss": 1.1544, "step": 103 }, { "epoch": 0.003187446365085203, "grad_norm": 2.177247675818683, "learning_rate": 1.062308478038815e-06, "loss": 1.0251, "step": 104 }, { "epoch": 0.003218094887826407, "grad_norm": 2.690216978244421, "learning_rate": 1.0725229826353423e-06, "loss": 0.6243, "step": 105 }, { "epoch": 0.003248743410567611, "grad_norm": 2.21631654707533, "learning_rate": 1.0827374872318693e-06, "loss": 1.0523, "step": 106 }, { "epoch": 0.0032793919333088143, "grad_norm": 2.292391939717061, "learning_rate": 1.0929519918283963e-06, "loss": 1.1029, "step": 107 }, { "epoch": 0.0033100404560500183, "grad_norm": 2.372143413282943, "learning_rate": 1.1031664964249235e-06, "loss": 1.1012, "step": 108 }, { "epoch": 0.003340688978791222, "grad_norm": 2.4135297140600973, "learning_rate": 1.1133810010214507e-06, "loss": 1.0883, "step": 109 }, { "epoch": 0.003371337501532426, "grad_norm": 2.4904591517234747, "learning_rate": 1.1235955056179777e-06, "loss": 1.1303, "step": 110 }, { "epoch": 0.00340198602427363, "grad_norm": 2.6120255239657726, "learning_rate": 1.1338100102145047e-06, "loss": 1.1412, "step": 111 }, { "epoch": 0.003432634547014834, "grad_norm": 2.334902183895164, "learning_rate": 1.1440245148110319e-06, "loss": 1.0053, "step": 112 }, { "epoch": 0.003463283069756038, "grad_norm": 2.3362149516575377, "learning_rate": 1.1542390194075589e-06, "loss": 1.2294, "step": 113 }, { "epoch": 0.003493931592497242, "grad_norm": 2.050987253573314, "learning_rate": 1.1644535240040859e-06, "loss": 1.1927, "step": 114 }, { "epoch": 0.0035245801152384453, "grad_norm": 2.790939377328394, "learning_rate": 1.1746680286006129e-06, "loss": 1.0791, "step": 115 }, { "epoch": 0.0035552286379796493, "grad_norm": 2.4921038571837153, "learning_rate": 1.18488253319714e-06, "loss": 1.1186, "step": 116 }, { "epoch": 0.003585877160720853, "grad_norm": 1.9847722006031834, "learning_rate": 1.195097037793667e-06, "loss": 1.0668, "step": 117 }, { "epoch": 0.003616525683462057, "grad_norm": 2.4071288060492466, "learning_rate": 1.205311542390194e-06, "loss": 1.0307, "step": 118 }, { "epoch": 0.003647174206203261, "grad_norm": 2.299447548773128, "learning_rate": 1.2155260469867213e-06, "loss": 1.1738, "step": 119 }, { "epoch": 0.003677822728944465, "grad_norm": 2.6651783123586705, "learning_rate": 1.2257405515832485e-06, "loss": 0.6259, "step": 120 }, { "epoch": 0.003708471251685669, "grad_norm": 2.608454234125932, "learning_rate": 1.2359550561797752e-06, "loss": 0.5903, "step": 121 }, { "epoch": 0.003739119774426873, "grad_norm": 1.8079443270723217, "learning_rate": 1.2461695607763025e-06, "loss": 1.0591, "step": 122 }, { "epoch": 0.0037697682971680763, "grad_norm": 2.6307478394252284, "learning_rate": 1.2563840653728297e-06, "loss": 1.0274, "step": 123 }, { "epoch": 0.0038004168199092803, "grad_norm": 3.131834656783985, "learning_rate": 1.2665985699693567e-06, "loss": 1.0835, "step": 124 }, { "epoch": 0.003831065342650484, "grad_norm": 2.5076029238893804, "learning_rate": 1.2768130745658836e-06, "loss": 1.0961, "step": 125 }, { "epoch": 0.003861713865391688, "grad_norm": 2.1065694870938683, "learning_rate": 1.2870275791624106e-06, "loss": 1.0323, "step": 126 }, { "epoch": 0.003892362388132892, "grad_norm": 2.315785863796997, "learning_rate": 1.2972420837589378e-06, "loss": 1.0611, "step": 127 }, { "epoch": 0.003923010910874096, "grad_norm": 2.1229304597281953, "learning_rate": 1.3074565883554648e-06, "loss": 1.124, "step": 128 }, { "epoch": 0.0039536594336153, "grad_norm": 2.2843366659814293, "learning_rate": 1.317671092951992e-06, "loss": 1.037, "step": 129 }, { "epoch": 0.003984307956356504, "grad_norm": 2.2402876341048397, "learning_rate": 1.3278855975485188e-06, "loss": 1.0975, "step": 130 }, { "epoch": 0.004014956479097708, "grad_norm": 2.2012568129292482, "learning_rate": 1.338100102145046e-06, "loss": 1.0091, "step": 131 }, { "epoch": 0.004045605001838912, "grad_norm": 2.4692482243733447, "learning_rate": 1.348314606741573e-06, "loss": 0.6262, "step": 132 }, { "epoch": 0.004076253524580116, "grad_norm": 1.8016985631297346, "learning_rate": 1.3585291113381002e-06, "loss": 0.9801, "step": 133 }, { "epoch": 0.004106902047321319, "grad_norm": 2.202260271703253, "learning_rate": 1.3687436159346274e-06, "loss": 1.0911, "step": 134 }, { "epoch": 0.004137550570062523, "grad_norm": 2.122511532203541, "learning_rate": 1.3789581205311544e-06, "loss": 1.0371, "step": 135 }, { "epoch": 0.0041681990928037265, "grad_norm": 2.3278861961810584, "learning_rate": 1.3891726251276812e-06, "loss": 1.0557, "step": 136 }, { "epoch": 0.0041988476155449305, "grad_norm": 2.199172098345782, "learning_rate": 1.3993871297242084e-06, "loss": 1.0267, "step": 137 }, { "epoch": 0.004229496138286134, "grad_norm": 1.9198244306328938, "learning_rate": 1.4096016343207356e-06, "loss": 1.0678, "step": 138 }, { "epoch": 0.004260144661027338, "grad_norm": 2.402247410529704, "learning_rate": 1.4198161389172626e-06, "loss": 1.0751, "step": 139 }, { "epoch": 0.004290793183768542, "grad_norm": 2.1409531879867005, "learning_rate": 1.4300306435137898e-06, "loss": 0.6053, "step": 140 }, { "epoch": 0.004321441706509746, "grad_norm": 1.8534298866420351, "learning_rate": 1.4402451481103168e-06, "loss": 0.9599, "step": 141 }, { "epoch": 0.00435209022925095, "grad_norm": 1.9881417938186292, "learning_rate": 1.4504596527068438e-06, "loss": 1.0489, "step": 142 }, { "epoch": 0.004382738751992154, "grad_norm": 2.0567047387059847, "learning_rate": 1.4606741573033708e-06, "loss": 1.06, "step": 143 }, { "epoch": 0.004413387274733358, "grad_norm": 2.1810554903957637, "learning_rate": 1.470888661899898e-06, "loss": 1.036, "step": 144 }, { "epoch": 0.004444035797474562, "grad_norm": 1.9938979727862693, "learning_rate": 1.4811031664964252e-06, "loss": 0.9986, "step": 145 }, { "epoch": 0.004474684320215766, "grad_norm": 1.9331101107056952, "learning_rate": 1.4913176710929522e-06, "loss": 0.9873, "step": 146 }, { "epoch": 0.00450533284295697, "grad_norm": 2.1170008851295057, "learning_rate": 1.501532175689479e-06, "loss": 0.9954, "step": 147 }, { "epoch": 0.004535981365698174, "grad_norm": 2.1628947635845193, "learning_rate": 1.5117466802860062e-06, "loss": 1.0998, "step": 148 }, { "epoch": 0.004566629888439378, "grad_norm": 2.274629214735277, "learning_rate": 1.5219611848825334e-06, "loss": 1.1288, "step": 149 }, { "epoch": 0.004597278411180581, "grad_norm": 2.0713612340427776, "learning_rate": 1.5321756894790604e-06, "loss": 0.6407, "step": 150 }, { "epoch": 0.004627926933921785, "grad_norm": 1.889431024903822, "learning_rate": 1.5423901940755876e-06, "loss": 0.5891, "step": 151 }, { "epoch": 0.0046585754566629885, "grad_norm": 2.7597105862410385, "learning_rate": 1.5526046986721146e-06, "loss": 0.9961, "step": 152 }, { "epoch": 0.0046892239794041925, "grad_norm": 1.7797829380632892, "learning_rate": 1.5628192032686416e-06, "loss": 0.6095, "step": 153 }, { "epoch": 0.004719872502145396, "grad_norm": 1.7110913662917895, "learning_rate": 1.5730337078651686e-06, "loss": 0.5945, "step": 154 }, { "epoch": 0.0047505210248866, "grad_norm": 1.7885067531423469, "learning_rate": 1.5832482124616958e-06, "loss": 0.5923, "step": 155 }, { "epoch": 0.004781169547627804, "grad_norm": 2.606135200835792, "learning_rate": 1.593462717058223e-06, "loss": 0.9396, "step": 156 }, { "epoch": 0.004811818070369008, "grad_norm": 1.9843888042295044, "learning_rate": 1.60367722165475e-06, "loss": 0.9778, "step": 157 }, { "epoch": 0.004842466593110212, "grad_norm": 2.3375131473790534, "learning_rate": 1.6138917262512767e-06, "loss": 1.0235, "step": 158 }, { "epoch": 0.004873115115851416, "grad_norm": 2.47174024618916, "learning_rate": 1.624106230847804e-06, "loss": 1.0689, "step": 159 }, { "epoch": 0.00490376363859262, "grad_norm": 2.171183404668372, "learning_rate": 1.6343207354443311e-06, "loss": 1.0165, "step": 160 }, { "epoch": 0.004934412161333824, "grad_norm": 2.3296477651865604, "learning_rate": 1.6445352400408581e-06, "loss": 1.0388, "step": 161 }, { "epoch": 0.004965060684075028, "grad_norm": 2.2288120074196036, "learning_rate": 1.6547497446373853e-06, "loss": 1.0309, "step": 162 }, { "epoch": 0.004995709206816232, "grad_norm": 2.1270021944999296, "learning_rate": 1.6649642492339123e-06, "loss": 0.9662, "step": 163 }, { "epoch": 0.005026357729557436, "grad_norm": 1.9650468121378284, "learning_rate": 1.6751787538304393e-06, "loss": 0.9267, "step": 164 }, { "epoch": 0.00505700625229864, "grad_norm": 1.9629102234998905, "learning_rate": 1.6853932584269663e-06, "loss": 0.9676, "step": 165 }, { "epoch": 0.005087654775039843, "grad_norm": 2.0618972530474178, "learning_rate": 1.6956077630234935e-06, "loss": 1.0296, "step": 166 }, { "epoch": 0.005118303297781047, "grad_norm": 2.282841767184988, "learning_rate": 1.7058222676200205e-06, "loss": 1.025, "step": 167 }, { "epoch": 0.0051489518205222505, "grad_norm": 1.8388284362651168, "learning_rate": 1.7160367722165477e-06, "loss": 1.0516, "step": 168 }, { "epoch": 0.0051796003432634545, "grad_norm": 1.899049586228805, "learning_rate": 1.7262512768130745e-06, "loss": 1.074, "step": 169 }, { "epoch": 0.005210248866004658, "grad_norm": 2.4829797599196084, "learning_rate": 1.7364657814096017e-06, "loss": 1.0545, "step": 170 }, { "epoch": 0.005240897388745862, "grad_norm": 2.329477092270331, "learning_rate": 1.746680286006129e-06, "loss": 0.9444, "step": 171 }, { "epoch": 0.005271545911487066, "grad_norm": 2.1312840229757315, "learning_rate": 1.756894790602656e-06, "loss": 0.8842, "step": 172 }, { "epoch": 0.00530219443422827, "grad_norm": 2.1108558857526827, "learning_rate": 1.7671092951991831e-06, "loss": 1.0152, "step": 173 }, { "epoch": 0.005332842956969474, "grad_norm": 2.2014769127720077, "learning_rate": 1.7773237997957101e-06, "loss": 1.0259, "step": 174 }, { "epoch": 0.005363491479710678, "grad_norm": 1.3121334356117784, "learning_rate": 1.787538304392237e-06, "loss": 0.5662, "step": 175 }, { "epoch": 0.005394140002451882, "grad_norm": 1.8780490599849913, "learning_rate": 1.797752808988764e-06, "loss": 1.0435, "step": 176 }, { "epoch": 0.005424788525193086, "grad_norm": 2.1199023698004984, "learning_rate": 1.8079673135852913e-06, "loss": 0.9284, "step": 177 }, { "epoch": 0.00545543704793429, "grad_norm": 2.04320855645722, "learning_rate": 1.8181818181818183e-06, "loss": 1.0132, "step": 178 }, { "epoch": 0.005486085570675494, "grad_norm": 2.0731551005867956, "learning_rate": 1.8283963227783455e-06, "loss": 1.1099, "step": 179 }, { "epoch": 0.005516734093416698, "grad_norm": 1.819622233233894, "learning_rate": 1.8386108273748723e-06, "loss": 0.967, "step": 180 }, { "epoch": 0.005547382616157902, "grad_norm": 1.9330854687977272, "learning_rate": 1.8488253319713995e-06, "loss": 0.9349, "step": 181 }, { "epoch": 0.005578031138899105, "grad_norm": 2.1605422909936145, "learning_rate": 1.8590398365679265e-06, "loss": 1.0334, "step": 182 }, { "epoch": 0.005608679661640309, "grad_norm": 1.3053121244964576, "learning_rate": 1.8692543411644537e-06, "loss": 0.5943, "step": 183 }, { "epoch": 0.0056393281843815125, "grad_norm": 1.2468106407030248, "learning_rate": 1.8794688457609809e-06, "loss": 0.5821, "step": 184 }, { "epoch": 0.0056699767071227165, "grad_norm": 2.2857729736659786, "learning_rate": 1.8896833503575079e-06, "loss": 1.072, "step": 185 }, { "epoch": 0.00570062522986392, "grad_norm": 1.924085704487464, "learning_rate": 1.8998978549540349e-06, "loss": 1.0076, "step": 186 }, { "epoch": 0.005731273752605124, "grad_norm": 2.1088557197568054, "learning_rate": 1.910112359550562e-06, "loss": 1.0707, "step": 187 }, { "epoch": 0.005761922275346328, "grad_norm": 1.1278950455479804, "learning_rate": 1.920326864147089e-06, "loss": 0.5627, "step": 188 }, { "epoch": 0.005792570798087532, "grad_norm": 2.187555427219849, "learning_rate": 1.9305413687436163e-06, "loss": 1.0038, "step": 189 }, { "epoch": 0.005823219320828736, "grad_norm": 1.8613101716260592, "learning_rate": 1.940755873340143e-06, "loss": 1.1156, "step": 190 }, { "epoch": 0.00585386784356994, "grad_norm": 1.810294173106286, "learning_rate": 1.9509703779366703e-06, "loss": 1.0096, "step": 191 }, { "epoch": 0.005884516366311144, "grad_norm": 2.152758584743478, "learning_rate": 1.961184882533197e-06, "loss": 0.9524, "step": 192 }, { "epoch": 0.005915164889052348, "grad_norm": 2.230109761568665, "learning_rate": 1.9713993871297242e-06, "loss": 1.0475, "step": 193 }, { "epoch": 0.005945813411793552, "grad_norm": 2.164725068931043, "learning_rate": 1.9816138917262514e-06, "loss": 1.0198, "step": 194 }, { "epoch": 0.005976461934534756, "grad_norm": 2.1809032692833883, "learning_rate": 1.9918283963227787e-06, "loss": 1.0065, "step": 195 }, { "epoch": 0.00600711045727596, "grad_norm": 2.1583492402585343, "learning_rate": 2.002042900919306e-06, "loss": 0.9674, "step": 196 }, { "epoch": 0.006037758980017163, "grad_norm": 1.9046691219127578, "learning_rate": 2.0122574055158326e-06, "loss": 0.9724, "step": 197 }, { "epoch": 0.006068407502758367, "grad_norm": 2.4655986815551283, "learning_rate": 2.02247191011236e-06, "loss": 1.03, "step": 198 }, { "epoch": 0.006099056025499571, "grad_norm": 3.586663594780565, "learning_rate": 2.0326864147088866e-06, "loss": 1.0873, "step": 199 }, { "epoch": 0.0061297045482407745, "grad_norm": 2.2123415146698244, "learning_rate": 2.042900919305414e-06, "loss": 1.2248, "step": 200 }, { "epoch": 0.0061603530709819785, "grad_norm": 2.209343899756635, "learning_rate": 2.053115423901941e-06, "loss": 0.9726, "step": 201 }, { "epoch": 0.006191001593723182, "grad_norm": 2.296200584133391, "learning_rate": 2.063329928498468e-06, "loss": 1.0405, "step": 202 }, { "epoch": 0.006221650116464386, "grad_norm": 1.2080572206267983, "learning_rate": 2.073544433094995e-06, "loss": 0.5763, "step": 203 }, { "epoch": 0.00625229863920559, "grad_norm": 1.9324414486816266, "learning_rate": 2.0837589376915222e-06, "loss": 0.9212, "step": 204 }, { "epoch": 0.006282947161946794, "grad_norm": 1.9515674339697269, "learning_rate": 2.0939734422880494e-06, "loss": 1.0216, "step": 205 }, { "epoch": 0.006313595684687998, "grad_norm": 2.0073130569488433, "learning_rate": 2.104187946884576e-06, "loss": 1.0208, "step": 206 }, { "epoch": 0.006344244207429202, "grad_norm": 1.7342703192195532, "learning_rate": 2.1144024514811034e-06, "loss": 0.9704, "step": 207 }, { "epoch": 0.006374892730170406, "grad_norm": 1.0459202862228312, "learning_rate": 2.12461695607763e-06, "loss": 0.5803, "step": 208 }, { "epoch": 0.00640554125291161, "grad_norm": 2.3157040224735046, "learning_rate": 2.1348314606741574e-06, "loss": 1.0521, "step": 209 }, { "epoch": 0.006436189775652814, "grad_norm": 2.1424548608942064, "learning_rate": 2.1450459652706846e-06, "loss": 1.0389, "step": 210 }, { "epoch": 0.006466838298394018, "grad_norm": 2.142760335481871, "learning_rate": 2.155260469867212e-06, "loss": 1.027, "step": 211 }, { "epoch": 0.006497486821135222, "grad_norm": 2.221437829592008, "learning_rate": 2.1654749744637386e-06, "loss": 1.0083, "step": 212 }, { "epoch": 0.006528135343876425, "grad_norm": 2.2468465976791387, "learning_rate": 2.175689479060266e-06, "loss": 1.0019, "step": 213 }, { "epoch": 0.006558783866617629, "grad_norm": 1.9996774501660726, "learning_rate": 2.1859039836567926e-06, "loss": 1.055, "step": 214 }, { "epoch": 0.006589432389358833, "grad_norm": 2.0150048003409142, "learning_rate": 2.1961184882533198e-06, "loss": 0.8913, "step": 215 }, { "epoch": 0.0066200809121000365, "grad_norm": 1.866315839562069, "learning_rate": 2.206332992849847e-06, "loss": 0.9542, "step": 216 }, { "epoch": 0.0066507294348412405, "grad_norm": 1.8287208136143462, "learning_rate": 2.216547497446374e-06, "loss": 1.04, "step": 217 }, { "epoch": 0.006681377957582444, "grad_norm": 2.1468188636874492, "learning_rate": 2.2267620020429014e-06, "loss": 1.0114, "step": 218 }, { "epoch": 0.006712026480323648, "grad_norm": 0.9603869494014118, "learning_rate": 2.236976506639428e-06, "loss": 0.5404, "step": 219 }, { "epoch": 0.006742675003064852, "grad_norm": 0.9396543274790815, "learning_rate": 2.2471910112359554e-06, "loss": 0.5689, "step": 220 }, { "epoch": 0.006773323525806056, "grad_norm": 1.9627071652591925, "learning_rate": 2.257405515832482e-06, "loss": 0.981, "step": 221 }, { "epoch": 0.00680397204854726, "grad_norm": 1.0112133474480782, "learning_rate": 2.2676200204290094e-06, "loss": 0.5739, "step": 222 }, { "epoch": 0.006834620571288464, "grad_norm": 2.4614463569864182, "learning_rate": 2.2778345250255366e-06, "loss": 1.0417, "step": 223 }, { "epoch": 0.006865269094029668, "grad_norm": 2.2064375736178756, "learning_rate": 2.2880490296220638e-06, "loss": 0.9917, "step": 224 }, { "epoch": 0.006895917616770872, "grad_norm": 1.9738686436582333, "learning_rate": 2.2982635342185906e-06, "loss": 1.0187, "step": 225 }, { "epoch": 0.006926566139512076, "grad_norm": 1.7343111995310205, "learning_rate": 2.3084780388151178e-06, "loss": 0.9156, "step": 226 }, { "epoch": 0.00695721466225328, "grad_norm": 0.9379083171690821, "learning_rate": 2.3186925434116445e-06, "loss": 0.5678, "step": 227 }, { "epoch": 0.006987863184994484, "grad_norm": 2.3224194132421307, "learning_rate": 2.3289070480081717e-06, "loss": 0.9872, "step": 228 }, { "epoch": 0.007018511707735687, "grad_norm": 1.9954906534632313, "learning_rate": 2.339121552604699e-06, "loss": 1.0742, "step": 229 }, { "epoch": 0.007049160230476891, "grad_norm": 2.1068180057285826, "learning_rate": 2.3493360572012257e-06, "loss": 1.0058, "step": 230 }, { "epoch": 0.007079808753218095, "grad_norm": 0.9147743492812818, "learning_rate": 2.359550561797753e-06, "loss": 0.5646, "step": 231 }, { "epoch": 0.0071104572759592985, "grad_norm": 2.4267760759765573, "learning_rate": 2.36976506639428e-06, "loss": 1.0211, "step": 232 }, { "epoch": 0.0071411057987005025, "grad_norm": 1.8832209305620924, "learning_rate": 2.3799795709908073e-06, "loss": 0.8528, "step": 233 }, { "epoch": 0.007171754321441706, "grad_norm": 2.0092489328516794, "learning_rate": 2.390194075587334e-06, "loss": 1.0711, "step": 234 }, { "epoch": 0.00720240284418291, "grad_norm": 2.0769007879912613, "learning_rate": 2.4004085801838613e-06, "loss": 0.9872, "step": 235 }, { "epoch": 0.007233051366924114, "grad_norm": 1.9448734616220296, "learning_rate": 2.410623084780388e-06, "loss": 0.907, "step": 236 }, { "epoch": 0.007263699889665318, "grad_norm": 1.9809682510337252, "learning_rate": 2.4208375893769153e-06, "loss": 1.0085, "step": 237 }, { "epoch": 0.007294348412406522, "grad_norm": 1.846536387411133, "learning_rate": 2.4310520939734425e-06, "loss": 0.9753, "step": 238 }, { "epoch": 0.007324996935147726, "grad_norm": 2.060090551686998, "learning_rate": 2.4412665985699697e-06, "loss": 1.0115, "step": 239 }, { "epoch": 0.00735564545788893, "grad_norm": 2.109104427058646, "learning_rate": 2.451481103166497e-06, "loss": 0.9336, "step": 240 }, { "epoch": 0.007386293980630134, "grad_norm": 1.9429928701599, "learning_rate": 2.4616956077630237e-06, "loss": 0.9979, "step": 241 }, { "epoch": 0.007416942503371338, "grad_norm": 2.2158317142236186, "learning_rate": 2.4719101123595505e-06, "loss": 0.9469, "step": 242 }, { "epoch": 0.007447591026112542, "grad_norm": 2.110818540360969, "learning_rate": 2.4821246169560777e-06, "loss": 0.9229, "step": 243 }, { "epoch": 0.007478239548853746, "grad_norm": 1.9960735412548651, "learning_rate": 2.492339121552605e-06, "loss": 0.8925, "step": 244 }, { "epoch": 0.007508888071594949, "grad_norm": 2.2336181608768992, "learning_rate": 2.5025536261491317e-06, "loss": 0.9899, "step": 245 }, { "epoch": 0.007539536594336153, "grad_norm": 1.9285098926435835, "learning_rate": 2.5127681307456593e-06, "loss": 0.9538, "step": 246 }, { "epoch": 0.007570185117077357, "grad_norm": 2.2828073393529786, "learning_rate": 2.522982635342186e-06, "loss": 0.9268, "step": 247 }, { "epoch": 0.0076008336398185605, "grad_norm": 2.001311611805655, "learning_rate": 2.5331971399387133e-06, "loss": 1.0287, "step": 248 }, { "epoch": 0.0076314821625597645, "grad_norm": 2.2443074874601066, "learning_rate": 2.54341164453524e-06, "loss": 0.902, "step": 249 }, { "epoch": 0.007662130685300968, "grad_norm": 1.9352075708892644, "learning_rate": 2.5536261491317673e-06, "loss": 0.965, "step": 250 }, { "epoch": 0.007692779208042172, "grad_norm": 2.027053378079337, "learning_rate": 2.5638406537282945e-06, "loss": 0.9712, "step": 251 }, { "epoch": 0.007723427730783376, "grad_norm": 2.4696953480892785, "learning_rate": 2.5740551583248213e-06, "loss": 0.9846, "step": 252 }, { "epoch": 0.00775407625352458, "grad_norm": 1.8404293649631356, "learning_rate": 2.584269662921349e-06, "loss": 1.0065, "step": 253 }, { "epoch": 0.007784724776265784, "grad_norm": 2.1681704180455834, "learning_rate": 2.5944841675178757e-06, "loss": 1.0006, "step": 254 }, { "epoch": 0.007815373299006988, "grad_norm": 1.9492288018409978, "learning_rate": 2.6046986721144025e-06, "loss": 0.9479, "step": 255 }, { "epoch": 0.007846021821748192, "grad_norm": 3.210711869440543, "learning_rate": 2.6149131767109297e-06, "loss": 0.9255, "step": 256 }, { "epoch": 0.007876670344489396, "grad_norm": 2.1609016017831357, "learning_rate": 2.6251276813074565e-06, "loss": 0.9761, "step": 257 }, { "epoch": 0.0079073188672306, "grad_norm": 1.8814020917375387, "learning_rate": 2.635342185903984e-06, "loss": 0.9502, "step": 258 }, { "epoch": 0.007937967389971804, "grad_norm": 1.8602386249934104, "learning_rate": 2.645556690500511e-06, "loss": 0.92, "step": 259 }, { "epoch": 0.007968615912713008, "grad_norm": 1.9916151840167646, "learning_rate": 2.6557711950970376e-06, "loss": 0.9505, "step": 260 }, { "epoch": 0.007999264435454212, "grad_norm": 0.7834978838282946, "learning_rate": 2.6659856996935653e-06, "loss": 0.5442, "step": 261 }, { "epoch": 0.008029912958195416, "grad_norm": 1.7258820261653467, "learning_rate": 2.676200204290092e-06, "loss": 0.9451, "step": 262 }, { "epoch": 0.00806056148093662, "grad_norm": 1.7813152172322952, "learning_rate": 2.6864147088866193e-06, "loss": 1.0142, "step": 263 }, { "epoch": 0.008091210003677823, "grad_norm": 0.8135872128936745, "learning_rate": 2.696629213483146e-06, "loss": 0.566, "step": 264 }, { "epoch": 0.008121858526419027, "grad_norm": 2.0897549729659195, "learning_rate": 2.7068437180796737e-06, "loss": 0.9606, "step": 265 }, { "epoch": 0.008152507049160231, "grad_norm": 1.8031623373844994, "learning_rate": 2.7170582226762004e-06, "loss": 0.9404, "step": 266 }, { "epoch": 0.008183155571901435, "grad_norm": 1.754049291724884, "learning_rate": 2.7272727272727272e-06, "loss": 1.0175, "step": 267 }, { "epoch": 0.008213804094642637, "grad_norm": 0.7565994037812249, "learning_rate": 2.737487231869255e-06, "loss": 0.528, "step": 268 }, { "epoch": 0.008244452617383841, "grad_norm": 2.084912439647295, "learning_rate": 2.7477017364657816e-06, "loss": 1.0904, "step": 269 }, { "epoch": 0.008275101140125045, "grad_norm": 2.915092956409034, "learning_rate": 2.757916241062309e-06, "loss": 0.9863, "step": 270 }, { "epoch": 0.00830574966286625, "grad_norm": 0.7145005113694478, "learning_rate": 2.7681307456588356e-06, "loss": 0.5488, "step": 271 }, { "epoch": 0.008336398185607453, "grad_norm": 0.7572591897631183, "learning_rate": 2.7783452502553624e-06, "loss": 0.566, "step": 272 }, { "epoch": 0.008367046708348657, "grad_norm": 2.0163097040219284, "learning_rate": 2.78855975485189e-06, "loss": 0.9577, "step": 273 }, { "epoch": 0.008397695231089861, "grad_norm": 1.9893310722827704, "learning_rate": 2.798774259448417e-06, "loss": 0.9442, "step": 274 }, { "epoch": 0.008428343753831065, "grad_norm": 1.9467732751710456, "learning_rate": 2.8089887640449444e-06, "loss": 0.9418, "step": 275 }, { "epoch": 0.008458992276572269, "grad_norm": 0.8021420099992655, "learning_rate": 2.8192032686414712e-06, "loss": 0.5662, "step": 276 }, { "epoch": 0.008489640799313473, "grad_norm": 2.116866969532792, "learning_rate": 2.829417773237998e-06, "loss": 0.9381, "step": 277 }, { "epoch": 0.008520289322054677, "grad_norm": 2.3498761465671163, "learning_rate": 2.839632277834525e-06, "loss": 1.0161, "step": 278 }, { "epoch": 0.00855093784479588, "grad_norm": 0.751411619339358, "learning_rate": 2.849846782431052e-06, "loss": 0.5638, "step": 279 }, { "epoch": 0.008581586367537085, "grad_norm": 1.7766769455899507, "learning_rate": 2.8600612870275796e-06, "loss": 0.9093, "step": 280 }, { "epoch": 0.008612234890278288, "grad_norm": 2.0597577000654366, "learning_rate": 2.8702757916241064e-06, "loss": 0.9863, "step": 281 }, { "epoch": 0.008642883413019492, "grad_norm": 1.9301882376920676, "learning_rate": 2.8804902962206336e-06, "loss": 0.9625, "step": 282 }, { "epoch": 0.008673531935760696, "grad_norm": 2.5532863144046978, "learning_rate": 2.890704800817161e-06, "loss": 1.1016, "step": 283 }, { "epoch": 0.0087041804585019, "grad_norm": 1.6370944728733645, "learning_rate": 2.9009193054136876e-06, "loss": 0.9702, "step": 284 }, { "epoch": 0.008734828981243104, "grad_norm": 1.9952345007256072, "learning_rate": 2.911133810010215e-06, "loss": 0.9744, "step": 285 }, { "epoch": 0.008765477503984308, "grad_norm": 1.9600805397506185, "learning_rate": 2.9213483146067416e-06, "loss": 0.9581, "step": 286 }, { "epoch": 0.008796126026725512, "grad_norm": 1.7899827029048436, "learning_rate": 2.931562819203269e-06, "loss": 1.003, "step": 287 }, { "epoch": 0.008826774549466716, "grad_norm": 1.8575298016593345, "learning_rate": 2.941777323799796e-06, "loss": 0.9682, "step": 288 }, { "epoch": 0.00885742307220792, "grad_norm": 2.2025600130295175, "learning_rate": 2.9519918283963228e-06, "loss": 0.8997, "step": 289 }, { "epoch": 0.008888071594949124, "grad_norm": 2.1005941212754258, "learning_rate": 2.9622063329928504e-06, "loss": 0.9554, "step": 290 }, { "epoch": 0.008918720117690328, "grad_norm": 1.9362869088676242, "learning_rate": 2.972420837589377e-06, "loss": 0.8913, "step": 291 }, { "epoch": 0.008949368640431532, "grad_norm": 1.9169176723268337, "learning_rate": 2.9826353421859044e-06, "loss": 0.8459, "step": 292 }, { "epoch": 0.008980017163172736, "grad_norm": 0.6794956363472234, "learning_rate": 2.992849846782431e-06, "loss": 0.5464, "step": 293 }, { "epoch": 0.00901066568591394, "grad_norm": 2.1027168600688952, "learning_rate": 3.003064351378958e-06, "loss": 0.9536, "step": 294 }, { "epoch": 0.009041314208655143, "grad_norm": 1.8453782891854864, "learning_rate": 3.0132788559754856e-06, "loss": 0.9759, "step": 295 }, { "epoch": 0.009071962731396347, "grad_norm": 1.6580501048429594, "learning_rate": 3.0234933605720124e-06, "loss": 0.9297, "step": 296 }, { "epoch": 0.009102611254137551, "grad_norm": 0.6852890236827608, "learning_rate": 3.03370786516854e-06, "loss": 0.5493, "step": 297 }, { "epoch": 0.009133259776878755, "grad_norm": 0.7567379009316634, "learning_rate": 3.0439223697650668e-06, "loss": 0.5235, "step": 298 }, { "epoch": 0.00916390829961996, "grad_norm": 2.0580446722025996, "learning_rate": 3.0541368743615935e-06, "loss": 0.9414, "step": 299 }, { "epoch": 0.009194556822361161, "grad_norm": 1.9371228815291313, "learning_rate": 3.0643513789581207e-06, "loss": 0.859, "step": 300 }, { "epoch": 0.009225205345102365, "grad_norm": 2.0444049519169343, "learning_rate": 3.0745658835546475e-06, "loss": 0.9244, "step": 301 }, { "epoch": 0.00925585386784357, "grad_norm": 1.7573362366104317, "learning_rate": 3.084780388151175e-06, "loss": 0.911, "step": 302 }, { "epoch": 0.009286502390584773, "grad_norm": 0.6712247719106377, "learning_rate": 3.094994892747702e-06, "loss": 0.5417, "step": 303 }, { "epoch": 0.009317150913325977, "grad_norm": 1.9998842060778819, "learning_rate": 3.105209397344229e-06, "loss": 0.9078, "step": 304 }, { "epoch": 0.009347799436067181, "grad_norm": 1.9350320396864713, "learning_rate": 3.1154239019407563e-06, "loss": 0.9354, "step": 305 }, { "epoch": 0.009378447958808385, "grad_norm": 1.813009363017042, "learning_rate": 3.125638406537283e-06, "loss": 0.8231, "step": 306 }, { "epoch": 0.009409096481549589, "grad_norm": 1.9265841228112504, "learning_rate": 3.1358529111338103e-06, "loss": 0.9352, "step": 307 }, { "epoch": 0.009439745004290793, "grad_norm": 1.8127146322611198, "learning_rate": 3.146067415730337e-06, "loss": 0.9654, "step": 308 }, { "epoch": 0.009470393527031997, "grad_norm": 2.1591457092372397, "learning_rate": 3.1562819203268647e-06, "loss": 1.0886, "step": 309 }, { "epoch": 0.0095010420497732, "grad_norm": 0.6943895597595314, "learning_rate": 3.1664964249233915e-06, "loss": 0.5343, "step": 310 }, { "epoch": 0.009531690572514405, "grad_norm": 1.9487520734293442, "learning_rate": 3.1767109295199183e-06, "loss": 0.901, "step": 311 }, { "epoch": 0.009562339095255609, "grad_norm": 2.3495245244312266, "learning_rate": 3.186925434116446e-06, "loss": 0.8887, "step": 312 }, { "epoch": 0.009592987617996812, "grad_norm": 2.0604452919347307, "learning_rate": 3.1971399387129727e-06, "loss": 1.0476, "step": 313 }, { "epoch": 0.009623636140738016, "grad_norm": 0.7158526863361768, "learning_rate": 3.2073544433095e-06, "loss": 0.5614, "step": 314 }, { "epoch": 0.00965428466347922, "grad_norm": 2.2027070639928135, "learning_rate": 3.2175689479060267e-06, "loss": 0.9752, "step": 315 }, { "epoch": 0.009684933186220424, "grad_norm": 2.095784477995741, "learning_rate": 3.2277834525025535e-06, "loss": 0.8566, "step": 316 }, { "epoch": 0.009715581708961628, "grad_norm": 2.101118819123347, "learning_rate": 3.237997957099081e-06, "loss": 1.0889, "step": 317 }, { "epoch": 0.009746230231702832, "grad_norm": 3.348568902715603, "learning_rate": 3.248212461695608e-06, "loss": 0.9546, "step": 318 }, { "epoch": 0.009776878754444036, "grad_norm": 1.927012180418579, "learning_rate": 3.258426966292135e-06, "loss": 0.8608, "step": 319 }, { "epoch": 0.00980752727718524, "grad_norm": 2.0279087878792015, "learning_rate": 3.2686414708886623e-06, "loss": 0.8301, "step": 320 }, { "epoch": 0.009838175799926444, "grad_norm": 2.3261127410897284, "learning_rate": 3.278855975485189e-06, "loss": 0.9706, "step": 321 }, { "epoch": 0.009868824322667648, "grad_norm": 1.9753738393090592, "learning_rate": 3.2890704800817163e-06, "loss": 0.9139, "step": 322 }, { "epoch": 0.009899472845408852, "grad_norm": 2.111478614387278, "learning_rate": 3.299284984678243e-06, "loss": 0.9582, "step": 323 }, { "epoch": 0.009930121368150056, "grad_norm": 1.9862113194160227, "learning_rate": 3.3094994892747707e-06, "loss": 0.8282, "step": 324 }, { "epoch": 0.00996076989089126, "grad_norm": 1.6304700587826588, "learning_rate": 3.3197139938712975e-06, "loss": 0.9596, "step": 325 }, { "epoch": 0.009991418413632464, "grad_norm": 2.2725108286026274, "learning_rate": 3.3299284984678247e-06, "loss": 0.9306, "step": 326 }, { "epoch": 0.010022066936373667, "grad_norm": 1.9683589004891113, "learning_rate": 3.340143003064352e-06, "loss": 0.9464, "step": 327 }, { "epoch": 0.010052715459114871, "grad_norm": 2.4746356528341447, "learning_rate": 3.3503575076608787e-06, "loss": 0.9563, "step": 328 }, { "epoch": 0.010083363981856075, "grad_norm": 1.9425129670135481, "learning_rate": 3.360572012257406e-06, "loss": 0.8324, "step": 329 }, { "epoch": 0.01011401250459728, "grad_norm": 1.7475461708980875, "learning_rate": 3.3707865168539327e-06, "loss": 0.8831, "step": 330 }, { "epoch": 0.010144661027338481, "grad_norm": 1.8712146262090792, "learning_rate": 3.3810010214504603e-06, "loss": 0.899, "step": 331 }, { "epoch": 0.010175309550079685, "grad_norm": 2.1129937251764654, "learning_rate": 3.391215526046987e-06, "loss": 0.9929, "step": 332 }, { "epoch": 0.01020595807282089, "grad_norm": 1.7606742591041882, "learning_rate": 3.401430030643514e-06, "loss": 0.8864, "step": 333 }, { "epoch": 0.010236606595562093, "grad_norm": 2.021508848095053, "learning_rate": 3.411644535240041e-06, "loss": 0.9505, "step": 334 }, { "epoch": 0.010267255118303297, "grad_norm": 2.079433337149205, "learning_rate": 3.4218590398365683e-06, "loss": 0.8871, "step": 335 }, { "epoch": 0.010297903641044501, "grad_norm": 2.0654535352083028, "learning_rate": 3.4320735444330955e-06, "loss": 0.871, "step": 336 }, { "epoch": 0.010328552163785705, "grad_norm": 1.9761475180400176, "learning_rate": 3.4422880490296222e-06, "loss": 0.9778, "step": 337 }, { "epoch": 0.010359200686526909, "grad_norm": 2.3497260416900847, "learning_rate": 3.452502553626149e-06, "loss": 0.8356, "step": 338 }, { "epoch": 0.010389849209268113, "grad_norm": 2.3465823502425995, "learning_rate": 3.4627170582226766e-06, "loss": 0.8629, "step": 339 }, { "epoch": 0.010420497732009317, "grad_norm": 1.863140419914664, "learning_rate": 3.4729315628192034e-06, "loss": 0.9625, "step": 340 }, { "epoch": 0.01045114625475052, "grad_norm": 1.9406970760318634, "learning_rate": 3.4831460674157306e-06, "loss": 0.9433, "step": 341 }, { "epoch": 0.010481794777491725, "grad_norm": 2.2113713469271836, "learning_rate": 3.493360572012258e-06, "loss": 0.8851, "step": 342 }, { "epoch": 0.010512443300232929, "grad_norm": 2.3384017722168897, "learning_rate": 3.5035750766087846e-06, "loss": 0.9785, "step": 343 }, { "epoch": 0.010543091822974133, "grad_norm": 2.0617402025595943, "learning_rate": 3.513789581205312e-06, "loss": 0.9507, "step": 344 }, { "epoch": 0.010573740345715336, "grad_norm": 1.9415622874370697, "learning_rate": 3.5240040858018386e-06, "loss": 1.0185, "step": 345 }, { "epoch": 0.01060438886845654, "grad_norm": 0.6667052759412442, "learning_rate": 3.5342185903983662e-06, "loss": 0.5535, "step": 346 }, { "epoch": 0.010635037391197744, "grad_norm": 1.8586512160588247, "learning_rate": 3.544433094994893e-06, "loss": 0.897, "step": 347 }, { "epoch": 0.010665685913938948, "grad_norm": 2.5485309584730333, "learning_rate": 3.5546475995914202e-06, "loss": 0.931, "step": 348 }, { "epoch": 0.010696334436680152, "grad_norm": 2.151497685837508, "learning_rate": 3.564862104187947e-06, "loss": 0.976, "step": 349 }, { "epoch": 0.010726982959421356, "grad_norm": 1.9558904850164278, "learning_rate": 3.575076608784474e-06, "loss": 0.9304, "step": 350 }, { "epoch": 0.01075763148216256, "grad_norm": 1.7298221341759017, "learning_rate": 3.5852911133810014e-06, "loss": 0.8664, "step": 351 }, { "epoch": 0.010788280004903764, "grad_norm": 2.0215763668975555, "learning_rate": 3.595505617977528e-06, "loss": 0.9493, "step": 352 }, { "epoch": 0.010818928527644968, "grad_norm": 1.7992809773030172, "learning_rate": 3.605720122574056e-06, "loss": 0.9174, "step": 353 }, { "epoch": 0.010849577050386172, "grad_norm": 2.077288395671688, "learning_rate": 3.6159346271705826e-06, "loss": 1.0226, "step": 354 }, { "epoch": 0.010880225573127376, "grad_norm": 1.8594055153420868, "learning_rate": 3.6261491317671094e-06, "loss": 0.9474, "step": 355 }, { "epoch": 0.01091087409586858, "grad_norm": 1.9251360445791006, "learning_rate": 3.6363636363636366e-06, "loss": 0.9059, "step": 356 }, { "epoch": 0.010941522618609784, "grad_norm": 1.93585331647238, "learning_rate": 3.646578140960164e-06, "loss": 0.9659, "step": 357 }, { "epoch": 0.010972171141350988, "grad_norm": 2.0088020645862263, "learning_rate": 3.656792645556691e-06, "loss": 0.9894, "step": 358 }, { "epoch": 0.011002819664092191, "grad_norm": 1.8440678125132237, "learning_rate": 3.6670071501532178e-06, "loss": 0.8855, "step": 359 }, { "epoch": 0.011033468186833395, "grad_norm": 1.9528769860659112, "learning_rate": 3.6772216547497446e-06, "loss": 0.9522, "step": 360 }, { "epoch": 0.0110641167095746, "grad_norm": 2.170545896016453, "learning_rate": 3.687436159346272e-06, "loss": 0.8915, "step": 361 }, { "epoch": 0.011094765232315803, "grad_norm": 1.933589599542841, "learning_rate": 3.697650663942799e-06, "loss": 0.9332, "step": 362 }, { "epoch": 0.011125413755057005, "grad_norm": 0.609994709655973, "learning_rate": 3.707865168539326e-06, "loss": 0.5307, "step": 363 }, { "epoch": 0.01115606227779821, "grad_norm": 1.8118102732763595, "learning_rate": 3.718079673135853e-06, "loss": 0.9159, "step": 364 }, { "epoch": 0.011186710800539413, "grad_norm": 1.9484596931805156, "learning_rate": 3.7282941777323806e-06, "loss": 0.8699, "step": 365 }, { "epoch": 0.011217359323280617, "grad_norm": 0.6342149729608438, "learning_rate": 3.7385086823289074e-06, "loss": 0.5332, "step": 366 }, { "epoch": 0.011248007846021821, "grad_norm": 1.8558048547545403, "learning_rate": 3.748723186925434e-06, "loss": 0.8862, "step": 367 }, { "epoch": 0.011278656368763025, "grad_norm": 0.6437472833903836, "learning_rate": 3.7589376915219618e-06, "loss": 0.5493, "step": 368 }, { "epoch": 0.011309304891504229, "grad_norm": 2.05569388430132, "learning_rate": 3.7691521961184886e-06, "loss": 0.9797, "step": 369 }, { "epoch": 0.011339953414245433, "grad_norm": 2.1865832519954296, "learning_rate": 3.7793667007150158e-06, "loss": 0.8399, "step": 370 }, { "epoch": 0.011370601936986637, "grad_norm": 0.6229904088744433, "learning_rate": 3.7895812053115425e-06, "loss": 0.5647, "step": 371 }, { "epoch": 0.01140125045972784, "grad_norm": 1.716341618172815, "learning_rate": 3.7997957099080697e-06, "loss": 0.8836, "step": 372 }, { "epoch": 0.011431898982469045, "grad_norm": 1.6614101392890304, "learning_rate": 3.810010214504597e-06, "loss": 0.8788, "step": 373 }, { "epoch": 0.011462547505210249, "grad_norm": 1.8320952342244166, "learning_rate": 3.820224719101124e-06, "loss": 0.9188, "step": 374 }, { "epoch": 0.011493196027951453, "grad_norm": 1.9355165589532577, "learning_rate": 3.830439223697651e-06, "loss": 0.894, "step": 375 }, { "epoch": 0.011523844550692657, "grad_norm": 0.6175134135868032, "learning_rate": 3.840653728294178e-06, "loss": 0.5219, "step": 376 }, { "epoch": 0.01155449307343386, "grad_norm": 1.9921344713183555, "learning_rate": 3.850868232890705e-06, "loss": 0.8679, "step": 377 }, { "epoch": 0.011585141596175064, "grad_norm": 0.6253094335672258, "learning_rate": 3.8610827374872325e-06, "loss": 0.509, "step": 378 }, { "epoch": 0.011615790118916268, "grad_norm": 0.6966751825397621, "learning_rate": 3.871297242083759e-06, "loss": 0.5511, "step": 379 }, { "epoch": 0.011646438641657472, "grad_norm": 2.083270861485318, "learning_rate": 3.881511746680286e-06, "loss": 0.8647, "step": 380 }, { "epoch": 0.011677087164398676, "grad_norm": 1.7571285527248228, "learning_rate": 3.891726251276814e-06, "loss": 0.9724, "step": 381 }, { "epoch": 0.01170773568713988, "grad_norm": 1.9601651208653454, "learning_rate": 3.9019407558733405e-06, "loss": 0.8763, "step": 382 }, { "epoch": 0.011738384209881084, "grad_norm": 1.9638142061396653, "learning_rate": 3.912155260469867e-06, "loss": 0.9666, "step": 383 }, { "epoch": 0.011769032732622288, "grad_norm": 1.844835092424829, "learning_rate": 3.922369765066394e-06, "loss": 0.9066, "step": 384 }, { "epoch": 0.011799681255363492, "grad_norm": 2.0259713560850874, "learning_rate": 3.932584269662922e-06, "loss": 0.9373, "step": 385 }, { "epoch": 0.011830329778104696, "grad_norm": 2.078903293565144, "learning_rate": 3.9427987742594485e-06, "loss": 0.9088, "step": 386 }, { "epoch": 0.0118609783008459, "grad_norm": 1.6779908480592038, "learning_rate": 3.953013278855976e-06, "loss": 0.9195, "step": 387 }, { "epoch": 0.011891626823587104, "grad_norm": 1.8879214188232225, "learning_rate": 3.963227783452503e-06, "loss": 0.957, "step": 388 }, { "epoch": 0.011922275346328308, "grad_norm": 1.7793004507514245, "learning_rate": 3.97344228804903e-06, "loss": 0.9423, "step": 389 }, { "epoch": 0.011952923869069512, "grad_norm": 1.8309397966446388, "learning_rate": 3.983656792645557e-06, "loss": 0.9797, "step": 390 }, { "epoch": 0.011983572391810715, "grad_norm": 1.8154229962448603, "learning_rate": 3.993871297242084e-06, "loss": 0.8831, "step": 391 }, { "epoch": 0.01201422091455192, "grad_norm": 2.236389126094503, "learning_rate": 4.004085801838612e-06, "loss": 0.8504, "step": 392 }, { "epoch": 0.012044869437293123, "grad_norm": 2.1716020389034125, "learning_rate": 4.0143003064351385e-06, "loss": 0.8198, "step": 393 }, { "epoch": 0.012075517960034325, "grad_norm": 2.0571764711508074, "learning_rate": 4.024514811031665e-06, "loss": 0.8378, "step": 394 }, { "epoch": 0.01210616648277553, "grad_norm": 1.8207987641647196, "learning_rate": 4.034729315628192e-06, "loss": 0.9602, "step": 395 }, { "epoch": 0.012136815005516733, "grad_norm": 1.8600574474713845, "learning_rate": 4.04494382022472e-06, "loss": 0.9404, "step": 396 }, { "epoch": 0.012167463528257937, "grad_norm": 1.968201006144391, "learning_rate": 4.0551583248212465e-06, "loss": 0.966, "step": 397 }, { "epoch": 0.012198112050999141, "grad_norm": 0.6318313562856233, "learning_rate": 4.065372829417773e-06, "loss": 0.5394, "step": 398 }, { "epoch": 0.012228760573740345, "grad_norm": 2.299346531420212, "learning_rate": 4.0755873340143e-06, "loss": 0.9566, "step": 399 }, { "epoch": 0.012259409096481549, "grad_norm": 0.5907512443479678, "learning_rate": 4.085801838610828e-06, "loss": 0.5377, "step": 400 }, { "epoch": 0.012290057619222753, "grad_norm": 2.2551005702611633, "learning_rate": 4.0960163432073544e-06, "loss": 0.9142, "step": 401 }, { "epoch": 0.012320706141963957, "grad_norm": 0.6339100991510016, "learning_rate": 4.106230847803882e-06, "loss": 0.5236, "step": 402 }, { "epoch": 0.01235135466470516, "grad_norm": 2.2914123966866047, "learning_rate": 4.116445352400409e-06, "loss": 0.9167, "step": 403 }, { "epoch": 0.012382003187446365, "grad_norm": 2.0578660952696555, "learning_rate": 4.126659856996936e-06, "loss": 0.9803, "step": 404 }, { "epoch": 0.012412651710187569, "grad_norm": 1.6116232981016458, "learning_rate": 4.136874361593463e-06, "loss": 0.9237, "step": 405 }, { "epoch": 0.012443300232928773, "grad_norm": 2.0024831247118238, "learning_rate": 4.14708886618999e-06, "loss": 0.9722, "step": 406 }, { "epoch": 0.012473948755669977, "grad_norm": 1.8264278127163522, "learning_rate": 4.157303370786518e-06, "loss": 0.9298, "step": 407 }, { "epoch": 0.01250459727841118, "grad_norm": 1.9793282218157198, "learning_rate": 4.1675178753830445e-06, "loss": 0.878, "step": 408 }, { "epoch": 0.012535245801152384, "grad_norm": 1.860772656029955, "learning_rate": 4.177732379979571e-06, "loss": 0.9362, "step": 409 }, { "epoch": 0.012565894323893588, "grad_norm": 1.8171779081838835, "learning_rate": 4.187946884576099e-06, "loss": 0.9435, "step": 410 }, { "epoch": 0.012596542846634792, "grad_norm": 2.3137541672888267, "learning_rate": 4.198161389172626e-06, "loss": 0.9404, "step": 411 }, { "epoch": 0.012627191369375996, "grad_norm": 1.8930091979890575, "learning_rate": 4.208375893769152e-06, "loss": 0.8848, "step": 412 }, { "epoch": 0.0126578398921172, "grad_norm": 1.9187307057921013, "learning_rate": 4.218590398365679e-06, "loss": 0.8518, "step": 413 }, { "epoch": 0.012688488414858404, "grad_norm": 1.891517007944953, "learning_rate": 4.228804902962207e-06, "loss": 0.9501, "step": 414 }, { "epoch": 0.012719136937599608, "grad_norm": 2.411987168289549, "learning_rate": 4.239019407558734e-06, "loss": 0.8929, "step": 415 }, { "epoch": 0.012749785460340812, "grad_norm": 1.9464532786317748, "learning_rate": 4.24923391215526e-06, "loss": 0.8504, "step": 416 }, { "epoch": 0.012780433983082016, "grad_norm": 0.611700379512229, "learning_rate": 4.259448416751788e-06, "loss": 0.5206, "step": 417 }, { "epoch": 0.01281108250582322, "grad_norm": 0.5756482360463362, "learning_rate": 4.269662921348315e-06, "loss": 0.5088, "step": 418 }, { "epoch": 0.012841731028564424, "grad_norm": 2.3314784295289104, "learning_rate": 4.2798774259448424e-06, "loss": 0.9467, "step": 419 }, { "epoch": 0.012872379551305628, "grad_norm": 2.0210085299802394, "learning_rate": 4.290091930541369e-06, "loss": 0.9295, "step": 420 }, { "epoch": 0.012903028074046832, "grad_norm": 2.3300624416898468, "learning_rate": 4.300306435137896e-06, "loss": 0.7587, "step": 421 }, { "epoch": 0.012933676596788036, "grad_norm": 1.9395588706936244, "learning_rate": 4.310520939734424e-06, "loss": 0.9801, "step": 422 }, { "epoch": 0.01296432511952924, "grad_norm": 2.0102868573754056, "learning_rate": 4.32073544433095e-06, "loss": 0.9634, "step": 423 }, { "epoch": 0.012994973642270443, "grad_norm": 0.5801368990386675, "learning_rate": 4.330949948927477e-06, "loss": 0.563, "step": 424 }, { "epoch": 0.013025622165011647, "grad_norm": 1.7318892119040974, "learning_rate": 4.341164453524005e-06, "loss": 0.805, "step": 425 }, { "epoch": 0.01305627068775285, "grad_norm": 2.094987426832682, "learning_rate": 4.351378958120532e-06, "loss": 0.9334, "step": 426 }, { "epoch": 0.013086919210494053, "grad_norm": 2.03386081120378, "learning_rate": 4.361593462717058e-06, "loss": 0.8717, "step": 427 }, { "epoch": 0.013117567733235257, "grad_norm": 2.1514351007312054, "learning_rate": 4.371807967313585e-06, "loss": 0.851, "step": 428 }, { "epoch": 0.013148216255976461, "grad_norm": 2.2665465982009683, "learning_rate": 4.382022471910113e-06, "loss": 0.9765, "step": 429 }, { "epoch": 0.013178864778717665, "grad_norm": 2.1778592092202276, "learning_rate": 4.3922369765066396e-06, "loss": 0.8206, "step": 430 }, { "epoch": 0.01320951330145887, "grad_norm": 2.1179068835037222, "learning_rate": 4.402451481103167e-06, "loss": 0.9636, "step": 431 }, { "epoch": 0.013240161824200073, "grad_norm": 1.9462167419983372, "learning_rate": 4.412665985699694e-06, "loss": 0.8666, "step": 432 }, { "epoch": 0.013270810346941277, "grad_norm": 2.2629630852388054, "learning_rate": 4.422880490296221e-06, "loss": 0.8552, "step": 433 }, { "epoch": 0.013301458869682481, "grad_norm": 2.17172978431336, "learning_rate": 4.433094994892748e-06, "loss": 0.7971, "step": 434 }, { "epoch": 0.013332107392423685, "grad_norm": 1.7893643642198072, "learning_rate": 4.443309499489275e-06, "loss": 0.9408, "step": 435 }, { "epoch": 0.013362755915164889, "grad_norm": 2.1065507844389653, "learning_rate": 4.453524004085803e-06, "loss": 0.8647, "step": 436 }, { "epoch": 0.013393404437906093, "grad_norm": 1.7555113879244875, "learning_rate": 4.4637385086823296e-06, "loss": 0.9409, "step": 437 }, { "epoch": 0.013424052960647297, "grad_norm": 2.0339911305439586, "learning_rate": 4.473953013278856e-06, "loss": 0.9626, "step": 438 }, { "epoch": 0.0134547014833885, "grad_norm": 2.1525972003773957, "learning_rate": 4.484167517875383e-06, "loss": 0.9663, "step": 439 }, { "epoch": 0.013485350006129704, "grad_norm": 2.011613814604013, "learning_rate": 4.494382022471911e-06, "loss": 0.8267, "step": 440 }, { "epoch": 0.013515998528870908, "grad_norm": 1.7444067631980207, "learning_rate": 4.5045965270684375e-06, "loss": 0.8721, "step": 441 }, { "epoch": 0.013546647051612112, "grad_norm": 2.041122491771415, "learning_rate": 4.514811031664964e-06, "loss": 0.9494, "step": 442 }, { "epoch": 0.013577295574353316, "grad_norm": 1.7384855730533648, "learning_rate": 4.525025536261491e-06, "loss": 0.8716, "step": 443 }, { "epoch": 0.01360794409709452, "grad_norm": 2.133431681968796, "learning_rate": 4.535240040858019e-06, "loss": 0.823, "step": 444 }, { "epoch": 0.013638592619835724, "grad_norm": 2.066383592780018, "learning_rate": 4.5454545454545455e-06, "loss": 0.8285, "step": 445 }, { "epoch": 0.013669241142576928, "grad_norm": 1.7595894198521524, "learning_rate": 4.555669050051073e-06, "loss": 0.9472, "step": 446 }, { "epoch": 0.013699889665318132, "grad_norm": 1.9316638344195827, "learning_rate": 4.5658835546476e-06, "loss": 0.9543, "step": 447 }, { "epoch": 0.013730538188059336, "grad_norm": 1.7547216102776118, "learning_rate": 4.5760980592441276e-06, "loss": 0.8877, "step": 448 }, { "epoch": 0.01376118671080054, "grad_norm": 2.222039003608368, "learning_rate": 4.586312563840654e-06, "loss": 0.8633, "step": 449 }, { "epoch": 0.013791835233541744, "grad_norm": 1.8847243419812172, "learning_rate": 4.596527068437181e-06, "loss": 0.8355, "step": 450 }, { "epoch": 0.013822483756282948, "grad_norm": 0.6326628946859794, "learning_rate": 4.606741573033709e-06, "loss": 0.5356, "step": 451 }, { "epoch": 0.013853132279024152, "grad_norm": 2.2693735938480075, "learning_rate": 4.6169560776302355e-06, "loss": 0.9303, "step": 452 }, { "epoch": 0.013883780801765356, "grad_norm": 0.5632707513614243, "learning_rate": 4.627170582226762e-06, "loss": 0.5448, "step": 453 }, { "epoch": 0.01391442932450656, "grad_norm": 2.096932174003764, "learning_rate": 4.637385086823289e-06, "loss": 0.952, "step": 454 }, { "epoch": 0.013945077847247763, "grad_norm": 2.2235550245057327, "learning_rate": 4.647599591419817e-06, "loss": 0.8563, "step": 455 }, { "epoch": 0.013975726369988967, "grad_norm": 2.129875730234174, "learning_rate": 4.6578140960163435e-06, "loss": 0.8431, "step": 456 }, { "epoch": 0.01400637489273017, "grad_norm": 2.0225942491455, "learning_rate": 4.66802860061287e-06, "loss": 0.7896, "step": 457 }, { "epoch": 0.014037023415471373, "grad_norm": 1.9028893775921851, "learning_rate": 4.678243105209398e-06, "loss": 0.9501, "step": 458 }, { "epoch": 0.014067671938212577, "grad_norm": 2.0380683325887046, "learning_rate": 4.688457609805925e-06, "loss": 0.8609, "step": 459 }, { "epoch": 0.014098320460953781, "grad_norm": 2.0384091489801333, "learning_rate": 4.6986721144024515e-06, "loss": 0.8491, "step": 460 }, { "epoch": 0.014128968983694985, "grad_norm": 0.5640954795369777, "learning_rate": 4.708886618998979e-06, "loss": 0.5335, "step": 461 }, { "epoch": 0.01415961750643619, "grad_norm": 1.9704861833556702, "learning_rate": 4.719101123595506e-06, "loss": 0.8507, "step": 462 }, { "epoch": 0.014190266029177393, "grad_norm": 0.6167684289984926, "learning_rate": 4.7293156281920335e-06, "loss": 0.5292, "step": 463 }, { "epoch": 0.014220914551918597, "grad_norm": 1.882002005344198, "learning_rate": 4.73953013278856e-06, "loss": 0.924, "step": 464 }, { "epoch": 0.014251563074659801, "grad_norm": 2.082286554972067, "learning_rate": 4.749744637385087e-06, "loss": 0.8681, "step": 465 }, { "epoch": 0.014282211597401005, "grad_norm": 1.888546330398308, "learning_rate": 4.759959141981615e-06, "loss": 0.9525, "step": 466 }, { "epoch": 0.014312860120142209, "grad_norm": 2.0208931917403814, "learning_rate": 4.7701736465781415e-06, "loss": 0.7672, "step": 467 }, { "epoch": 0.014343508642883413, "grad_norm": 1.8260853612803378, "learning_rate": 4.780388151174668e-06, "loss": 0.8624, "step": 468 }, { "epoch": 0.014374157165624617, "grad_norm": 2.1851673011407886, "learning_rate": 4.790602655771195e-06, "loss": 0.9545, "step": 469 }, { "epoch": 0.01440480568836582, "grad_norm": 2.218121783254763, "learning_rate": 4.800817160367723e-06, "loss": 0.9483, "step": 470 }, { "epoch": 0.014435454211107025, "grad_norm": 1.7686183006058231, "learning_rate": 4.8110316649642495e-06, "loss": 0.9295, "step": 471 }, { "epoch": 0.014466102733848228, "grad_norm": 2.0172468432433868, "learning_rate": 4.821246169560776e-06, "loss": 0.8675, "step": 472 }, { "epoch": 0.014496751256589432, "grad_norm": 2.445774430746069, "learning_rate": 4.831460674157304e-06, "loss": 0.9246, "step": 473 }, { "epoch": 0.014527399779330636, "grad_norm": 2.1357426913859956, "learning_rate": 4.841675178753831e-06, "loss": 0.9038, "step": 474 }, { "epoch": 0.01455804830207184, "grad_norm": 2.1248290218184303, "learning_rate": 4.851889683350358e-06, "loss": 0.8232, "step": 475 }, { "epoch": 0.014588696824813044, "grad_norm": 1.6354946394354082, "learning_rate": 4.862104187946885e-06, "loss": 0.8061, "step": 476 }, { "epoch": 0.014619345347554248, "grad_norm": 2.107530495980384, "learning_rate": 4.872318692543412e-06, "loss": 0.864, "step": 477 }, { "epoch": 0.014649993870295452, "grad_norm": 1.9500495845982395, "learning_rate": 4.8825331971399395e-06, "loss": 0.9313, "step": 478 }, { "epoch": 0.014680642393036656, "grad_norm": 2.10158389519295, "learning_rate": 4.892747701736466e-06, "loss": 0.9878, "step": 479 }, { "epoch": 0.01471129091577786, "grad_norm": 1.91149552888476, "learning_rate": 4.902962206332994e-06, "loss": 0.8771, "step": 480 }, { "epoch": 0.014741939438519064, "grad_norm": 0.543537523390581, "learning_rate": 4.913176710929521e-06, "loss": 0.516, "step": 481 }, { "epoch": 0.014772587961260268, "grad_norm": 1.9893241070360674, "learning_rate": 4.9233912155260474e-06, "loss": 0.9594, "step": 482 }, { "epoch": 0.014803236484001472, "grad_norm": 2.1300071766894937, "learning_rate": 4.933605720122574e-06, "loss": 0.9164, "step": 483 }, { "epoch": 0.014833885006742676, "grad_norm": 2.166251396062244, "learning_rate": 4.943820224719101e-06, "loss": 0.8814, "step": 484 }, { "epoch": 0.01486453352948388, "grad_norm": 2.102850091192083, "learning_rate": 4.954034729315629e-06, "loss": 0.9734, "step": 485 }, { "epoch": 0.014895182052225083, "grad_norm": 0.568095281490009, "learning_rate": 4.964249233912155e-06, "loss": 0.5178, "step": 486 }, { "epoch": 0.014925830574966287, "grad_norm": 1.88901615523601, "learning_rate": 4.974463738508682e-06, "loss": 0.78, "step": 487 }, { "epoch": 0.014956479097707491, "grad_norm": 2.0723070662640377, "learning_rate": 4.98467824310521e-06, "loss": 0.852, "step": 488 }, { "epoch": 0.014987127620448694, "grad_norm": 1.9655243492575294, "learning_rate": 4.994892747701737e-06, "loss": 0.7116, "step": 489 }, { "epoch": 0.015017776143189897, "grad_norm": 2.2398411077215132, "learning_rate": 5.005107252298263e-06, "loss": 1.0113, "step": 490 }, { "epoch": 0.015048424665931101, "grad_norm": 1.8889217889992949, "learning_rate": 5.015321756894791e-06, "loss": 0.9438, "step": 491 }, { "epoch": 0.015079073188672305, "grad_norm": 1.759822390690419, "learning_rate": 5.025536261491319e-06, "loss": 0.9063, "step": 492 }, { "epoch": 0.01510972171141351, "grad_norm": 2.0996950539079537, "learning_rate": 5.0357507660878446e-06, "loss": 0.9302, "step": 493 }, { "epoch": 0.015140370234154713, "grad_norm": 1.8627843779039455, "learning_rate": 5.045965270684372e-06, "loss": 0.9324, "step": 494 }, { "epoch": 0.015171018756895917, "grad_norm": 1.7578319238107563, "learning_rate": 5.0561797752809e-06, "loss": 0.8909, "step": 495 }, { "epoch": 0.015201667279637121, "grad_norm": 1.8977184695306193, "learning_rate": 5.066394279877427e-06, "loss": 0.7489, "step": 496 }, { "epoch": 0.015232315802378325, "grad_norm": 1.835348002045644, "learning_rate": 5.076608784473953e-06, "loss": 0.8502, "step": 497 }, { "epoch": 0.015262964325119529, "grad_norm": 2.129310935304693, "learning_rate": 5.08682328907048e-06, "loss": 0.8493, "step": 498 }, { "epoch": 0.015293612847860733, "grad_norm": 2.199401243837629, "learning_rate": 5.097037793667008e-06, "loss": 0.8996, "step": 499 }, { "epoch": 0.015324261370601937, "grad_norm": 1.9288292676404346, "learning_rate": 5.1072522982635346e-06, "loss": 0.8289, "step": 500 }, { "epoch": 0.01535490989334314, "grad_norm": 1.7739363377212085, "learning_rate": 5.117466802860061e-06, "loss": 0.8509, "step": 501 }, { "epoch": 0.015385558416084345, "grad_norm": 1.9102583282708459, "learning_rate": 5.127681307456589e-06, "loss": 0.8816, "step": 502 }, { "epoch": 0.015416206938825549, "grad_norm": 1.8451463032320548, "learning_rate": 5.137895812053117e-06, "loss": 0.9984, "step": 503 }, { "epoch": 0.015446855461566752, "grad_norm": 2.0220152002335188, "learning_rate": 5.1481103166496425e-06, "loss": 0.8732, "step": 504 }, { "epoch": 0.015477503984307956, "grad_norm": 0.5857398592606914, "learning_rate": 5.15832482124617e-06, "loss": 0.5283, "step": 505 }, { "epoch": 0.01550815250704916, "grad_norm": 1.9611589810925298, "learning_rate": 5.168539325842698e-06, "loss": 0.8763, "step": 506 }, { "epoch": 0.015538801029790364, "grad_norm": 2.0610378700811505, "learning_rate": 5.178753830439224e-06, "loss": 0.8859, "step": 507 }, { "epoch": 0.015569449552531568, "grad_norm": 1.8399965303898145, "learning_rate": 5.188968335035751e-06, "loss": 0.8018, "step": 508 }, { "epoch": 0.015600098075272772, "grad_norm": 1.8290303567674457, "learning_rate": 5.199182839632278e-06, "loss": 0.8051, "step": 509 }, { "epoch": 0.015630746598013976, "grad_norm": 1.8129111819854111, "learning_rate": 5.209397344228805e-06, "loss": 0.8982, "step": 510 }, { "epoch": 0.015661395120755178, "grad_norm": 0.6333930977976151, "learning_rate": 5.2196118488253326e-06, "loss": 0.5337, "step": 511 }, { "epoch": 0.015692043643496384, "grad_norm": 0.5868566013838146, "learning_rate": 5.229826353421859e-06, "loss": 0.5171, "step": 512 }, { "epoch": 0.015722692166237586, "grad_norm": 0.5541816413768101, "learning_rate": 5.240040858018387e-06, "loss": 0.5341, "step": 513 }, { "epoch": 0.015753340688978792, "grad_norm": 2.0713213387217757, "learning_rate": 5.250255362614913e-06, "loss": 0.8317, "step": 514 }, { "epoch": 0.015783989211719994, "grad_norm": 2.3095908339215407, "learning_rate": 5.2604698672114405e-06, "loss": 0.894, "step": 515 }, { "epoch": 0.0158146377344612, "grad_norm": 1.865425716722202, "learning_rate": 5.270684371807968e-06, "loss": 0.8842, "step": 516 }, { "epoch": 0.015845286257202402, "grad_norm": 1.7943742032372818, "learning_rate": 5.280898876404494e-06, "loss": 0.9016, "step": 517 }, { "epoch": 0.015875934779943607, "grad_norm": 2.3440750186278243, "learning_rate": 5.291113381001022e-06, "loss": 0.962, "step": 518 }, { "epoch": 0.01590658330268481, "grad_norm": 1.8606806886799319, "learning_rate": 5.301327885597549e-06, "loss": 0.8097, "step": 519 }, { "epoch": 0.015937231825426015, "grad_norm": 1.8122410907579354, "learning_rate": 5.311542390194075e-06, "loss": 0.9278, "step": 520 }, { "epoch": 0.015967880348167218, "grad_norm": 2.1337753964217154, "learning_rate": 5.321756894790603e-06, "loss": 0.9216, "step": 521 }, { "epoch": 0.015998528870908423, "grad_norm": 1.956001113895032, "learning_rate": 5.3319713993871305e-06, "loss": 0.9154, "step": 522 }, { "epoch": 0.016029177393649625, "grad_norm": 2.483324831466697, "learning_rate": 5.342185903983657e-06, "loss": 0.892, "step": 523 }, { "epoch": 0.01605982591639083, "grad_norm": 1.842526376998299, "learning_rate": 5.352400408580184e-06, "loss": 0.8401, "step": 524 }, { "epoch": 0.016090474439132033, "grad_norm": 1.925866130799891, "learning_rate": 5.362614913176712e-06, "loss": 0.8485, "step": 525 }, { "epoch": 0.01612112296187324, "grad_norm": 2.4402956785702705, "learning_rate": 5.3728294177732385e-06, "loss": 0.8981, "step": 526 }, { "epoch": 0.01615177148461444, "grad_norm": 2.5085857110210767, "learning_rate": 5.383043922369765e-06, "loss": 0.8457, "step": 527 }, { "epoch": 0.016182420007355647, "grad_norm": 1.7543570103649786, "learning_rate": 5.393258426966292e-06, "loss": 0.8243, "step": 528 }, { "epoch": 0.01621306853009685, "grad_norm": 1.7587037789443287, "learning_rate": 5.40347293156282e-06, "loss": 0.8464, "step": 529 }, { "epoch": 0.016243717052838055, "grad_norm": 0.575488101215563, "learning_rate": 5.413687436159347e-06, "loss": 0.5519, "step": 530 }, { "epoch": 0.016274365575579257, "grad_norm": 0.5670739814962448, "learning_rate": 5.423901940755873e-06, "loss": 0.5033, "step": 531 }, { "epoch": 0.016305014098320463, "grad_norm": 1.9917911066531333, "learning_rate": 5.434116445352401e-06, "loss": 0.8805, "step": 532 }, { "epoch": 0.016335662621061665, "grad_norm": 0.5896095964760028, "learning_rate": 5.4443309499489285e-06, "loss": 0.5156, "step": 533 }, { "epoch": 0.01636631114380287, "grad_norm": 2.0562159468124177, "learning_rate": 5.4545454545454545e-06, "loss": 0.8948, "step": 534 }, { "epoch": 0.016396959666544073, "grad_norm": 1.9780797183103236, "learning_rate": 5.464759959141982e-06, "loss": 0.8648, "step": 535 }, { "epoch": 0.016427608189285275, "grad_norm": 2.0675668318241724, "learning_rate": 5.47497446373851e-06, "loss": 0.7532, "step": 536 }, { "epoch": 0.01645825671202648, "grad_norm": 1.7904705219914365, "learning_rate": 5.485188968335036e-06, "loss": 0.9337, "step": 537 }, { "epoch": 0.016488905234767683, "grad_norm": 0.5683448377709516, "learning_rate": 5.495403472931563e-06, "loss": 0.5354, "step": 538 }, { "epoch": 0.01651955375750889, "grad_norm": 1.907771129948236, "learning_rate": 5.50561797752809e-06, "loss": 0.8703, "step": 539 }, { "epoch": 0.01655020228025009, "grad_norm": 1.773192658348204, "learning_rate": 5.515832482124618e-06, "loss": 0.7974, "step": 540 }, { "epoch": 0.016580850802991296, "grad_norm": 2.238294537285421, "learning_rate": 5.5260469867211445e-06, "loss": 0.8918, "step": 541 }, { "epoch": 0.0166114993257325, "grad_norm": 0.555334445429195, "learning_rate": 5.536261491317671e-06, "loss": 0.5269, "step": 542 }, { "epoch": 0.016642147848473704, "grad_norm": 2.0366543682410967, "learning_rate": 5.546475995914199e-06, "loss": 0.8786, "step": 543 }, { "epoch": 0.016672796371214906, "grad_norm": 1.9633408163779988, "learning_rate": 5.556690500510725e-06, "loss": 0.7559, "step": 544 }, { "epoch": 0.016703444893956112, "grad_norm": 1.9574996208436661, "learning_rate": 5.5669050051072524e-06, "loss": 0.872, "step": 545 }, { "epoch": 0.016734093416697314, "grad_norm": 2.1509211777295447, "learning_rate": 5.57711950970378e-06, "loss": 0.8209, "step": 546 }, { "epoch": 0.01676474193943852, "grad_norm": 0.5492554567090584, "learning_rate": 5.587334014300308e-06, "loss": 0.5317, "step": 547 }, { "epoch": 0.016795390462179722, "grad_norm": 1.9749523901289654, "learning_rate": 5.597548518896834e-06, "loss": 0.9256, "step": 548 }, { "epoch": 0.016826038984920928, "grad_norm": 2.1617551328716047, "learning_rate": 5.607763023493361e-06, "loss": 0.8542, "step": 549 }, { "epoch": 0.01685668750766213, "grad_norm": 2.3186098967994457, "learning_rate": 5.617977528089889e-06, "loss": 0.8129, "step": 550 }, { "epoch": 0.016887336030403335, "grad_norm": 2.7178018567511377, "learning_rate": 5.628192032686415e-06, "loss": 0.8714, "step": 551 }, { "epoch": 0.016917984553144538, "grad_norm": 1.903781450290628, "learning_rate": 5.6384065372829424e-06, "loss": 0.9296, "step": 552 }, { "epoch": 0.016948633075885743, "grad_norm": 1.8064491233702502, "learning_rate": 5.648621041879469e-06, "loss": 0.8047, "step": 553 }, { "epoch": 0.016979281598626945, "grad_norm": 2.153209395701494, "learning_rate": 5.658835546475996e-06, "loss": 0.8203, "step": 554 }, { "epoch": 0.01700993012136815, "grad_norm": 1.8697857907249267, "learning_rate": 5.669050051072524e-06, "loss": 0.7934, "step": 555 }, { "epoch": 0.017040578644109353, "grad_norm": 0.557927744006261, "learning_rate": 5.67926455566905e-06, "loss": 0.5272, "step": 556 }, { "epoch": 0.01707122716685056, "grad_norm": 0.5610023818678441, "learning_rate": 5.689479060265578e-06, "loss": 0.5206, "step": 557 }, { "epoch": 0.01710187568959176, "grad_norm": 1.8615025790393729, "learning_rate": 5.699693564862104e-06, "loss": 0.8639, "step": 558 }, { "epoch": 0.017132524212332967, "grad_norm": 0.5608402605395069, "learning_rate": 5.709908069458632e-06, "loss": 0.5027, "step": 559 }, { "epoch": 0.01716317273507417, "grad_norm": 0.5745659698568915, "learning_rate": 5.720122574055159e-06, "loss": 0.5403, "step": 560 }, { "epoch": 0.017193821257815375, "grad_norm": 1.7651769879780133, "learning_rate": 5.730337078651685e-06, "loss": 0.8404, "step": 561 }, { "epoch": 0.017224469780556577, "grad_norm": 2.26725624439447, "learning_rate": 5.740551583248213e-06, "loss": 0.8096, "step": 562 }, { "epoch": 0.017255118303297783, "grad_norm": 2.084371733633681, "learning_rate": 5.7507660878447404e-06, "loss": 0.8917, "step": 563 }, { "epoch": 0.017285766826038985, "grad_norm": 1.928042074848299, "learning_rate": 5.760980592441267e-06, "loss": 0.8964, "step": 564 }, { "epoch": 0.01731641534878019, "grad_norm": 2.093890138411976, "learning_rate": 5.771195097037794e-06, "loss": 0.8366, "step": 565 }, { "epoch": 0.017347063871521393, "grad_norm": 1.6371413177594025, "learning_rate": 5.781409601634322e-06, "loss": 0.8165, "step": 566 }, { "epoch": 0.017377712394262595, "grad_norm": 1.9199928079581916, "learning_rate": 5.791624106230848e-06, "loss": 0.8734, "step": 567 }, { "epoch": 0.0174083609170038, "grad_norm": 1.8657594326654152, "learning_rate": 5.801838610827375e-06, "loss": 0.8894, "step": 568 }, { "epoch": 0.017439009439745003, "grad_norm": 2.196656352197846, "learning_rate": 5.812053115423902e-06, "loss": 0.8787, "step": 569 }, { "epoch": 0.01746965796248621, "grad_norm": 1.9216996159988446, "learning_rate": 5.82226762002043e-06, "loss": 0.8296, "step": 570 }, { "epoch": 0.01750030648522741, "grad_norm": 2.2124620762750125, "learning_rate": 5.832482124616956e-06, "loss": 0.8226, "step": 571 }, { "epoch": 0.017530955007968616, "grad_norm": 1.9751627457103058, "learning_rate": 5.842696629213483e-06, "loss": 0.8323, "step": 572 }, { "epoch": 0.01756160353070982, "grad_norm": 2.2359522327875907, "learning_rate": 5.852911133810011e-06, "loss": 0.8438, "step": 573 }, { "epoch": 0.017592252053451024, "grad_norm": 0.5445881946564597, "learning_rate": 5.863125638406538e-06, "loss": 0.5188, "step": 574 }, { "epoch": 0.017622900576192226, "grad_norm": 0.5536834870286373, "learning_rate": 5.873340143003064e-06, "loss": 0.524, "step": 575 }, { "epoch": 0.017653549098933432, "grad_norm": 2.008080425298695, "learning_rate": 5.883554647599592e-06, "loss": 0.9877, "step": 576 }, { "epoch": 0.017684197621674634, "grad_norm": 2.1521034375183192, "learning_rate": 5.89376915219612e-06, "loss": 0.8771, "step": 577 }, { "epoch": 0.01771484614441584, "grad_norm": 1.7294254921706782, "learning_rate": 5.9039836567926455e-06, "loss": 0.9591, "step": 578 }, { "epoch": 0.017745494667157042, "grad_norm": 2.0475283825806123, "learning_rate": 5.914198161389173e-06, "loss": 0.8356, "step": 579 }, { "epoch": 0.017776143189898248, "grad_norm": 1.9442634240657037, "learning_rate": 5.924412665985701e-06, "loss": 0.8149, "step": 580 }, { "epoch": 0.01780679171263945, "grad_norm": 1.837318905496985, "learning_rate": 5.934627170582227e-06, "loss": 0.9264, "step": 581 }, { "epoch": 0.017837440235380655, "grad_norm": 1.7795045044170033, "learning_rate": 5.944841675178754e-06, "loss": 0.8347, "step": 582 }, { "epoch": 0.017868088758121858, "grad_norm": 2.3397764498464246, "learning_rate": 5.955056179775281e-06, "loss": 0.762, "step": 583 }, { "epoch": 0.017898737280863063, "grad_norm": 1.69309354344748, "learning_rate": 5.965270684371809e-06, "loss": 0.8989, "step": 584 }, { "epoch": 0.017929385803604266, "grad_norm": 2.295350322003683, "learning_rate": 5.9754851889683355e-06, "loss": 0.7588, "step": 585 }, { "epoch": 0.01796003432634547, "grad_norm": 1.926100923136626, "learning_rate": 5.985699693564862e-06, "loss": 0.8426, "step": 586 }, { "epoch": 0.017990682849086673, "grad_norm": 1.9508425916861734, "learning_rate": 5.99591419816139e-06, "loss": 0.8678, "step": 587 }, { "epoch": 0.01802133137182788, "grad_norm": 1.9234326091602365, "learning_rate": 6.006128702757916e-06, "loss": 0.8069, "step": 588 }, { "epoch": 0.01805197989456908, "grad_norm": 2.3203908376363818, "learning_rate": 6.0163432073544435e-06, "loss": 0.9011, "step": 589 }, { "epoch": 0.018082628417310287, "grad_norm": 1.779145771755853, "learning_rate": 6.026557711950971e-06, "loss": 0.8598, "step": 590 }, { "epoch": 0.01811327694005149, "grad_norm": 1.8799488200798677, "learning_rate": 6.036772216547499e-06, "loss": 0.8752, "step": 591 }, { "epoch": 0.018143925462792695, "grad_norm": 1.9006802969308034, "learning_rate": 6.046986721144025e-06, "loss": 0.9556, "step": 592 }, { "epoch": 0.018174573985533897, "grad_norm": 1.9091180453247056, "learning_rate": 6.057201225740552e-06, "loss": 0.8454, "step": 593 }, { "epoch": 0.018205222508275103, "grad_norm": 0.5207668160019384, "learning_rate": 6.06741573033708e-06, "loss": 0.5071, "step": 594 }, { "epoch": 0.018235871031016305, "grad_norm": 0.5460861170995411, "learning_rate": 6.077630234933606e-06, "loss": 0.5014, "step": 595 }, { "epoch": 0.01826651955375751, "grad_norm": 1.9306341637004734, "learning_rate": 6.0878447395301335e-06, "loss": 0.9596, "step": 596 }, { "epoch": 0.018297168076498713, "grad_norm": 1.9660115096534818, "learning_rate": 6.09805924412666e-06, "loss": 0.7843, "step": 597 }, { "epoch": 0.01832781659923992, "grad_norm": 0.5351122741969688, "learning_rate": 6.108273748723187e-06, "loss": 0.5065, "step": 598 }, { "epoch": 0.01835846512198112, "grad_norm": 1.87744172801568, "learning_rate": 6.118488253319715e-06, "loss": 0.8689, "step": 599 }, { "epoch": 0.018389113644722323, "grad_norm": 2.126164367651868, "learning_rate": 6.1287027579162415e-06, "loss": 0.8936, "step": 600 }, { "epoch": 0.01841976216746353, "grad_norm": 2.1521103624297653, "learning_rate": 6.138917262512769e-06, "loss": 0.8064, "step": 601 }, { "epoch": 0.01845041069020473, "grad_norm": 2.1362943669445453, "learning_rate": 6.149131767109295e-06, "loss": 0.9492, "step": 602 }, { "epoch": 0.018481059212945936, "grad_norm": 1.7887513315054717, "learning_rate": 6.159346271705823e-06, "loss": 0.8729, "step": 603 }, { "epoch": 0.01851170773568714, "grad_norm": 2.013042420699516, "learning_rate": 6.16956077630235e-06, "loss": 0.8256, "step": 604 }, { "epoch": 0.018542356258428344, "grad_norm": 1.9414707511000906, "learning_rate": 6.179775280898876e-06, "loss": 0.8059, "step": 605 }, { "epoch": 0.018573004781169546, "grad_norm": 2.1168588975859453, "learning_rate": 6.189989785495404e-06, "loss": 0.8445, "step": 606 }, { "epoch": 0.018603653303910752, "grad_norm": 2.301171325558875, "learning_rate": 6.2002042900919315e-06, "loss": 0.831, "step": 607 }, { "epoch": 0.018634301826651954, "grad_norm": 0.551188254559654, "learning_rate": 6.210418794688458e-06, "loss": 0.5253, "step": 608 }, { "epoch": 0.01866495034939316, "grad_norm": 2.478055375330621, "learning_rate": 6.220633299284985e-06, "loss": 0.7193, "step": 609 }, { "epoch": 0.018695598872134362, "grad_norm": 0.5363186815354817, "learning_rate": 6.230847803881513e-06, "loss": 0.5175, "step": 610 }, { "epoch": 0.018726247394875568, "grad_norm": 1.4787047971243132, "learning_rate": 6.2410623084780395e-06, "loss": 0.7467, "step": 611 }, { "epoch": 0.01875689591761677, "grad_norm": 0.7210931095934286, "learning_rate": 6.251276813074566e-06, "loss": 0.5085, "step": 612 }, { "epoch": 0.018787544440357976, "grad_norm": 1.9931766359423433, "learning_rate": 6.261491317671093e-06, "loss": 0.8156, "step": 613 }, { "epoch": 0.018818192963099178, "grad_norm": 2.039801186912655, "learning_rate": 6.271705822267621e-06, "loss": 0.9485, "step": 614 }, { "epoch": 0.018848841485840383, "grad_norm": 0.5504457083555784, "learning_rate": 6.2819203268641474e-06, "loss": 0.537, "step": 615 }, { "epoch": 0.018879490008581586, "grad_norm": 1.9230258041513475, "learning_rate": 6.292134831460674e-06, "loss": 0.8627, "step": 616 }, { "epoch": 0.01891013853132279, "grad_norm": 0.5457896930219261, "learning_rate": 6.302349336057202e-06, "loss": 0.4934, "step": 617 }, { "epoch": 0.018940787054063993, "grad_norm": 0.5550613073248056, "learning_rate": 6.3125638406537295e-06, "loss": 0.5186, "step": 618 }, { "epoch": 0.0189714355768052, "grad_norm": 1.8485514674307655, "learning_rate": 6.322778345250255e-06, "loss": 0.9168, "step": 619 }, { "epoch": 0.0190020840995464, "grad_norm": 2.077408172536789, "learning_rate": 6.332992849846783e-06, "loss": 0.7233, "step": 620 }, { "epoch": 0.019032732622287607, "grad_norm": 0.5192719079304288, "learning_rate": 6.343207354443311e-06, "loss": 0.5062, "step": 621 }, { "epoch": 0.01906338114502881, "grad_norm": 1.9700289765115364, "learning_rate": 6.353421859039837e-06, "loss": 0.8721, "step": 622 }, { "epoch": 0.019094029667770015, "grad_norm": 2.162321518848782, "learning_rate": 6.363636363636364e-06, "loss": 0.9119, "step": 623 }, { "epoch": 0.019124678190511217, "grad_norm": 2.0468997696623954, "learning_rate": 6.373850868232892e-06, "loss": 0.9332, "step": 624 }, { "epoch": 0.019155326713252423, "grad_norm": 0.5225730313842495, "learning_rate": 6.384065372829419e-06, "loss": 0.51, "step": 625 }, { "epoch": 0.019185975235993625, "grad_norm": 0.5467874051107885, "learning_rate": 6.3942798774259454e-06, "loss": 0.5081, "step": 626 }, { "epoch": 0.01921662375873483, "grad_norm": 2.2080784170663663, "learning_rate": 6.404494382022472e-06, "loss": 0.8755, "step": 627 }, { "epoch": 0.019247272281476033, "grad_norm": 1.8904860254929559, "learning_rate": 6.414708886619e-06, "loss": 0.7654, "step": 628 }, { "epoch": 0.01927792080421724, "grad_norm": 2.009692067085428, "learning_rate": 6.424923391215527e-06, "loss": 0.8441, "step": 629 }, { "epoch": 0.01930856932695844, "grad_norm": 1.8173623707970978, "learning_rate": 6.435137895812053e-06, "loss": 0.9085, "step": 630 }, { "epoch": 0.019339217849699643, "grad_norm": 1.8192967127034696, "learning_rate": 6.445352400408581e-06, "loss": 0.8127, "step": 631 }, { "epoch": 0.01936986637244085, "grad_norm": 0.553976258245407, "learning_rate": 6.455566905005107e-06, "loss": 0.5074, "step": 632 }, { "epoch": 0.01940051489518205, "grad_norm": 1.9388355229544745, "learning_rate": 6.465781409601635e-06, "loss": 0.8891, "step": 633 }, { "epoch": 0.019431163417923256, "grad_norm": 1.7770325062771104, "learning_rate": 6.475995914198162e-06, "loss": 0.869, "step": 634 }, { "epoch": 0.01946181194066446, "grad_norm": 1.9154132961016073, "learning_rate": 6.48621041879469e-06, "loss": 0.8375, "step": 635 }, { "epoch": 0.019492460463405664, "grad_norm": 1.9518018612916859, "learning_rate": 6.496424923391216e-06, "loss": 0.7816, "step": 636 }, { "epoch": 0.019523108986146866, "grad_norm": 1.8952086276094744, "learning_rate": 6.506639427987743e-06, "loss": 0.8081, "step": 637 }, { "epoch": 0.019553757508888072, "grad_norm": 2.13076518349875, "learning_rate": 6.51685393258427e-06, "loss": 0.9703, "step": 638 }, { "epoch": 0.019584406031629274, "grad_norm": 1.6916220990363742, "learning_rate": 6.527068437180797e-06, "loss": 0.7828, "step": 639 }, { "epoch": 0.01961505455437048, "grad_norm": 1.7687473266726281, "learning_rate": 6.537282941777325e-06, "loss": 0.7547, "step": 640 }, { "epoch": 0.019645703077111682, "grad_norm": 0.5421757493210981, "learning_rate": 6.547497446373851e-06, "loss": 0.5162, "step": 641 }, { "epoch": 0.019676351599852888, "grad_norm": 0.5540357906500841, "learning_rate": 6.557711950970378e-06, "loss": 0.525, "step": 642 }, { "epoch": 0.01970700012259409, "grad_norm": 1.8103467462475653, "learning_rate": 6.567926455566905e-06, "loss": 0.8489, "step": 643 }, { "epoch": 0.019737648645335296, "grad_norm": 1.8651472379259724, "learning_rate": 6.5781409601634326e-06, "loss": 0.8358, "step": 644 }, { "epoch": 0.019768297168076498, "grad_norm": 1.8208090380904645, "learning_rate": 6.58835546475996e-06, "loss": 0.83, "step": 645 }, { "epoch": 0.019798945690817703, "grad_norm": 2.00902142945766, "learning_rate": 6.598569969356486e-06, "loss": 0.7601, "step": 646 }, { "epoch": 0.019829594213558906, "grad_norm": 1.7617258290066449, "learning_rate": 6.608784473953014e-06, "loss": 0.8019, "step": 647 }, { "epoch": 0.01986024273630011, "grad_norm": 1.9473345235935922, "learning_rate": 6.618998978549541e-06, "loss": 0.8472, "step": 648 }, { "epoch": 0.019890891259041314, "grad_norm": 1.8650128887549653, "learning_rate": 6.629213483146067e-06, "loss": 0.9337, "step": 649 }, { "epoch": 0.01992153978178252, "grad_norm": 2.311279742107757, "learning_rate": 6.639427987742595e-06, "loss": 0.9143, "step": 650 }, { "epoch": 0.01995218830452372, "grad_norm": 1.987902615124884, "learning_rate": 6.649642492339123e-06, "loss": 0.9044, "step": 651 }, { "epoch": 0.019982836827264927, "grad_norm": 2.065818207381676, "learning_rate": 6.659856996935649e-06, "loss": 0.8565, "step": 652 }, { "epoch": 0.02001348535000613, "grad_norm": 2.1096568416516996, "learning_rate": 6.670071501532176e-06, "loss": 0.8016, "step": 653 }, { "epoch": 0.020044133872747335, "grad_norm": 0.5436203387283182, "learning_rate": 6.680286006128704e-06, "loss": 0.4932, "step": 654 }, { "epoch": 0.020074782395488537, "grad_norm": 1.706262241476072, "learning_rate": 6.6905005107252305e-06, "loss": 0.8438, "step": 655 }, { "epoch": 0.020105430918229743, "grad_norm": 1.6390381969517807, "learning_rate": 6.700715015321757e-06, "loss": 0.8424, "step": 656 }, { "epoch": 0.020136079440970945, "grad_norm": 1.888106548241186, "learning_rate": 6.710929519918284e-06, "loss": 0.9078, "step": 657 }, { "epoch": 0.02016672796371215, "grad_norm": 1.8626901587181681, "learning_rate": 6.721144024514812e-06, "loss": 0.8491, "step": 658 }, { "epoch": 0.020197376486453353, "grad_norm": 1.7777434057825716, "learning_rate": 6.7313585291113385e-06, "loss": 0.8224, "step": 659 }, { "epoch": 0.02022802500919456, "grad_norm": 1.8536794590910795, "learning_rate": 6.741573033707865e-06, "loss": 0.8962, "step": 660 }, { "epoch": 0.02025867353193576, "grad_norm": 1.612658868935164, "learning_rate": 6.751787538304393e-06, "loss": 0.8707, "step": 661 }, { "epoch": 0.020289322054676963, "grad_norm": 2.039805793263006, "learning_rate": 6.7620020429009206e-06, "loss": 0.8128, "step": 662 }, { "epoch": 0.02031997057741817, "grad_norm": 1.6207460890131624, "learning_rate": 6.7722165474974465e-06, "loss": 0.7913, "step": 663 }, { "epoch": 0.02035061910015937, "grad_norm": 0.5570721449040524, "learning_rate": 6.782431052093974e-06, "loss": 0.5159, "step": 664 }, { "epoch": 0.020381267622900576, "grad_norm": 1.9940390054681554, "learning_rate": 6.792645556690502e-06, "loss": 0.893, "step": 665 }, { "epoch": 0.02041191614564178, "grad_norm": 1.7123209835943054, "learning_rate": 6.802860061287028e-06, "loss": 0.8004, "step": 666 }, { "epoch": 0.020442564668382984, "grad_norm": 1.925160318964607, "learning_rate": 6.813074565883555e-06, "loss": 0.8522, "step": 667 }, { "epoch": 0.020473213191124186, "grad_norm": 2.12286257621427, "learning_rate": 6.823289070480082e-06, "loss": 0.8038, "step": 668 }, { "epoch": 0.020503861713865392, "grad_norm": 2.163351290419092, "learning_rate": 6.83350357507661e-06, "loss": 0.8597, "step": 669 }, { "epoch": 0.020534510236606594, "grad_norm": 1.9937187051609984, "learning_rate": 6.8437180796731365e-06, "loss": 0.8025, "step": 670 }, { "epoch": 0.0205651587593478, "grad_norm": 1.8057293765617477, "learning_rate": 6.853932584269663e-06, "loss": 0.7931, "step": 671 }, { "epoch": 0.020595807282089002, "grad_norm": 1.8449061406803715, "learning_rate": 6.864147088866191e-06, "loss": 0.8611, "step": 672 }, { "epoch": 0.020626455804830208, "grad_norm": 1.9221899708574617, "learning_rate": 6.874361593462717e-06, "loss": 0.8592, "step": 673 }, { "epoch": 0.02065710432757141, "grad_norm": 2.014336306160668, "learning_rate": 6.8845760980592445e-06, "loss": 0.7888, "step": 674 }, { "epoch": 0.020687752850312616, "grad_norm": 1.966477300127472, "learning_rate": 6.894790602655772e-06, "loss": 0.744, "step": 675 }, { "epoch": 0.020718401373053818, "grad_norm": 1.847530277782276, "learning_rate": 6.905005107252298e-06, "loss": 0.8748, "step": 676 }, { "epoch": 0.020749049895795024, "grad_norm": 1.839439298840963, "learning_rate": 6.915219611848826e-06, "loss": 0.8763, "step": 677 }, { "epoch": 0.020779698418536226, "grad_norm": 1.9053622026874504, "learning_rate": 6.925434116445353e-06, "loss": 0.848, "step": 678 }, { "epoch": 0.02081034694127743, "grad_norm": 1.831296113253139, "learning_rate": 6.935648621041881e-06, "loss": 0.8104, "step": 679 }, { "epoch": 0.020840995464018634, "grad_norm": 2.002348003041827, "learning_rate": 6.945863125638407e-06, "loss": 0.8154, "step": 680 }, { "epoch": 0.02087164398675984, "grad_norm": 1.9971566521600397, "learning_rate": 6.9560776302349345e-06, "loss": 0.7922, "step": 681 }, { "epoch": 0.02090229250950104, "grad_norm": 1.8679872375432172, "learning_rate": 6.966292134831461e-06, "loss": 0.8407, "step": 682 }, { "epoch": 0.020932941032242247, "grad_norm": 1.708822246706035, "learning_rate": 6.976506639427988e-06, "loss": 0.7742, "step": 683 }, { "epoch": 0.02096358955498345, "grad_norm": 2.2846873714202087, "learning_rate": 6.986721144024516e-06, "loss": 0.8583, "step": 684 }, { "epoch": 0.020994238077724655, "grad_norm": 2.1144226394458014, "learning_rate": 6.9969356486210425e-06, "loss": 0.8558, "step": 685 }, { "epoch": 0.021024886600465857, "grad_norm": 1.6192842610501847, "learning_rate": 7.007150153217569e-06, "loss": 0.8299, "step": 686 }, { "epoch": 0.021055535123207063, "grad_norm": 1.6184836291042932, "learning_rate": 7.017364657814096e-06, "loss": 0.8576, "step": 687 }, { "epoch": 0.021086183645948265, "grad_norm": 2.0737422582265626, "learning_rate": 7.027579162410624e-06, "loss": 0.7787, "step": 688 }, { "epoch": 0.02111683216868947, "grad_norm": 1.8377259312205625, "learning_rate": 7.037793667007151e-06, "loss": 0.7659, "step": 689 }, { "epoch": 0.021147480691430673, "grad_norm": 1.9791446363935852, "learning_rate": 7.048008171603677e-06, "loss": 0.8097, "step": 690 }, { "epoch": 0.02117812921417188, "grad_norm": 1.8828781331359472, "learning_rate": 7.058222676200205e-06, "loss": 0.8507, "step": 691 }, { "epoch": 0.02120877773691308, "grad_norm": 0.5810726484637111, "learning_rate": 7.0684371807967325e-06, "loss": 0.5243, "step": 692 }, { "epoch": 0.021239426259654283, "grad_norm": 2.0991853072626023, "learning_rate": 7.078651685393258e-06, "loss": 0.8315, "step": 693 }, { "epoch": 0.02127007478239549, "grad_norm": 1.807350405241431, "learning_rate": 7.088866189989786e-06, "loss": 0.8509, "step": 694 }, { "epoch": 0.02130072330513669, "grad_norm": 1.7858375838901308, "learning_rate": 7.099080694586314e-06, "loss": 0.7849, "step": 695 }, { "epoch": 0.021331371827877896, "grad_norm": 2.1835042942575704, "learning_rate": 7.1092951991828404e-06, "loss": 0.8394, "step": 696 }, { "epoch": 0.0213620203506191, "grad_norm": 1.9961704353757486, "learning_rate": 7.119509703779367e-06, "loss": 0.8584, "step": 697 }, { "epoch": 0.021392668873360304, "grad_norm": 1.639612865243043, "learning_rate": 7.129724208375894e-06, "loss": 0.762, "step": 698 }, { "epoch": 0.021423317396101507, "grad_norm": 0.5281834514569487, "learning_rate": 7.139938712972422e-06, "loss": 0.512, "step": 699 }, { "epoch": 0.021453965918842712, "grad_norm": 1.8055169899859167, "learning_rate": 7.150153217568948e-06, "loss": 0.8364, "step": 700 }, { "epoch": 0.021484614441583914, "grad_norm": 2.0189339913548916, "learning_rate": 7.160367722165475e-06, "loss": 0.8247, "step": 701 }, { "epoch": 0.02151526296432512, "grad_norm": 1.8806073951688116, "learning_rate": 7.170582226762003e-06, "loss": 0.838, "step": 702 }, { "epoch": 0.021545911487066322, "grad_norm": 1.7513575422569168, "learning_rate": 7.180796731358529e-06, "loss": 0.8979, "step": 703 }, { "epoch": 0.021576560009807528, "grad_norm": 1.905632994257091, "learning_rate": 7.191011235955056e-06, "loss": 0.7667, "step": 704 }, { "epoch": 0.02160720853254873, "grad_norm": 1.9163301913879012, "learning_rate": 7.201225740551584e-06, "loss": 0.6957, "step": 705 }, { "epoch": 0.021637857055289936, "grad_norm": 2.074851930003208, "learning_rate": 7.211440245148112e-06, "loss": 0.9019, "step": 706 }, { "epoch": 0.021668505578031138, "grad_norm": 1.675860698585072, "learning_rate": 7.2216547497446376e-06, "loss": 0.8312, "step": 707 }, { "epoch": 0.021699154100772344, "grad_norm": 1.7525754845935366, "learning_rate": 7.231869254341165e-06, "loss": 0.8849, "step": 708 }, { "epoch": 0.021729802623513546, "grad_norm": 1.875241793081149, "learning_rate": 7.242083758937693e-06, "loss": 0.8682, "step": 709 }, { "epoch": 0.02176045114625475, "grad_norm": 1.8674158634893099, "learning_rate": 7.252298263534219e-06, "loss": 0.8513, "step": 710 }, { "epoch": 0.021791099668995954, "grad_norm": 2.197414643374834, "learning_rate": 7.262512768130746e-06, "loss": 0.8789, "step": 711 }, { "epoch": 0.02182174819173716, "grad_norm": 1.7383674345668285, "learning_rate": 7.272727272727273e-06, "loss": 0.8076, "step": 712 }, { "epoch": 0.02185239671447836, "grad_norm": 1.750524119250876, "learning_rate": 7.282941777323801e-06, "loss": 0.801, "step": 713 }, { "epoch": 0.021883045237219567, "grad_norm": 1.9471108721896813, "learning_rate": 7.293156281920328e-06, "loss": 0.8128, "step": 714 }, { "epoch": 0.02191369375996077, "grad_norm": 2.095064803997909, "learning_rate": 7.303370786516854e-06, "loss": 0.9069, "step": 715 }, { "epoch": 0.021944342282701975, "grad_norm": 1.6055626467241104, "learning_rate": 7.313585291113382e-06, "loss": 0.8017, "step": 716 }, { "epoch": 0.021974990805443177, "grad_norm": 1.8866048419569919, "learning_rate": 7.323799795709908e-06, "loss": 0.9156, "step": 717 }, { "epoch": 0.022005639328184383, "grad_norm": 1.85828475879991, "learning_rate": 7.3340143003064355e-06, "loss": 0.8656, "step": 718 }, { "epoch": 0.022036287850925585, "grad_norm": 2.006208940990668, "learning_rate": 7.344228804902963e-06, "loss": 0.8788, "step": 719 }, { "epoch": 0.02206693637366679, "grad_norm": 2.0758228496441493, "learning_rate": 7.354443309499489e-06, "loss": 0.8086, "step": 720 }, { "epoch": 0.022097584896407993, "grad_norm": 2.109639529124265, "learning_rate": 7.364657814096017e-06, "loss": 0.7956, "step": 721 }, { "epoch": 0.0221282334191492, "grad_norm": 1.9733160817281645, "learning_rate": 7.374872318692544e-06, "loss": 0.8089, "step": 722 }, { "epoch": 0.0221588819418904, "grad_norm": 1.8970510840262622, "learning_rate": 7.385086823289071e-06, "loss": 0.8518, "step": 723 }, { "epoch": 0.022189530464631606, "grad_norm": 1.7934252161104018, "learning_rate": 7.395301327885598e-06, "loss": 0.7675, "step": 724 }, { "epoch": 0.02222017898737281, "grad_norm": 2.012434760497623, "learning_rate": 7.4055158324821256e-06, "loss": 0.8661, "step": 725 }, { "epoch": 0.02225082751011401, "grad_norm": 2.1976432118072493, "learning_rate": 7.415730337078652e-06, "loss": 0.9339, "step": 726 }, { "epoch": 0.022281476032855217, "grad_norm": 0.5559066805733248, "learning_rate": 7.425944841675179e-06, "loss": 0.5301, "step": 727 }, { "epoch": 0.02231212455559642, "grad_norm": 1.9251265267135265, "learning_rate": 7.436159346271706e-06, "loss": 0.879, "step": 728 }, { "epoch": 0.022342773078337624, "grad_norm": 1.677833203235899, "learning_rate": 7.4463738508682335e-06, "loss": 0.8049, "step": 729 }, { "epoch": 0.022373421601078827, "grad_norm": 2.1748338523565653, "learning_rate": 7.456588355464761e-06, "loss": 0.7996, "step": 730 }, { "epoch": 0.022404070123820032, "grad_norm": 1.6337888774698437, "learning_rate": 7.466802860061287e-06, "loss": 0.8018, "step": 731 }, { "epoch": 0.022434718646561234, "grad_norm": 2.0407689372328384, "learning_rate": 7.477017364657815e-06, "loss": 0.8475, "step": 732 }, { "epoch": 0.02246536716930244, "grad_norm": 2.307791921021635, "learning_rate": 7.487231869254342e-06, "loss": 0.7958, "step": 733 }, { "epoch": 0.022496015692043642, "grad_norm": 1.7250241680055403, "learning_rate": 7.497446373850868e-06, "loss": 0.8551, "step": 734 }, { "epoch": 0.022526664214784848, "grad_norm": 1.9486154718860398, "learning_rate": 7.507660878447396e-06, "loss": 0.7753, "step": 735 }, { "epoch": 0.02255731273752605, "grad_norm": 0.5530326724327148, "learning_rate": 7.5178753830439235e-06, "loss": 0.5108, "step": 736 }, { "epoch": 0.022587961260267256, "grad_norm": 1.7111908720908893, "learning_rate": 7.5280898876404495e-06, "loss": 0.8997, "step": 737 }, { "epoch": 0.022618609783008458, "grad_norm": 1.8991968499335965, "learning_rate": 7.538304392236977e-06, "loss": 0.865, "step": 738 }, { "epoch": 0.022649258305749664, "grad_norm": 2.1194725927641622, "learning_rate": 7.548518896833505e-06, "loss": 0.7754, "step": 739 }, { "epoch": 0.022679906828490866, "grad_norm": 1.780004953126006, "learning_rate": 7.5587334014300315e-06, "loss": 0.6947, "step": 740 }, { "epoch": 0.02271055535123207, "grad_norm": 0.5110770391917615, "learning_rate": 7.568947906026558e-06, "loss": 0.4964, "step": 741 }, { "epoch": 0.022741203873973274, "grad_norm": 1.8710278112458736, "learning_rate": 7.579162410623085e-06, "loss": 0.8526, "step": 742 }, { "epoch": 0.02277185239671448, "grad_norm": 1.7094524095762038, "learning_rate": 7.589376915219613e-06, "loss": 0.836, "step": 743 }, { "epoch": 0.02280250091945568, "grad_norm": 1.967251699450419, "learning_rate": 7.5995914198161395e-06, "loss": 0.9215, "step": 744 }, { "epoch": 0.022833149442196887, "grad_norm": 1.839224962992188, "learning_rate": 7.609805924412666e-06, "loss": 0.8384, "step": 745 }, { "epoch": 0.02286379796493809, "grad_norm": 2.0955350776881345, "learning_rate": 7.620020429009194e-06, "loss": 0.9043, "step": 746 }, { "epoch": 0.022894446487679295, "grad_norm": 1.9537656854079166, "learning_rate": 7.63023493360572e-06, "loss": 0.841, "step": 747 }, { "epoch": 0.022925095010420497, "grad_norm": 1.7328449943584752, "learning_rate": 7.640449438202247e-06, "loss": 0.7377, "step": 748 }, { "epoch": 0.022955743533161703, "grad_norm": 1.6989562326682912, "learning_rate": 7.650663942798775e-06, "loss": 0.8266, "step": 749 }, { "epoch": 0.022986392055902905, "grad_norm": 1.8364585360349386, "learning_rate": 7.660878447395303e-06, "loss": 0.8617, "step": 750 }, { "epoch": 0.02301704057864411, "grad_norm": 1.7911781383097638, "learning_rate": 7.671092951991829e-06, "loss": 0.8201, "step": 751 }, { "epoch": 0.023047689101385313, "grad_norm": 1.9565063091770267, "learning_rate": 7.681307456588356e-06, "loss": 0.8751, "step": 752 }, { "epoch": 0.02307833762412652, "grad_norm": 1.7308024485177054, "learning_rate": 7.691521961184884e-06, "loss": 0.7892, "step": 753 }, { "epoch": 0.02310898614686772, "grad_norm": 2.0111664636825783, "learning_rate": 7.70173646578141e-06, "loss": 0.8059, "step": 754 }, { "epoch": 0.023139634669608927, "grad_norm": 2.2086998952867254, "learning_rate": 7.711950970377937e-06, "loss": 0.8257, "step": 755 }, { "epoch": 0.02317028319235013, "grad_norm": 2.094089942243336, "learning_rate": 7.722165474974465e-06, "loss": 0.7702, "step": 756 }, { "epoch": 0.02320093171509133, "grad_norm": 1.9891995948066319, "learning_rate": 7.732379979570993e-06, "loss": 0.7416, "step": 757 }, { "epoch": 0.023231580237832537, "grad_norm": 2.149882034941162, "learning_rate": 7.742594484167519e-06, "loss": 0.8063, "step": 758 }, { "epoch": 0.02326222876057374, "grad_norm": 1.723356755096407, "learning_rate": 7.752808988764046e-06, "loss": 0.7609, "step": 759 }, { "epoch": 0.023292877283314944, "grad_norm": 1.8932783990417283, "learning_rate": 7.763023493360572e-06, "loss": 0.9157, "step": 760 }, { "epoch": 0.023323525806056147, "grad_norm": 1.9942220399757093, "learning_rate": 7.7732379979571e-06, "loss": 0.7821, "step": 761 }, { "epoch": 0.023354174328797352, "grad_norm": 1.7488269366570577, "learning_rate": 7.783452502553627e-06, "loss": 0.8463, "step": 762 }, { "epoch": 0.023384822851538555, "grad_norm": 2.0287523020390443, "learning_rate": 7.793667007150153e-06, "loss": 0.9375, "step": 763 }, { "epoch": 0.02341547137427976, "grad_norm": 0.5932027199313892, "learning_rate": 7.803881511746681e-06, "loss": 0.4932, "step": 764 }, { "epoch": 0.023446119897020962, "grad_norm": 1.7414584731910114, "learning_rate": 7.814096016343207e-06, "loss": 0.8114, "step": 765 }, { "epoch": 0.023476768419762168, "grad_norm": 0.5632596130149216, "learning_rate": 7.824310520939735e-06, "loss": 0.5125, "step": 766 }, { "epoch": 0.02350741694250337, "grad_norm": 2.1158854374604386, "learning_rate": 7.834525025536262e-06, "loss": 0.9361, "step": 767 }, { "epoch": 0.023538065465244576, "grad_norm": 1.5982414833498984, "learning_rate": 7.844739530132788e-06, "loss": 0.7992, "step": 768 }, { "epoch": 0.023568713987985778, "grad_norm": 1.9190471746785651, "learning_rate": 7.854954034729316e-06, "loss": 0.7614, "step": 769 }, { "epoch": 0.023599362510726984, "grad_norm": 1.7946835187670136, "learning_rate": 7.865168539325843e-06, "loss": 0.7205, "step": 770 }, { "epoch": 0.023630011033468186, "grad_norm": 1.9100309239799178, "learning_rate": 7.87538304392237e-06, "loss": 0.7677, "step": 771 }, { "epoch": 0.02366065955620939, "grad_norm": 1.8606765691169986, "learning_rate": 7.885597548518897e-06, "loss": 0.9131, "step": 772 }, { "epoch": 0.023691308078950594, "grad_norm": 0.5079636284444401, "learning_rate": 7.895812053115425e-06, "loss": 0.4914, "step": 773 }, { "epoch": 0.0237219566016918, "grad_norm": 1.746436796301612, "learning_rate": 7.906026557711952e-06, "loss": 0.866, "step": 774 }, { "epoch": 0.023752605124433, "grad_norm": 1.899399888421003, "learning_rate": 7.916241062308478e-06, "loss": 0.7989, "step": 775 }, { "epoch": 0.023783253647174207, "grad_norm": 1.8695404696356697, "learning_rate": 7.926455566905006e-06, "loss": 0.8754, "step": 776 }, { "epoch": 0.02381390216991541, "grad_norm": 1.782164630949393, "learning_rate": 7.936670071501533e-06, "loss": 0.7633, "step": 777 }, { "epoch": 0.023844550692656615, "grad_norm": 1.9768825108033226, "learning_rate": 7.94688457609806e-06, "loss": 0.8082, "step": 778 }, { "epoch": 0.023875199215397817, "grad_norm": 1.8914589485247815, "learning_rate": 7.957099080694587e-06, "loss": 0.8906, "step": 779 }, { "epoch": 0.023905847738139023, "grad_norm": 2.4265899136327147, "learning_rate": 7.967313585291115e-06, "loss": 0.7106, "step": 780 }, { "epoch": 0.023936496260880225, "grad_norm": 0.5465020871527319, "learning_rate": 7.97752808988764e-06, "loss": 0.5005, "step": 781 }, { "epoch": 0.02396714478362143, "grad_norm": 1.6826177734714154, "learning_rate": 7.987742594484168e-06, "loss": 0.7758, "step": 782 }, { "epoch": 0.023997793306362633, "grad_norm": 1.9826100709680052, "learning_rate": 7.997957099080696e-06, "loss": 0.8343, "step": 783 }, { "epoch": 0.02402844182910384, "grad_norm": 2.3499848103083405, "learning_rate": 8.008171603677223e-06, "loss": 0.8892, "step": 784 }, { "epoch": 0.02405909035184504, "grad_norm": 1.7054302012095082, "learning_rate": 8.01838610827375e-06, "loss": 0.705, "step": 785 }, { "epoch": 0.024089738874586247, "grad_norm": 1.9400869433884222, "learning_rate": 8.028600612870277e-06, "loss": 0.8404, "step": 786 }, { "epoch": 0.02412038739732745, "grad_norm": 1.9217834151726938, "learning_rate": 8.038815117466805e-06, "loss": 0.8132, "step": 787 }, { "epoch": 0.02415103592006865, "grad_norm": 1.9338265168700095, "learning_rate": 8.04902962206333e-06, "loss": 0.8989, "step": 788 }, { "epoch": 0.024181684442809857, "grad_norm": 1.8747853271414212, "learning_rate": 8.059244126659858e-06, "loss": 0.8729, "step": 789 }, { "epoch": 0.02421233296555106, "grad_norm": 1.620765079340608, "learning_rate": 8.069458631256384e-06, "loss": 0.8825, "step": 790 }, { "epoch": 0.024242981488292265, "grad_norm": 0.5268002158390852, "learning_rate": 8.079673135852912e-06, "loss": 0.5119, "step": 791 }, { "epoch": 0.024273630011033467, "grad_norm": 1.8612436890458226, "learning_rate": 8.08988764044944e-06, "loss": 0.8426, "step": 792 }, { "epoch": 0.024304278533774672, "grad_norm": 1.7105581449961866, "learning_rate": 8.100102145045965e-06, "loss": 0.8719, "step": 793 }, { "epoch": 0.024334927056515875, "grad_norm": 1.852707211042306, "learning_rate": 8.110316649642493e-06, "loss": 0.8147, "step": 794 }, { "epoch": 0.02436557557925708, "grad_norm": 1.858875991099887, "learning_rate": 8.120531154239019e-06, "loss": 0.8358, "step": 795 }, { "epoch": 0.024396224101998282, "grad_norm": 0.5207769818133171, "learning_rate": 8.130745658835547e-06, "loss": 0.4841, "step": 796 }, { "epoch": 0.024426872624739488, "grad_norm": 1.7501182483472166, "learning_rate": 8.140960163432074e-06, "loss": 0.8387, "step": 797 }, { "epoch": 0.02445752114748069, "grad_norm": 1.707897320134225, "learning_rate": 8.1511746680286e-06, "loss": 0.7855, "step": 798 }, { "epoch": 0.024488169670221896, "grad_norm": 1.8639027162126132, "learning_rate": 8.161389172625128e-06, "loss": 0.81, "step": 799 }, { "epoch": 0.024518818192963098, "grad_norm": 1.8279924713513735, "learning_rate": 8.171603677221655e-06, "loss": 0.8209, "step": 800 }, { "epoch": 0.024549466715704304, "grad_norm": 1.7988673949466147, "learning_rate": 8.181818181818183e-06, "loss": 0.8518, "step": 801 }, { "epoch": 0.024580115238445506, "grad_norm": 1.894081211812177, "learning_rate": 8.192032686414709e-06, "loss": 0.7952, "step": 802 }, { "epoch": 0.02461076376118671, "grad_norm": 1.963208202919834, "learning_rate": 8.202247191011237e-06, "loss": 0.8666, "step": 803 }, { "epoch": 0.024641412283927914, "grad_norm": 1.9505408098232953, "learning_rate": 8.212461695607764e-06, "loss": 0.8076, "step": 804 }, { "epoch": 0.02467206080666912, "grad_norm": 1.8356435255127346, "learning_rate": 8.22267620020429e-06, "loss": 0.75, "step": 805 }, { "epoch": 0.02470270932941032, "grad_norm": 1.6726525903907554, "learning_rate": 8.232890704800818e-06, "loss": 0.7778, "step": 806 }, { "epoch": 0.024733357852151527, "grad_norm": 2.347298215080832, "learning_rate": 8.243105209397345e-06, "loss": 0.8031, "step": 807 }, { "epoch": 0.02476400637489273, "grad_norm": 1.8869304698258507, "learning_rate": 8.253319713993871e-06, "loss": 0.8292, "step": 808 }, { "epoch": 0.024794654897633935, "grad_norm": 0.5589105313617265, "learning_rate": 8.263534218590399e-06, "loss": 0.519, "step": 809 }, { "epoch": 0.024825303420375137, "grad_norm": 0.5321449359033937, "learning_rate": 8.273748723186927e-06, "loss": 0.4984, "step": 810 }, { "epoch": 0.024855951943116343, "grad_norm": 1.9621510906628088, "learning_rate": 8.283963227783454e-06, "loss": 0.8513, "step": 811 }, { "epoch": 0.024886600465857545, "grad_norm": 1.7480925224133197, "learning_rate": 8.29417773237998e-06, "loss": 0.7862, "step": 812 }, { "epoch": 0.02491724898859875, "grad_norm": 1.6741733270574892, "learning_rate": 8.304392236976508e-06, "loss": 0.9162, "step": 813 }, { "epoch": 0.024947897511339953, "grad_norm": 1.8660698660748358, "learning_rate": 8.314606741573035e-06, "loss": 0.8153, "step": 814 }, { "epoch": 0.02497854603408116, "grad_norm": 1.8606950169838423, "learning_rate": 8.324821246169561e-06, "loss": 0.8664, "step": 815 }, { "epoch": 0.02500919455682236, "grad_norm": 1.7075099412583898, "learning_rate": 8.335035750766089e-06, "loss": 0.69, "step": 816 }, { "epoch": 0.025039843079563567, "grad_norm": 1.891975952408911, "learning_rate": 8.345250255362617e-06, "loss": 0.7446, "step": 817 }, { "epoch": 0.02507049160230477, "grad_norm": 1.9922898530236017, "learning_rate": 8.355464759959142e-06, "loss": 0.8216, "step": 818 }, { "epoch": 0.02510114012504597, "grad_norm": 0.522334375508359, "learning_rate": 8.36567926455567e-06, "loss": 0.5351, "step": 819 }, { "epoch": 0.025131788647787177, "grad_norm": 0.5691766154221455, "learning_rate": 8.375893769152198e-06, "loss": 0.5007, "step": 820 }, { "epoch": 0.02516243717052838, "grad_norm": 2.1081877554641766, "learning_rate": 8.386108273748724e-06, "loss": 0.7807, "step": 821 }, { "epoch": 0.025193085693269585, "grad_norm": 1.848594104741574, "learning_rate": 8.396322778345251e-06, "loss": 0.8454, "step": 822 }, { "epoch": 0.025223734216010787, "grad_norm": 2.241952904145501, "learning_rate": 8.406537282941777e-06, "loss": 0.8712, "step": 823 }, { "epoch": 0.025254382738751992, "grad_norm": 1.8801413997476624, "learning_rate": 8.416751787538305e-06, "loss": 0.8457, "step": 824 }, { "epoch": 0.025285031261493195, "grad_norm": 2.0615971282350687, "learning_rate": 8.426966292134832e-06, "loss": 0.8859, "step": 825 }, { "epoch": 0.0253156797842344, "grad_norm": 1.8340629920065654, "learning_rate": 8.437180796731358e-06, "loss": 0.8121, "step": 826 }, { "epoch": 0.025346328306975602, "grad_norm": 1.7775966257038573, "learning_rate": 8.447395301327886e-06, "loss": 0.8247, "step": 827 }, { "epoch": 0.025376976829716808, "grad_norm": 1.8184567676581946, "learning_rate": 8.457609805924414e-06, "loss": 0.8179, "step": 828 }, { "epoch": 0.02540762535245801, "grad_norm": 1.963928116814159, "learning_rate": 8.46782431052094e-06, "loss": 0.9004, "step": 829 }, { "epoch": 0.025438273875199216, "grad_norm": 1.9650033027860405, "learning_rate": 8.478038815117467e-06, "loss": 0.9039, "step": 830 }, { "epoch": 0.025468922397940418, "grad_norm": 1.8381320297754618, "learning_rate": 8.488253319713995e-06, "loss": 0.9952, "step": 831 }, { "epoch": 0.025499570920681624, "grad_norm": 1.9751804075204011, "learning_rate": 8.49846782431052e-06, "loss": 0.8869, "step": 832 }, { "epoch": 0.025530219443422826, "grad_norm": 1.9129928579233428, "learning_rate": 8.508682328907048e-06, "loss": 0.8363, "step": 833 }, { "epoch": 0.02556086796616403, "grad_norm": 1.7542721893142854, "learning_rate": 8.518896833503576e-06, "loss": 0.8464, "step": 834 }, { "epoch": 0.025591516488905234, "grad_norm": 2.098236213449367, "learning_rate": 8.529111338100104e-06, "loss": 0.7182, "step": 835 }, { "epoch": 0.02562216501164644, "grad_norm": 1.789111535097205, "learning_rate": 8.53932584269663e-06, "loss": 0.8959, "step": 836 }, { "epoch": 0.025652813534387642, "grad_norm": 1.5019354961011315, "learning_rate": 8.549540347293157e-06, "loss": 0.8171, "step": 837 }, { "epoch": 0.025683462057128847, "grad_norm": 1.960869850860928, "learning_rate": 8.559754851889685e-06, "loss": 0.9041, "step": 838 }, { "epoch": 0.02571411057987005, "grad_norm": 1.786186239226934, "learning_rate": 8.56996935648621e-06, "loss": 0.7981, "step": 839 }, { "epoch": 0.025744759102611255, "grad_norm": 1.7668637339375994, "learning_rate": 8.580183861082738e-06, "loss": 0.8153, "step": 840 }, { "epoch": 0.025775407625352457, "grad_norm": 1.7868007544024915, "learning_rate": 8.590398365679266e-06, "loss": 0.8178, "step": 841 }, { "epoch": 0.025806056148093663, "grad_norm": 2.2637744203671404, "learning_rate": 8.600612870275792e-06, "loss": 0.7785, "step": 842 }, { "epoch": 0.025836704670834865, "grad_norm": 1.9752284877392974, "learning_rate": 8.61082737487232e-06, "loss": 0.7281, "step": 843 }, { "epoch": 0.02586735319357607, "grad_norm": 1.6699882289440513, "learning_rate": 8.621041879468847e-06, "loss": 0.8833, "step": 844 }, { "epoch": 0.025898001716317273, "grad_norm": 1.790932771536462, "learning_rate": 8.631256384065375e-06, "loss": 0.8464, "step": 845 }, { "epoch": 0.02592865023905848, "grad_norm": 1.7986003013133234, "learning_rate": 8.6414708886619e-06, "loss": 0.7546, "step": 846 }, { "epoch": 0.02595929876179968, "grad_norm": 0.5719095371664297, "learning_rate": 8.651685393258428e-06, "loss": 0.4897, "step": 847 }, { "epoch": 0.025989947284540887, "grad_norm": 2.1029544117138124, "learning_rate": 8.661899897854954e-06, "loss": 0.7523, "step": 848 }, { "epoch": 0.02602059580728209, "grad_norm": 1.9954129282573825, "learning_rate": 8.672114402451482e-06, "loss": 0.781, "step": 849 }, { "epoch": 0.026051244330023295, "grad_norm": 1.7878042282685604, "learning_rate": 8.68232890704801e-06, "loss": 0.8986, "step": 850 }, { "epoch": 0.026081892852764497, "grad_norm": 1.7341504200073579, "learning_rate": 8.692543411644536e-06, "loss": 0.7837, "step": 851 }, { "epoch": 0.0261125413755057, "grad_norm": 1.9509933640156683, "learning_rate": 8.702757916241063e-06, "loss": 0.7225, "step": 852 }, { "epoch": 0.026143189898246905, "grad_norm": 2.056773822637653, "learning_rate": 8.712972420837589e-06, "loss": 0.8637, "step": 853 }, { "epoch": 0.026173838420988107, "grad_norm": 1.9561879281749388, "learning_rate": 8.723186925434117e-06, "loss": 0.8325, "step": 854 }, { "epoch": 0.026204486943729313, "grad_norm": 1.8984126247500075, "learning_rate": 8.733401430030644e-06, "loss": 0.8774, "step": 855 }, { "epoch": 0.026235135466470515, "grad_norm": 2.0659313000058863, "learning_rate": 8.74361593462717e-06, "loss": 0.8413, "step": 856 }, { "epoch": 0.02626578398921172, "grad_norm": 1.985190003809885, "learning_rate": 8.753830439223698e-06, "loss": 0.8594, "step": 857 }, { "epoch": 0.026296432511952923, "grad_norm": 1.9461565616789462, "learning_rate": 8.764044943820226e-06, "loss": 0.8968, "step": 858 }, { "epoch": 0.026327081034694128, "grad_norm": 1.8869346101030726, "learning_rate": 8.774259448416752e-06, "loss": 0.772, "step": 859 }, { "epoch": 0.02635772955743533, "grad_norm": 1.7749075958231697, "learning_rate": 8.784473953013279e-06, "loss": 0.7747, "step": 860 }, { "epoch": 0.026388378080176536, "grad_norm": 1.8661458523500474, "learning_rate": 8.794688457609807e-06, "loss": 0.8723, "step": 861 }, { "epoch": 0.02641902660291774, "grad_norm": 1.8625894391313105, "learning_rate": 8.804902962206334e-06, "loss": 0.8246, "step": 862 }, { "epoch": 0.026449675125658944, "grad_norm": 0.5467005237874227, "learning_rate": 8.81511746680286e-06, "loss": 0.4945, "step": 863 }, { "epoch": 0.026480323648400146, "grad_norm": 2.2089247801418463, "learning_rate": 8.825331971399388e-06, "loss": 0.8362, "step": 864 }, { "epoch": 0.026510972171141352, "grad_norm": 1.750638128228365, "learning_rate": 8.835546475995916e-06, "loss": 0.8431, "step": 865 }, { "epoch": 0.026541620693882554, "grad_norm": 1.9065708469930225, "learning_rate": 8.845760980592442e-06, "loss": 0.8705, "step": 866 }, { "epoch": 0.02657226921662376, "grad_norm": 1.7889116507161957, "learning_rate": 8.855975485188969e-06, "loss": 0.8029, "step": 867 }, { "epoch": 0.026602917739364962, "grad_norm": 1.9354312138877297, "learning_rate": 8.866189989785497e-06, "loss": 0.8065, "step": 868 }, { "epoch": 0.026633566262106168, "grad_norm": 1.7874648019805617, "learning_rate": 8.876404494382023e-06, "loss": 0.8226, "step": 869 }, { "epoch": 0.02666421478484737, "grad_norm": 1.976698421743498, "learning_rate": 8.88661899897855e-06, "loss": 0.7705, "step": 870 }, { "epoch": 0.026694863307588575, "grad_norm": 1.722988337598737, "learning_rate": 8.896833503575078e-06, "loss": 0.7651, "step": 871 }, { "epoch": 0.026725511830329778, "grad_norm": 2.03716243226289, "learning_rate": 8.907048008171606e-06, "loss": 0.8516, "step": 872 }, { "epoch": 0.026756160353070983, "grad_norm": 1.8282992086348788, "learning_rate": 8.917262512768132e-06, "loss": 0.8136, "step": 873 }, { "epoch": 0.026786808875812185, "grad_norm": 0.5450682456245363, "learning_rate": 8.927477017364659e-06, "loss": 0.5255, "step": 874 }, { "epoch": 0.02681745739855339, "grad_norm": 1.899590623741128, "learning_rate": 8.937691521961187e-06, "loss": 0.7988, "step": 875 }, { "epoch": 0.026848105921294593, "grad_norm": 1.851613784043913, "learning_rate": 8.947906026557713e-06, "loss": 0.7635, "step": 876 }, { "epoch": 0.0268787544440358, "grad_norm": 2.112152211608425, "learning_rate": 8.95812053115424e-06, "loss": 0.8837, "step": 877 }, { "epoch": 0.026909402966777, "grad_norm": 1.933302297210001, "learning_rate": 8.968335035750766e-06, "loss": 0.7511, "step": 878 }, { "epoch": 0.026940051489518207, "grad_norm": 1.8695829888711524, "learning_rate": 8.978549540347294e-06, "loss": 0.8234, "step": 879 }, { "epoch": 0.02697070001225941, "grad_norm": 1.812652396017215, "learning_rate": 8.988764044943822e-06, "loss": 0.8417, "step": 880 }, { "epoch": 0.027001348535000615, "grad_norm": 1.7393568592522952, "learning_rate": 8.998978549540347e-06, "loss": 0.7753, "step": 881 }, { "epoch": 0.027031997057741817, "grad_norm": 0.5600785807878444, "learning_rate": 9.009193054136875e-06, "loss": 0.5267, "step": 882 }, { "epoch": 0.02706264558048302, "grad_norm": 1.83647972403181, "learning_rate": 9.019407558733401e-06, "loss": 0.7961, "step": 883 }, { "epoch": 0.027093294103224225, "grad_norm": 1.907128975764622, "learning_rate": 9.029622063329929e-06, "loss": 0.8424, "step": 884 }, { "epoch": 0.027123942625965427, "grad_norm": 1.8665505824693343, "learning_rate": 9.039836567926456e-06, "loss": 0.8116, "step": 885 }, { "epoch": 0.027154591148706633, "grad_norm": 1.7293761907492584, "learning_rate": 9.050051072522982e-06, "loss": 0.9125, "step": 886 }, { "epoch": 0.027185239671447835, "grad_norm": 1.7397152115827417, "learning_rate": 9.06026557711951e-06, "loss": 0.7305, "step": 887 }, { "epoch": 0.02721588819418904, "grad_norm": 1.9387338718871987, "learning_rate": 9.070480081716037e-06, "loss": 0.8786, "step": 888 }, { "epoch": 0.027246536716930243, "grad_norm": 1.5831454424481701, "learning_rate": 9.080694586312565e-06, "loss": 0.7724, "step": 889 }, { "epoch": 0.02727718523967145, "grad_norm": 2.8019945286103165, "learning_rate": 9.090909090909091e-06, "loss": 0.8367, "step": 890 }, { "epoch": 0.02730783376241265, "grad_norm": 1.945374660859934, "learning_rate": 9.101123595505619e-06, "loss": 0.7426, "step": 891 }, { "epoch": 0.027338482285153856, "grad_norm": 1.909419744064806, "learning_rate": 9.111338100102146e-06, "loss": 0.8151, "step": 892 }, { "epoch": 0.02736913080789506, "grad_norm": 1.6432048038101261, "learning_rate": 9.121552604698672e-06, "loss": 0.8022, "step": 893 }, { "epoch": 0.027399779330636264, "grad_norm": 1.8999141107146174, "learning_rate": 9.1317671092952e-06, "loss": 0.7904, "step": 894 }, { "epoch": 0.027430427853377466, "grad_norm": 1.8055971202397103, "learning_rate": 9.141981613891727e-06, "loss": 0.6972, "step": 895 }, { "epoch": 0.027461076376118672, "grad_norm": 2.0670003966519803, "learning_rate": 9.152196118488255e-06, "loss": 0.8689, "step": 896 }, { "epoch": 0.027491724898859874, "grad_norm": 1.8086444249293525, "learning_rate": 9.162410623084781e-06, "loss": 0.729, "step": 897 }, { "epoch": 0.02752237342160108, "grad_norm": 1.8500903029388052, "learning_rate": 9.172625127681309e-06, "loss": 0.7822, "step": 898 }, { "epoch": 0.027553021944342282, "grad_norm": 0.5369051616560225, "learning_rate": 9.182839632277836e-06, "loss": 0.5133, "step": 899 }, { "epoch": 0.027583670467083488, "grad_norm": 1.8964315197134156, "learning_rate": 9.193054136874362e-06, "loss": 0.7804, "step": 900 }, { "epoch": 0.02761431898982469, "grad_norm": 1.9107074268105644, "learning_rate": 9.20326864147089e-06, "loss": 0.8465, "step": 901 }, { "epoch": 0.027644967512565895, "grad_norm": 1.6781717242867318, "learning_rate": 9.213483146067417e-06, "loss": 0.8563, "step": 902 }, { "epoch": 0.027675616035307098, "grad_norm": 2.0578841805519756, "learning_rate": 9.223697650663943e-06, "loss": 0.8982, "step": 903 }, { "epoch": 0.027706264558048303, "grad_norm": 1.8037280822912372, "learning_rate": 9.233912155260471e-06, "loss": 0.8816, "step": 904 }, { "epoch": 0.027736913080789505, "grad_norm": 1.701837927854398, "learning_rate": 9.244126659856999e-06, "loss": 0.8616, "step": 905 }, { "epoch": 0.02776756160353071, "grad_norm": 1.785955985487947, "learning_rate": 9.254341164453525e-06, "loss": 0.8222, "step": 906 }, { "epoch": 0.027798210126271913, "grad_norm": 1.9910333235306108, "learning_rate": 9.264555669050052e-06, "loss": 0.7799, "step": 907 }, { "epoch": 0.02782885864901312, "grad_norm": 1.858945574209587, "learning_rate": 9.274770173646578e-06, "loss": 0.8465, "step": 908 }, { "epoch": 0.02785950717175432, "grad_norm": 0.572748537035264, "learning_rate": 9.284984678243106e-06, "loss": 0.5196, "step": 909 }, { "epoch": 0.027890155694495527, "grad_norm": 2.62701106089711, "learning_rate": 9.295199182839633e-06, "loss": 0.7132, "step": 910 }, { "epoch": 0.02792080421723673, "grad_norm": 1.8994615790425518, "learning_rate": 9.30541368743616e-06, "loss": 0.7443, "step": 911 }, { "epoch": 0.027951452739977935, "grad_norm": 1.8439319617118413, "learning_rate": 9.315628192032687e-06, "loss": 0.8957, "step": 912 }, { "epoch": 0.027982101262719137, "grad_norm": 1.847079128595781, "learning_rate": 9.325842696629213e-06, "loss": 0.8427, "step": 913 }, { "epoch": 0.02801274978546034, "grad_norm": 1.7269485339674986, "learning_rate": 9.33605720122574e-06, "loss": 0.8914, "step": 914 }, { "epoch": 0.028043398308201545, "grad_norm": 1.680561508699066, "learning_rate": 9.346271705822268e-06, "loss": 0.8124, "step": 915 }, { "epoch": 0.028074046830942747, "grad_norm": 3.211402764515668, "learning_rate": 9.356486210418796e-06, "loss": 0.8502, "step": 916 }, { "epoch": 0.028104695353683953, "grad_norm": 1.755326970853572, "learning_rate": 9.366700715015322e-06, "loss": 0.7948, "step": 917 }, { "epoch": 0.028135343876425155, "grad_norm": 1.6845033250840804, "learning_rate": 9.37691521961185e-06, "loss": 0.8143, "step": 918 }, { "epoch": 0.02816599239916636, "grad_norm": 1.7806416053256502, "learning_rate": 9.387129724208377e-06, "loss": 0.873, "step": 919 }, { "epoch": 0.028196640921907563, "grad_norm": 1.8629082334412432, "learning_rate": 9.397344228804903e-06, "loss": 0.8204, "step": 920 }, { "epoch": 0.02822728944464877, "grad_norm": 1.894277093682219, "learning_rate": 9.40755873340143e-06, "loss": 0.7991, "step": 921 }, { "epoch": 0.02825793796738997, "grad_norm": 1.732220972822004, "learning_rate": 9.417773237997958e-06, "loss": 0.8041, "step": 922 }, { "epoch": 0.028288586490131176, "grad_norm": 1.8073979797113013, "learning_rate": 9.427987742594486e-06, "loss": 0.7295, "step": 923 }, { "epoch": 0.02831923501287238, "grad_norm": 0.5997847222935884, "learning_rate": 9.438202247191012e-06, "loss": 0.522, "step": 924 }, { "epoch": 0.028349883535613584, "grad_norm": 0.5798740086301426, "learning_rate": 9.44841675178754e-06, "loss": 0.5159, "step": 925 }, { "epoch": 0.028380532058354786, "grad_norm": 1.8123433419013093, "learning_rate": 9.458631256384067e-06, "loss": 0.7175, "step": 926 }, { "epoch": 0.028411180581095992, "grad_norm": 1.8627438113238601, "learning_rate": 9.468845760980593e-06, "loss": 0.8686, "step": 927 }, { "epoch": 0.028441829103837194, "grad_norm": 1.800866397456303, "learning_rate": 9.47906026557712e-06, "loss": 0.901, "step": 928 }, { "epoch": 0.0284724776265784, "grad_norm": 1.6641270326413833, "learning_rate": 9.489274770173648e-06, "loss": 0.7502, "step": 929 }, { "epoch": 0.028503126149319602, "grad_norm": 2.9659024719712774, "learning_rate": 9.499489274770174e-06, "loss": 0.7974, "step": 930 }, { "epoch": 0.028533774672060808, "grad_norm": 1.8872318214045152, "learning_rate": 9.509703779366702e-06, "loss": 0.6806, "step": 931 }, { "epoch": 0.02856442319480201, "grad_norm": 1.798243150024461, "learning_rate": 9.51991828396323e-06, "loss": 0.8547, "step": 932 }, { "epoch": 0.028595071717543216, "grad_norm": 1.6527566633637891, "learning_rate": 9.530132788559755e-06, "loss": 0.7221, "step": 933 }, { "epoch": 0.028625720240284418, "grad_norm": 1.863476034840302, "learning_rate": 9.540347293156283e-06, "loss": 0.8585, "step": 934 }, { "epoch": 0.028656368763025623, "grad_norm": 0.6435300388988551, "learning_rate": 9.55056179775281e-06, "loss": 0.5049, "step": 935 }, { "epoch": 0.028687017285766826, "grad_norm": 1.9872287120704326, "learning_rate": 9.560776302349337e-06, "loss": 0.8341, "step": 936 }, { "epoch": 0.02871766580850803, "grad_norm": 1.855860694915291, "learning_rate": 9.570990806945864e-06, "loss": 0.7949, "step": 937 }, { "epoch": 0.028748314331249233, "grad_norm": 1.9479117262970438, "learning_rate": 9.58120531154239e-06, "loss": 0.9114, "step": 938 }, { "epoch": 0.02877896285399044, "grad_norm": 1.8455781823870088, "learning_rate": 9.591419816138918e-06, "loss": 0.6628, "step": 939 }, { "epoch": 0.02880961137673164, "grad_norm": 1.8644699004621994, "learning_rate": 9.601634320735445e-06, "loss": 0.8314, "step": 940 }, { "epoch": 0.028840259899472847, "grad_norm": 1.655760659176407, "learning_rate": 9.611848825331971e-06, "loss": 0.7767, "step": 941 }, { "epoch": 0.02887090842221405, "grad_norm": 2.108502577635167, "learning_rate": 9.622063329928499e-06, "loss": 0.8501, "step": 942 }, { "epoch": 0.028901556944955255, "grad_norm": 0.5104090938811693, "learning_rate": 9.632277834525027e-06, "loss": 0.5073, "step": 943 }, { "epoch": 0.028932205467696457, "grad_norm": 1.6102416527728212, "learning_rate": 9.642492339121552e-06, "loss": 0.8561, "step": 944 }, { "epoch": 0.02896285399043766, "grad_norm": 1.8897191520066137, "learning_rate": 9.65270684371808e-06, "loss": 0.8444, "step": 945 }, { "epoch": 0.028993502513178865, "grad_norm": 1.7707868968505058, "learning_rate": 9.662921348314608e-06, "loss": 0.8118, "step": 946 }, { "epoch": 0.029024151035920067, "grad_norm": 1.8525988924596284, "learning_rate": 9.673135852911134e-06, "loss": 0.8607, "step": 947 }, { "epoch": 0.029054799558661273, "grad_norm": 1.593887266496177, "learning_rate": 9.683350357507661e-06, "loss": 0.7717, "step": 948 }, { "epoch": 0.029085448081402475, "grad_norm": 1.9392593881495, "learning_rate": 9.693564862104189e-06, "loss": 0.7978, "step": 949 }, { "epoch": 0.02911609660414368, "grad_norm": 1.6396244868861973, "learning_rate": 9.703779366700717e-06, "loss": 0.8181, "step": 950 }, { "epoch": 0.029146745126884883, "grad_norm": 1.7585581134661137, "learning_rate": 9.713993871297242e-06, "loss": 0.7582, "step": 951 }, { "epoch": 0.02917739364962609, "grad_norm": 1.9142160308659433, "learning_rate": 9.72420837589377e-06, "loss": 0.799, "step": 952 }, { "epoch": 0.02920804217236729, "grad_norm": 2.273848968152947, "learning_rate": 9.734422880490298e-06, "loss": 0.8329, "step": 953 }, { "epoch": 0.029238690695108496, "grad_norm": 1.8055208573855306, "learning_rate": 9.744637385086824e-06, "loss": 0.9128, "step": 954 }, { "epoch": 0.0292693392178497, "grad_norm": 1.9972821998577173, "learning_rate": 9.754851889683351e-06, "loss": 0.8853, "step": 955 }, { "epoch": 0.029299987740590904, "grad_norm": 1.955329289399827, "learning_rate": 9.765066394279879e-06, "loss": 0.8455, "step": 956 }, { "epoch": 0.029330636263332106, "grad_norm": 1.9389536248104715, "learning_rate": 9.775280898876405e-06, "loss": 0.803, "step": 957 }, { "epoch": 0.029361284786073312, "grad_norm": 1.7403606790200812, "learning_rate": 9.785495403472932e-06, "loss": 0.8748, "step": 958 }, { "epoch": 0.029391933308814514, "grad_norm": 1.817253887727855, "learning_rate": 9.79570990806946e-06, "loss": 0.7956, "step": 959 }, { "epoch": 0.02942258183155572, "grad_norm": 1.8092469318042508, "learning_rate": 9.805924412665988e-06, "loss": 0.7949, "step": 960 }, { "epoch": 0.029453230354296922, "grad_norm": 1.9917492790169558, "learning_rate": 9.816138917262514e-06, "loss": 0.7987, "step": 961 }, { "epoch": 0.029483878877038128, "grad_norm": 0.6500638240904446, "learning_rate": 9.826353421859041e-06, "loss": 0.528, "step": 962 }, { "epoch": 0.02951452739977933, "grad_norm": 1.927201272310382, "learning_rate": 9.836567926455567e-06, "loss": 0.8202, "step": 963 }, { "epoch": 0.029545175922520536, "grad_norm": 1.707854827826056, "learning_rate": 9.846782431052095e-06, "loss": 0.7463, "step": 964 }, { "epoch": 0.029575824445261738, "grad_norm": 1.5578095476751441, "learning_rate": 9.856996935648622e-06, "loss": 0.8167, "step": 965 }, { "epoch": 0.029606472968002943, "grad_norm": 1.661822606012167, "learning_rate": 9.867211440245148e-06, "loss": 0.8036, "step": 966 }, { "epoch": 0.029637121490744146, "grad_norm": 1.8142764878305222, "learning_rate": 9.877425944841676e-06, "loss": 0.8342, "step": 967 }, { "epoch": 0.02966777001348535, "grad_norm": 1.7347581808004973, "learning_rate": 9.887640449438202e-06, "loss": 0.6947, "step": 968 }, { "epoch": 0.029698418536226553, "grad_norm": 1.728229462779671, "learning_rate": 9.89785495403473e-06, "loss": 0.8577, "step": 969 }, { "epoch": 0.02972906705896776, "grad_norm": 1.6582914351414573, "learning_rate": 9.908069458631257e-06, "loss": 0.7346, "step": 970 }, { "epoch": 0.02975971558170896, "grad_norm": 2.193580269486555, "learning_rate": 9.918283963227783e-06, "loss": 0.8595, "step": 971 }, { "epoch": 0.029790364104450167, "grad_norm": 1.9566225981470475, "learning_rate": 9.92849846782431e-06, "loss": 0.8843, "step": 972 }, { "epoch": 0.02982101262719137, "grad_norm": 1.8062449175125783, "learning_rate": 9.938712972420838e-06, "loss": 0.7005, "step": 973 }, { "epoch": 0.029851661149932575, "grad_norm": 0.564782952456695, "learning_rate": 9.948927477017364e-06, "loss": 0.5211, "step": 974 }, { "epoch": 0.029882309672673777, "grad_norm": 1.7504728818211253, "learning_rate": 9.959141981613892e-06, "loss": 0.8017, "step": 975 }, { "epoch": 0.029912958195414983, "grad_norm": 1.8094110748844865, "learning_rate": 9.96935648621042e-06, "loss": 0.844, "step": 976 }, { "epoch": 0.029943606718156185, "grad_norm": 1.8374805490286714, "learning_rate": 9.979570990806947e-06, "loss": 0.7868, "step": 977 }, { "epoch": 0.029974255240897387, "grad_norm": 1.747174546794957, "learning_rate": 9.989785495403473e-06, "loss": 0.8491, "step": 978 }, { "epoch": 0.030004903763638593, "grad_norm": 1.830622843404985, "learning_rate": 1e-05, "loss": 0.8379, "step": 979 }, { "epoch": 0.030035552286379795, "grad_norm": 1.7075649171153364, "learning_rate": 9.999999975366861e-06, "loss": 0.8968, "step": 980 }, { "epoch": 0.030066200809121, "grad_norm": 1.9044931621062071, "learning_rate": 9.999999901467443e-06, "loss": 0.7561, "step": 981 }, { "epoch": 0.030096849331862203, "grad_norm": 1.716161865507955, "learning_rate": 9.999999778301746e-06, "loss": 0.8834, "step": 982 }, { "epoch": 0.03012749785460341, "grad_norm": 1.7969479581227092, "learning_rate": 9.999999605869772e-06, "loss": 0.933, "step": 983 }, { "epoch": 0.03015814637734461, "grad_norm": 1.9247245643605444, "learning_rate": 9.999999384171522e-06, "loss": 0.799, "step": 984 }, { "epoch": 0.030188794900085816, "grad_norm": 0.5203613693153094, "learning_rate": 9.999999113207e-06, "loss": 0.5288, "step": 985 }, { "epoch": 0.03021944342282702, "grad_norm": 1.736039692682003, "learning_rate": 9.999998792976206e-06, "loss": 0.8258, "step": 986 }, { "epoch": 0.030250091945568224, "grad_norm": 2.0720212540387624, "learning_rate": 9.999998423479145e-06, "loss": 0.7769, "step": 987 }, { "epoch": 0.030280740468309426, "grad_norm": 1.9325921644735642, "learning_rate": 9.99999800471582e-06, "loss": 0.7938, "step": 988 }, { "epoch": 0.030311388991050632, "grad_norm": 0.5602600408086591, "learning_rate": 9.999997536686236e-06, "loss": 0.5195, "step": 989 }, { "epoch": 0.030342037513791834, "grad_norm": 1.7199467658016152, "learning_rate": 9.999997019390398e-06, "loss": 0.8124, "step": 990 }, { "epoch": 0.03037268603653304, "grad_norm": 2.0173649039207526, "learning_rate": 9.999996452828306e-06, "loss": 0.8736, "step": 991 }, { "epoch": 0.030403334559274242, "grad_norm": 0.5234357357613043, "learning_rate": 9.999995836999975e-06, "loss": 0.4892, "step": 992 }, { "epoch": 0.030433983082015448, "grad_norm": 1.7781630890827753, "learning_rate": 9.999995171905401e-06, "loss": 0.7044, "step": 993 }, { "epoch": 0.03046463160475665, "grad_norm": 1.7519683528637422, "learning_rate": 9.999994457544599e-06, "loss": 0.9192, "step": 994 }, { "epoch": 0.030495280127497856, "grad_norm": 0.5434105819011297, "learning_rate": 9.99999369391757e-06, "loss": 0.5244, "step": 995 }, { "epoch": 0.030525928650239058, "grad_norm": 1.7966712411608436, "learning_rate": 9.999992881024326e-06, "loss": 0.7751, "step": 996 }, { "epoch": 0.030556577172980263, "grad_norm": 1.871328572682183, "learning_rate": 9.999992018864871e-06, "loss": 0.8786, "step": 997 }, { "epoch": 0.030587225695721466, "grad_norm": 1.7747162870414896, "learning_rate": 9.999991107439215e-06, "loss": 0.7802, "step": 998 }, { "epoch": 0.03061787421846267, "grad_norm": 2.0896906805150115, "learning_rate": 9.999990146747369e-06, "loss": 0.8055, "step": 999 }, { "epoch": 0.030648522741203874, "grad_norm": 0.5265603573922315, "learning_rate": 9.999989136789339e-06, "loss": 0.5152, "step": 1000 }, { "epoch": 0.03067917126394508, "grad_norm": 1.685621883594803, "learning_rate": 9.999988077565138e-06, "loss": 0.832, "step": 1001 }, { "epoch": 0.03070981978668628, "grad_norm": 1.797557102951149, "learning_rate": 9.999986969074775e-06, "loss": 0.7665, "step": 1002 }, { "epoch": 0.030740468309427487, "grad_norm": 2.231197584372014, "learning_rate": 9.99998581131826e-06, "loss": 0.7471, "step": 1003 }, { "epoch": 0.03077111683216869, "grad_norm": 1.7290168487656783, "learning_rate": 9.999984604295606e-06, "loss": 0.8412, "step": 1004 }, { "epoch": 0.030801765354909895, "grad_norm": 1.8633494427988504, "learning_rate": 9.999983348006825e-06, "loss": 0.7899, "step": 1005 }, { "epoch": 0.030832413877651097, "grad_norm": 1.6963973601918756, "learning_rate": 9.999982042451927e-06, "loss": 0.8724, "step": 1006 }, { "epoch": 0.030863062400392303, "grad_norm": 1.6271720449424723, "learning_rate": 9.999980687630931e-06, "loss": 0.7439, "step": 1007 }, { "epoch": 0.030893710923133505, "grad_norm": 1.8644324009646784, "learning_rate": 9.999979283543842e-06, "loss": 0.8454, "step": 1008 }, { "epoch": 0.030924359445874707, "grad_norm": 1.618675966316438, "learning_rate": 9.99997783019068e-06, "loss": 0.8607, "step": 1009 }, { "epoch": 0.030955007968615913, "grad_norm": 1.6232066584085365, "learning_rate": 9.999976327571454e-06, "loss": 0.7185, "step": 1010 }, { "epoch": 0.030985656491357115, "grad_norm": 1.5902574043608404, "learning_rate": 9.999974775686186e-06, "loss": 0.7028, "step": 1011 }, { "epoch": 0.03101630501409832, "grad_norm": 1.842134630049514, "learning_rate": 9.999973174534885e-06, "loss": 0.774, "step": 1012 }, { "epoch": 0.031046953536839523, "grad_norm": 2.005327478108837, "learning_rate": 9.999971524117569e-06, "loss": 0.8457, "step": 1013 }, { "epoch": 0.03107760205958073, "grad_norm": 1.8758354358093272, "learning_rate": 9.999969824434255e-06, "loss": 0.7948, "step": 1014 }, { "epoch": 0.03110825058232193, "grad_norm": 2.4291819550273877, "learning_rate": 9.999968075484959e-06, "loss": 0.8101, "step": 1015 }, { "epoch": 0.031138899105063136, "grad_norm": 1.5515090666002826, "learning_rate": 9.999966277269697e-06, "loss": 0.722, "step": 1016 }, { "epoch": 0.03116954762780434, "grad_norm": 1.729085729727727, "learning_rate": 9.999964429788487e-06, "loss": 0.8332, "step": 1017 }, { "epoch": 0.031200196150545544, "grad_norm": 1.9714517461673449, "learning_rate": 9.999962533041352e-06, "loss": 0.8959, "step": 1018 }, { "epoch": 0.031230844673286746, "grad_norm": 1.7234644575781357, "learning_rate": 9.999960587028303e-06, "loss": 0.8388, "step": 1019 }, { "epoch": 0.03126149319602795, "grad_norm": 1.9404773929127657, "learning_rate": 9.999958591749366e-06, "loss": 0.8087, "step": 1020 }, { "epoch": 0.03129214171876916, "grad_norm": 2.0394182182707112, "learning_rate": 9.999956547204557e-06, "loss": 0.8709, "step": 1021 }, { "epoch": 0.031322790241510357, "grad_norm": 1.6084444429001008, "learning_rate": 9.999954453393896e-06, "loss": 0.7379, "step": 1022 }, { "epoch": 0.03135343876425156, "grad_norm": 1.8620681760285276, "learning_rate": 9.999952310317404e-06, "loss": 0.8879, "step": 1023 }, { "epoch": 0.03138408728699277, "grad_norm": 2.1200216937475282, "learning_rate": 9.999950117975104e-06, "loss": 0.7986, "step": 1024 }, { "epoch": 0.031414735809733974, "grad_norm": 1.8660791677191568, "learning_rate": 9.999947876367015e-06, "loss": 0.8022, "step": 1025 }, { "epoch": 0.03144538433247517, "grad_norm": 1.8000456244421217, "learning_rate": 9.999945585493163e-06, "loss": 0.7568, "step": 1026 }, { "epoch": 0.03147603285521638, "grad_norm": 1.7595307248170582, "learning_rate": 9.999943245353566e-06, "loss": 0.8118, "step": 1027 }, { "epoch": 0.031506681377957584, "grad_norm": 1.8626522738997378, "learning_rate": 9.999940855948247e-06, "loss": 0.8455, "step": 1028 }, { "epoch": 0.03153732990069879, "grad_norm": 1.5924051057955593, "learning_rate": 9.999938417277234e-06, "loss": 0.839, "step": 1029 }, { "epoch": 0.03156797842343999, "grad_norm": 1.8263586965367875, "learning_rate": 9.999935929340548e-06, "loss": 0.878, "step": 1030 }, { "epoch": 0.031598626946181194, "grad_norm": 2.07930683902866, "learning_rate": 9.999933392138212e-06, "loss": 0.8302, "step": 1031 }, { "epoch": 0.0316292754689224, "grad_norm": 2.0190845701641464, "learning_rate": 9.999930805670256e-06, "loss": 0.876, "step": 1032 }, { "epoch": 0.031659923991663605, "grad_norm": 2.4320760894702675, "learning_rate": 9.9999281699367e-06, "loss": 0.883, "step": 1033 }, { "epoch": 0.031690572514404804, "grad_norm": 1.7690311660978137, "learning_rate": 9.999925484937574e-06, "loss": 0.8457, "step": 1034 }, { "epoch": 0.03172122103714601, "grad_norm": 1.8961455269484782, "learning_rate": 9.999922750672903e-06, "loss": 0.8158, "step": 1035 }, { "epoch": 0.031751869559887215, "grad_norm": 1.8312936650780276, "learning_rate": 9.999919967142713e-06, "loss": 0.7988, "step": 1036 }, { "epoch": 0.03178251808262842, "grad_norm": 0.5162644130728282, "learning_rate": 9.999917134347032e-06, "loss": 0.5078, "step": 1037 }, { "epoch": 0.03181316660536962, "grad_norm": 0.5134976046324925, "learning_rate": 9.999914252285889e-06, "loss": 0.4954, "step": 1038 }, { "epoch": 0.031843815128110825, "grad_norm": 1.7823370170830555, "learning_rate": 9.999911320959312e-06, "loss": 0.818, "step": 1039 }, { "epoch": 0.03187446365085203, "grad_norm": 1.8165402180802295, "learning_rate": 9.999908340367331e-06, "loss": 0.9009, "step": 1040 }, { "epoch": 0.03190511217359323, "grad_norm": 1.8521413730468306, "learning_rate": 9.99990531050997e-06, "loss": 0.7851, "step": 1041 }, { "epoch": 0.031935760696334435, "grad_norm": 1.8199984951148964, "learning_rate": 9.999902231387264e-06, "loss": 0.8086, "step": 1042 }, { "epoch": 0.03196640921907564, "grad_norm": 1.5226961414529756, "learning_rate": 9.999899102999243e-06, "loss": 0.6956, "step": 1043 }, { "epoch": 0.031997057741816846, "grad_norm": 0.5748270879234684, "learning_rate": 9.999895925345938e-06, "loss": 0.5169, "step": 1044 }, { "epoch": 0.032027706264558045, "grad_norm": 1.8437360585837703, "learning_rate": 9.999892698427377e-06, "loss": 0.8325, "step": 1045 }, { "epoch": 0.03205835478729925, "grad_norm": 2.824619337907129, "learning_rate": 9.999889422243594e-06, "loss": 0.8661, "step": 1046 }, { "epoch": 0.032089003310040456, "grad_norm": 1.660439482853869, "learning_rate": 9.999886096794623e-06, "loss": 0.8634, "step": 1047 }, { "epoch": 0.03211965183278166, "grad_norm": 0.5333194366164254, "learning_rate": 9.999882722080491e-06, "loss": 0.5072, "step": 1048 }, { "epoch": 0.03215030035552286, "grad_norm": 1.9427998855013917, "learning_rate": 9.99987929810124e-06, "loss": 0.7814, "step": 1049 }, { "epoch": 0.032180948878264067, "grad_norm": 1.607631094645633, "learning_rate": 9.999875824856897e-06, "loss": 0.8034, "step": 1050 }, { "epoch": 0.03221159740100527, "grad_norm": 1.852716155110088, "learning_rate": 9.999872302347498e-06, "loss": 0.869, "step": 1051 }, { "epoch": 0.03224224592374648, "grad_norm": 1.7988990518369248, "learning_rate": 9.999868730573078e-06, "loss": 0.7964, "step": 1052 }, { "epoch": 0.03227289444648768, "grad_norm": 1.6748596470866175, "learning_rate": 9.999865109533672e-06, "loss": 0.8163, "step": 1053 }, { "epoch": 0.03230354296922888, "grad_norm": 0.5743837681558488, "learning_rate": 9.999861439229317e-06, "loss": 0.5047, "step": 1054 }, { "epoch": 0.03233419149197009, "grad_norm": 0.5699530957033376, "learning_rate": 9.999857719660047e-06, "loss": 0.5075, "step": 1055 }, { "epoch": 0.032364840014711294, "grad_norm": 1.7186051052069293, "learning_rate": 9.999853950825898e-06, "loss": 0.7405, "step": 1056 }, { "epoch": 0.03239548853745249, "grad_norm": 1.7957678772360777, "learning_rate": 9.999850132726912e-06, "loss": 0.8493, "step": 1057 }, { "epoch": 0.0324261370601937, "grad_norm": 1.6515919449572727, "learning_rate": 9.999846265363121e-06, "loss": 0.7354, "step": 1058 }, { "epoch": 0.032456785582934904, "grad_norm": 1.7843591882660432, "learning_rate": 9.999842348734566e-06, "loss": 0.8022, "step": 1059 }, { "epoch": 0.03248743410567611, "grad_norm": 1.755540227612119, "learning_rate": 9.999838382841285e-06, "loss": 0.799, "step": 1060 }, { "epoch": 0.03251808262841731, "grad_norm": 1.6345306920971145, "learning_rate": 9.99983436768332e-06, "loss": 0.7472, "step": 1061 }, { "epoch": 0.032548731151158514, "grad_norm": 0.5177666047901651, "learning_rate": 9.999830303260703e-06, "loss": 0.4958, "step": 1062 }, { "epoch": 0.03257937967389972, "grad_norm": 1.684606049533237, "learning_rate": 9.999826189573482e-06, "loss": 0.8327, "step": 1063 }, { "epoch": 0.032610028196640925, "grad_norm": 1.8899667474131157, "learning_rate": 9.999822026621692e-06, "loss": 0.7876, "step": 1064 }, { "epoch": 0.032640676719382124, "grad_norm": 1.655524633069341, "learning_rate": 9.999817814405379e-06, "loss": 0.7385, "step": 1065 }, { "epoch": 0.03267132524212333, "grad_norm": 1.8256131744024673, "learning_rate": 9.99981355292458e-06, "loss": 0.93, "step": 1066 }, { "epoch": 0.032701973764864535, "grad_norm": 1.6823389959526218, "learning_rate": 9.999809242179339e-06, "loss": 0.7291, "step": 1067 }, { "epoch": 0.03273262228760574, "grad_norm": 2.0205024096601116, "learning_rate": 9.9998048821697e-06, "loss": 0.885, "step": 1068 }, { "epoch": 0.03276327081034694, "grad_norm": 1.6955188816425213, "learning_rate": 9.999800472895701e-06, "loss": 0.8641, "step": 1069 }, { "epoch": 0.032793919333088145, "grad_norm": 1.747791520512722, "learning_rate": 9.99979601435739e-06, "loss": 0.7741, "step": 1070 }, { "epoch": 0.03282456785582935, "grad_norm": 1.594665186203857, "learning_rate": 9.99979150655481e-06, "loss": 0.7361, "step": 1071 }, { "epoch": 0.03285521637857055, "grad_norm": 1.6760004958546946, "learning_rate": 9.999786949488007e-06, "loss": 0.7634, "step": 1072 }, { "epoch": 0.032885864901311755, "grad_norm": 1.8531036482817973, "learning_rate": 9.99978234315702e-06, "loss": 0.86, "step": 1073 }, { "epoch": 0.03291651342405296, "grad_norm": 1.830093312362039, "learning_rate": 9.999777687561901e-06, "loss": 0.7858, "step": 1074 }, { "epoch": 0.032947161946794166, "grad_norm": 1.6419137513922273, "learning_rate": 9.999772982702694e-06, "loss": 0.8746, "step": 1075 }, { "epoch": 0.032977810469535365, "grad_norm": 1.9687543477399327, "learning_rate": 9.999768228579442e-06, "loss": 0.7526, "step": 1076 }, { "epoch": 0.03300845899227657, "grad_norm": 0.5406102256167228, "learning_rate": 9.999763425192195e-06, "loss": 0.5088, "step": 1077 }, { "epoch": 0.03303910751501778, "grad_norm": 1.71684153044754, "learning_rate": 9.999758572541001e-06, "loss": 0.7939, "step": 1078 }, { "epoch": 0.03306975603775898, "grad_norm": 2.5656535903336777, "learning_rate": 9.999753670625905e-06, "loss": 0.6674, "step": 1079 }, { "epoch": 0.03310040456050018, "grad_norm": 3.272086253006469, "learning_rate": 9.999748719446958e-06, "loss": 0.8858, "step": 1080 }, { "epoch": 0.03313105308324139, "grad_norm": 1.8525046783926842, "learning_rate": 9.999743719004206e-06, "loss": 0.7935, "step": 1081 }, { "epoch": 0.03316170160598259, "grad_norm": 1.7818975032217068, "learning_rate": 9.999738669297702e-06, "loss": 0.7768, "step": 1082 }, { "epoch": 0.0331923501287238, "grad_norm": 0.5052828581466188, "learning_rate": 9.999733570327492e-06, "loss": 0.5179, "step": 1083 }, { "epoch": 0.033222998651465, "grad_norm": 1.73816577466685, "learning_rate": 9.99972842209363e-06, "loss": 0.7487, "step": 1084 }, { "epoch": 0.0332536471742062, "grad_norm": 1.8912208635441152, "learning_rate": 9.999723224596163e-06, "loss": 0.789, "step": 1085 }, { "epoch": 0.03328429569694741, "grad_norm": 1.746533320839887, "learning_rate": 9.999717977835144e-06, "loss": 0.768, "step": 1086 }, { "epoch": 0.033314944219688614, "grad_norm": 1.7698448565586027, "learning_rate": 9.999712681810626e-06, "loss": 0.9424, "step": 1087 }, { "epoch": 0.03334559274242981, "grad_norm": 1.841138636668302, "learning_rate": 9.999707336522657e-06, "loss": 0.5854, "step": 1088 }, { "epoch": 0.03337624126517102, "grad_norm": 1.757100154260897, "learning_rate": 9.999701941971293e-06, "loss": 0.8871, "step": 1089 }, { "epoch": 0.033406889787912224, "grad_norm": 1.818957323611816, "learning_rate": 9.999696498156588e-06, "loss": 0.7396, "step": 1090 }, { "epoch": 0.03343753831065343, "grad_norm": 1.410325652383171, "learning_rate": 9.999691005078593e-06, "loss": 0.7652, "step": 1091 }, { "epoch": 0.03346818683339463, "grad_norm": 0.5323846303261861, "learning_rate": 9.999685462737364e-06, "loss": 0.4885, "step": 1092 }, { "epoch": 0.033498835356135834, "grad_norm": 1.620655744178799, "learning_rate": 9.999679871132955e-06, "loss": 0.806, "step": 1093 }, { "epoch": 0.03352948387887704, "grad_norm": 1.6446285888453018, "learning_rate": 9.999674230265421e-06, "loss": 0.7494, "step": 1094 }, { "epoch": 0.033560132401618245, "grad_norm": 1.6187725889942643, "learning_rate": 9.999668540134817e-06, "loss": 0.8356, "step": 1095 }, { "epoch": 0.033590780924359444, "grad_norm": 1.765860600050628, "learning_rate": 9.9996628007412e-06, "loss": 0.9103, "step": 1096 }, { "epoch": 0.03362142944710065, "grad_norm": 1.7937817509000307, "learning_rate": 9.999657012084627e-06, "loss": 0.7989, "step": 1097 }, { "epoch": 0.033652077969841855, "grad_norm": 1.6196125515497004, "learning_rate": 9.999651174165152e-06, "loss": 0.7287, "step": 1098 }, { "epoch": 0.03368272649258306, "grad_norm": 1.6429218577836973, "learning_rate": 9.999645286982838e-06, "loss": 0.8053, "step": 1099 }, { "epoch": 0.03371337501532426, "grad_norm": 1.8400904636856459, "learning_rate": 9.999639350537736e-06, "loss": 0.8856, "step": 1100 }, { "epoch": 0.033744023538065465, "grad_norm": 1.5724249733273064, "learning_rate": 9.99963336482991e-06, "loss": 0.7605, "step": 1101 }, { "epoch": 0.03377467206080667, "grad_norm": 0.5151130931708697, "learning_rate": 9.999627329859418e-06, "loss": 0.5058, "step": 1102 }, { "epoch": 0.03380532058354787, "grad_norm": 1.9224887963462964, "learning_rate": 9.999621245626318e-06, "loss": 0.83, "step": 1103 }, { "epoch": 0.033835969106289075, "grad_norm": 1.6750504971099198, "learning_rate": 9.999615112130671e-06, "loss": 0.843, "step": 1104 }, { "epoch": 0.03386661762903028, "grad_norm": 1.9769006520125645, "learning_rate": 9.999608929372537e-06, "loss": 0.7745, "step": 1105 }, { "epoch": 0.03389726615177149, "grad_norm": 1.651472762469559, "learning_rate": 9.999602697351978e-06, "loss": 0.827, "step": 1106 }, { "epoch": 0.033927914674512685, "grad_norm": 1.9114920120231245, "learning_rate": 9.999596416069052e-06, "loss": 0.7383, "step": 1107 }, { "epoch": 0.03395856319725389, "grad_norm": 1.8811329615426189, "learning_rate": 9.999590085523825e-06, "loss": 0.773, "step": 1108 }, { "epoch": 0.0339892117199951, "grad_norm": 1.6379477553234512, "learning_rate": 9.999583705716357e-06, "loss": 0.7383, "step": 1109 }, { "epoch": 0.0340198602427363, "grad_norm": 1.5960518713117284, "learning_rate": 9.999577276646712e-06, "loss": 0.7765, "step": 1110 }, { "epoch": 0.0340505087654775, "grad_norm": 1.5053869485879603, "learning_rate": 9.999570798314952e-06, "loss": 0.86, "step": 1111 }, { "epoch": 0.03408115728821871, "grad_norm": 1.7537169519226097, "learning_rate": 9.999564270721144e-06, "loss": 0.7437, "step": 1112 }, { "epoch": 0.03411180581095991, "grad_norm": 1.8563950735344499, "learning_rate": 9.999557693865349e-06, "loss": 0.7765, "step": 1113 }, { "epoch": 0.03414245433370112, "grad_norm": 1.60572992836695, "learning_rate": 9.999551067747632e-06, "loss": 0.7654, "step": 1114 }, { "epoch": 0.03417310285644232, "grad_norm": 1.6794497244866036, "learning_rate": 9.999544392368059e-06, "loss": 0.7389, "step": 1115 }, { "epoch": 0.03420375137918352, "grad_norm": 1.6357672453122647, "learning_rate": 9.999537667726697e-06, "loss": 0.8895, "step": 1116 }, { "epoch": 0.03423439990192473, "grad_norm": 1.843317779462099, "learning_rate": 9.99953089382361e-06, "loss": 0.8506, "step": 1117 }, { "epoch": 0.034265048424665934, "grad_norm": 0.5289539334868499, "learning_rate": 9.999524070658865e-06, "loss": 0.4868, "step": 1118 }, { "epoch": 0.03429569694740713, "grad_norm": 1.8603593244229848, "learning_rate": 9.999517198232533e-06, "loss": 0.8688, "step": 1119 }, { "epoch": 0.03432634547014834, "grad_norm": 1.6768008979020117, "learning_rate": 9.999510276544677e-06, "loss": 0.7333, "step": 1120 }, { "epoch": 0.034356993992889544, "grad_norm": 1.8241463551749322, "learning_rate": 9.999503305595369e-06, "loss": 0.7455, "step": 1121 }, { "epoch": 0.03438764251563075, "grad_norm": 1.7213923712550925, "learning_rate": 9.999496285384674e-06, "loss": 0.8513, "step": 1122 }, { "epoch": 0.03441829103837195, "grad_norm": 1.8468479373456193, "learning_rate": 9.999489215912664e-06, "loss": 0.7735, "step": 1123 }, { "epoch": 0.034448939561113154, "grad_norm": 1.933355117641368, "learning_rate": 9.999482097179406e-06, "loss": 0.8196, "step": 1124 }, { "epoch": 0.03447958808385436, "grad_norm": 1.684684434038791, "learning_rate": 9.999474929184972e-06, "loss": 0.7799, "step": 1125 }, { "epoch": 0.034510236606595565, "grad_norm": 1.7110349246879675, "learning_rate": 9.999467711929433e-06, "loss": 0.6959, "step": 1126 }, { "epoch": 0.034540885129336764, "grad_norm": 1.7982532177200405, "learning_rate": 9.99946044541286e-06, "loss": 0.8425, "step": 1127 }, { "epoch": 0.03457153365207797, "grad_norm": 1.616288257724621, "learning_rate": 9.999453129635324e-06, "loss": 0.7264, "step": 1128 }, { "epoch": 0.034602182174819175, "grad_norm": 1.8102076491624945, "learning_rate": 9.999445764596896e-06, "loss": 0.7203, "step": 1129 }, { "epoch": 0.03463283069756038, "grad_norm": 1.6288188227033829, "learning_rate": 9.99943835029765e-06, "loss": 0.7334, "step": 1130 }, { "epoch": 0.03466347922030158, "grad_norm": 1.6960100241422467, "learning_rate": 9.99943088673766e-06, "loss": 0.8266, "step": 1131 }, { "epoch": 0.034694127743042785, "grad_norm": 0.5088121491155283, "learning_rate": 9.999423373916997e-06, "loss": 0.5092, "step": 1132 }, { "epoch": 0.03472477626578399, "grad_norm": 1.9510743591655035, "learning_rate": 9.999415811835737e-06, "loss": 0.9085, "step": 1133 }, { "epoch": 0.03475542478852519, "grad_norm": 1.8356690849435737, "learning_rate": 9.999408200493954e-06, "loss": 0.8237, "step": 1134 }, { "epoch": 0.034786073311266395, "grad_norm": 1.6756251534307607, "learning_rate": 9.999400539891722e-06, "loss": 0.8251, "step": 1135 }, { "epoch": 0.0348167218340076, "grad_norm": 1.6326010312376227, "learning_rate": 9.999392830029118e-06, "loss": 0.8081, "step": 1136 }, { "epoch": 0.03484737035674881, "grad_norm": 1.5271569206752962, "learning_rate": 9.999385070906217e-06, "loss": 0.7965, "step": 1137 }, { "epoch": 0.034878018879490005, "grad_norm": 0.5188871591946739, "learning_rate": 9.999377262523095e-06, "loss": 0.5011, "step": 1138 }, { "epoch": 0.03490866740223121, "grad_norm": 1.6725081240101727, "learning_rate": 9.99936940487983e-06, "loss": 0.8434, "step": 1139 }, { "epoch": 0.03493931592497242, "grad_norm": 1.697173287761969, "learning_rate": 9.9993614979765e-06, "loss": 0.8039, "step": 1140 }, { "epoch": 0.03496996444771362, "grad_norm": 1.7909275081255809, "learning_rate": 9.999353541813182e-06, "loss": 0.8522, "step": 1141 }, { "epoch": 0.03500061297045482, "grad_norm": 1.6492358262336602, "learning_rate": 9.999345536389955e-06, "loss": 0.775, "step": 1142 }, { "epoch": 0.03503126149319603, "grad_norm": 1.756136982569441, "learning_rate": 9.999337481706897e-06, "loss": 0.755, "step": 1143 }, { "epoch": 0.03506191001593723, "grad_norm": 2.1821756631694047, "learning_rate": 9.999329377764086e-06, "loss": 0.7698, "step": 1144 }, { "epoch": 0.03509255853867844, "grad_norm": 1.7057705963963425, "learning_rate": 9.999321224561604e-06, "loss": 0.8252, "step": 1145 }, { "epoch": 0.03512320706141964, "grad_norm": 1.7975837242535742, "learning_rate": 9.999313022099533e-06, "loss": 0.8676, "step": 1146 }, { "epoch": 0.03515385558416084, "grad_norm": 1.6113390461163097, "learning_rate": 9.999304770377948e-06, "loss": 0.7547, "step": 1147 }, { "epoch": 0.03518450410690205, "grad_norm": 1.8522272972889189, "learning_rate": 9.999296469396938e-06, "loss": 0.8316, "step": 1148 }, { "epoch": 0.035215152629643254, "grad_norm": 1.4696959427106284, "learning_rate": 9.999288119156578e-06, "loss": 0.6652, "step": 1149 }, { "epoch": 0.03524580115238445, "grad_norm": 1.8777809422929268, "learning_rate": 9.999279719656953e-06, "loss": 0.8636, "step": 1150 }, { "epoch": 0.03527644967512566, "grad_norm": 0.5836287265412496, "learning_rate": 9.999271270898148e-06, "loss": 0.5018, "step": 1151 }, { "epoch": 0.035307098197866864, "grad_norm": 1.5487782618153685, "learning_rate": 9.999262772880241e-06, "loss": 0.7576, "step": 1152 }, { "epoch": 0.03533774672060807, "grad_norm": 1.6815248810258574, "learning_rate": 9.99925422560332e-06, "loss": 0.7389, "step": 1153 }, { "epoch": 0.03536839524334927, "grad_norm": 1.6629924107211675, "learning_rate": 9.999245629067469e-06, "loss": 0.7906, "step": 1154 }, { "epoch": 0.035399043766090474, "grad_norm": 3.310733764073704, "learning_rate": 9.999236983272772e-06, "loss": 0.7574, "step": 1155 }, { "epoch": 0.03542969228883168, "grad_norm": 1.6526529608922775, "learning_rate": 9.999228288219314e-06, "loss": 0.7975, "step": 1156 }, { "epoch": 0.035460340811572885, "grad_norm": 0.5376593588574893, "learning_rate": 9.99921954390718e-06, "loss": 0.5011, "step": 1157 }, { "epoch": 0.035490989334314084, "grad_norm": 1.6633554922027751, "learning_rate": 9.999210750336455e-06, "loss": 0.8609, "step": 1158 }, { "epoch": 0.03552163785705529, "grad_norm": 1.640402592893508, "learning_rate": 9.999201907507227e-06, "loss": 0.7939, "step": 1159 }, { "epoch": 0.035552286379796495, "grad_norm": 1.727270716882865, "learning_rate": 9.999193015419586e-06, "loss": 0.7311, "step": 1160 }, { "epoch": 0.0355829349025377, "grad_norm": 1.7152205383975987, "learning_rate": 9.999184074073618e-06, "loss": 0.7755, "step": 1161 }, { "epoch": 0.0356135834252789, "grad_norm": 1.8410184165993435, "learning_rate": 9.999175083469407e-06, "loss": 0.7544, "step": 1162 }, { "epoch": 0.035644231948020105, "grad_norm": 2.238409535319607, "learning_rate": 9.999166043607048e-06, "loss": 0.7701, "step": 1163 }, { "epoch": 0.03567488047076131, "grad_norm": 1.6187712425511729, "learning_rate": 9.999156954486624e-06, "loss": 0.7453, "step": 1164 }, { "epoch": 0.03570552899350252, "grad_norm": 1.5951520740853058, "learning_rate": 9.999147816108229e-06, "loss": 0.7508, "step": 1165 }, { "epoch": 0.035736177516243715, "grad_norm": 1.7626902598392094, "learning_rate": 9.999138628471951e-06, "loss": 0.7679, "step": 1166 }, { "epoch": 0.03576682603898492, "grad_norm": 0.547309588731121, "learning_rate": 9.999129391577882e-06, "loss": 0.5153, "step": 1167 }, { "epoch": 0.03579747456172613, "grad_norm": 1.785635902091402, "learning_rate": 9.99912010542611e-06, "loss": 0.8155, "step": 1168 }, { "epoch": 0.035828123084467325, "grad_norm": 1.6174494959179129, "learning_rate": 9.99911077001673e-06, "loss": 0.8667, "step": 1169 }, { "epoch": 0.03585877160720853, "grad_norm": 1.8487785876458556, "learning_rate": 9.999101385349831e-06, "loss": 0.7826, "step": 1170 }, { "epoch": 0.03588942012994974, "grad_norm": 1.8057417817239427, "learning_rate": 9.99909195142551e-06, "loss": 0.7438, "step": 1171 }, { "epoch": 0.03592006865269094, "grad_norm": 1.626627482723868, "learning_rate": 9.999082468243855e-06, "loss": 0.8037, "step": 1172 }, { "epoch": 0.03595071717543214, "grad_norm": 1.5856334722876904, "learning_rate": 9.99907293580496e-06, "loss": 0.7518, "step": 1173 }, { "epoch": 0.03598136569817335, "grad_norm": 1.9245201616468839, "learning_rate": 9.999063354108923e-06, "loss": 0.7901, "step": 1174 }, { "epoch": 0.03601201422091455, "grad_norm": 1.730484989223436, "learning_rate": 9.999053723155834e-06, "loss": 0.8478, "step": 1175 }, { "epoch": 0.03604266274365576, "grad_norm": 1.669962256179676, "learning_rate": 9.99904404294579e-06, "loss": 0.6833, "step": 1176 }, { "epoch": 0.03607331126639696, "grad_norm": 1.930805275930345, "learning_rate": 9.999034313478888e-06, "loss": 0.7312, "step": 1177 }, { "epoch": 0.03610395978913816, "grad_norm": 1.9107375135778626, "learning_rate": 9.99902453475522e-06, "loss": 0.787, "step": 1178 }, { "epoch": 0.03613460831187937, "grad_norm": 1.6265729751645022, "learning_rate": 9.999014706774883e-06, "loss": 0.7579, "step": 1179 }, { "epoch": 0.036165256834620574, "grad_norm": 1.5820860870655038, "learning_rate": 9.999004829537976e-06, "loss": 0.8122, "step": 1180 }, { "epoch": 0.03619590535736177, "grad_norm": 2.024717960574628, "learning_rate": 9.998994903044596e-06, "loss": 0.8135, "step": 1181 }, { "epoch": 0.03622655388010298, "grad_norm": 1.659729987533607, "learning_rate": 9.99898492729484e-06, "loss": 0.7505, "step": 1182 }, { "epoch": 0.036257202402844184, "grad_norm": 1.6864779760365385, "learning_rate": 9.998974902288805e-06, "loss": 0.7196, "step": 1183 }, { "epoch": 0.03628785092558539, "grad_norm": 1.713662556019679, "learning_rate": 9.998964828026594e-06, "loss": 0.7953, "step": 1184 }, { "epoch": 0.03631849944832659, "grad_norm": 1.736044324149282, "learning_rate": 9.998954704508301e-06, "loss": 0.8024, "step": 1185 }, { "epoch": 0.036349147971067794, "grad_norm": 1.5620520134976337, "learning_rate": 9.99894453173403e-06, "loss": 0.7781, "step": 1186 }, { "epoch": 0.036379796493809, "grad_norm": 1.5524782142921214, "learning_rate": 9.998934309703878e-06, "loss": 0.7554, "step": 1187 }, { "epoch": 0.036410445016550205, "grad_norm": 0.6104674279909205, "learning_rate": 9.998924038417947e-06, "loss": 0.4946, "step": 1188 }, { "epoch": 0.036441093539291404, "grad_norm": 1.6901403920064855, "learning_rate": 9.99891371787634e-06, "loss": 0.7683, "step": 1189 }, { "epoch": 0.03647174206203261, "grad_norm": 1.7528643674718256, "learning_rate": 9.998903348079157e-06, "loss": 0.8256, "step": 1190 }, { "epoch": 0.036502390584773815, "grad_norm": 1.8808248157149756, "learning_rate": 9.998892929026499e-06, "loss": 0.7738, "step": 1191 }, { "epoch": 0.03653303910751502, "grad_norm": 1.784426708141117, "learning_rate": 9.998882460718472e-06, "loss": 0.8748, "step": 1192 }, { "epoch": 0.03656368763025622, "grad_norm": 1.5607507670951204, "learning_rate": 9.998871943155175e-06, "loss": 0.8433, "step": 1193 }, { "epoch": 0.036594336152997425, "grad_norm": 1.8202786986896748, "learning_rate": 9.998861376336715e-06, "loss": 0.7971, "step": 1194 }, { "epoch": 0.03662498467573863, "grad_norm": 2.2807477253093476, "learning_rate": 9.998850760263194e-06, "loss": 0.8241, "step": 1195 }, { "epoch": 0.03665563319847984, "grad_norm": 1.7426954338702676, "learning_rate": 9.998840094934717e-06, "loss": 0.8199, "step": 1196 }, { "epoch": 0.036686281721221035, "grad_norm": 0.5242582104330498, "learning_rate": 9.998829380351391e-06, "loss": 0.5006, "step": 1197 }, { "epoch": 0.03671693024396224, "grad_norm": 1.7238749053521105, "learning_rate": 9.99881861651332e-06, "loss": 0.7797, "step": 1198 }, { "epoch": 0.03674757876670345, "grad_norm": 1.9096366139837857, "learning_rate": 9.998807803420609e-06, "loss": 0.796, "step": 1199 }, { "epoch": 0.036778227289444645, "grad_norm": 1.6227787326820915, "learning_rate": 9.998796941073365e-06, "loss": 0.8099, "step": 1200 }, { "epoch": 0.03680887581218585, "grad_norm": 1.7784524622101054, "learning_rate": 9.998786029471698e-06, "loss": 0.8361, "step": 1201 }, { "epoch": 0.03683952433492706, "grad_norm": 1.7767739919861267, "learning_rate": 9.998775068615713e-06, "loss": 0.7884, "step": 1202 }, { "epoch": 0.03687017285766826, "grad_norm": 1.7807771072352816, "learning_rate": 9.998764058505517e-06, "loss": 0.7279, "step": 1203 }, { "epoch": 0.03690082138040946, "grad_norm": 1.7659747605166634, "learning_rate": 9.99875299914122e-06, "loss": 0.7081, "step": 1204 }, { "epoch": 0.03693146990315067, "grad_norm": 1.5942484070219858, "learning_rate": 9.99874189052293e-06, "loss": 0.7872, "step": 1205 }, { "epoch": 0.03696211842589187, "grad_norm": 1.7355968702562121, "learning_rate": 9.99873073265076e-06, "loss": 0.8781, "step": 1206 }, { "epoch": 0.03699276694863308, "grad_norm": 1.7695887692185686, "learning_rate": 9.998719525524814e-06, "loss": 0.722, "step": 1207 }, { "epoch": 0.03702341547137428, "grad_norm": 1.8534321371964955, "learning_rate": 9.998708269145207e-06, "loss": 0.6864, "step": 1208 }, { "epoch": 0.03705406399411548, "grad_norm": 0.5487364962803577, "learning_rate": 9.99869696351205e-06, "loss": 0.533, "step": 1209 }, { "epoch": 0.03708471251685669, "grad_norm": 1.6329289070635078, "learning_rate": 9.99868560862545e-06, "loss": 0.7749, "step": 1210 }, { "epoch": 0.037115361039597894, "grad_norm": 0.558680909893005, "learning_rate": 9.998674204485524e-06, "loss": 0.5323, "step": 1211 }, { "epoch": 0.03714600956233909, "grad_norm": 1.760318350009961, "learning_rate": 9.99866275109238e-06, "loss": 0.7299, "step": 1212 }, { "epoch": 0.0371766580850803, "grad_norm": 1.8914935954255534, "learning_rate": 9.998651248446135e-06, "loss": 0.9074, "step": 1213 }, { "epoch": 0.037207306607821504, "grad_norm": 1.8976150424628448, "learning_rate": 9.9986396965469e-06, "loss": 0.7876, "step": 1214 }, { "epoch": 0.03723795513056271, "grad_norm": 1.8775194342254782, "learning_rate": 9.998628095394786e-06, "loss": 0.7714, "step": 1215 }, { "epoch": 0.03726860365330391, "grad_norm": 1.6750893720709967, "learning_rate": 9.998616444989912e-06, "loss": 0.7428, "step": 1216 }, { "epoch": 0.037299252176045114, "grad_norm": 1.674021329122576, "learning_rate": 9.998604745332394e-06, "loss": 0.7971, "step": 1217 }, { "epoch": 0.03732990069878632, "grad_norm": 1.8251491014221277, "learning_rate": 9.998592996422342e-06, "loss": 0.7582, "step": 1218 }, { "epoch": 0.037360549221527525, "grad_norm": 1.5815927936323524, "learning_rate": 9.998581198259875e-06, "loss": 0.7361, "step": 1219 }, { "epoch": 0.037391197744268724, "grad_norm": 1.916605869502781, "learning_rate": 9.998569350845107e-06, "loss": 0.8642, "step": 1220 }, { "epoch": 0.03742184626700993, "grad_norm": 1.69041357573398, "learning_rate": 9.998557454178158e-06, "loss": 0.7968, "step": 1221 }, { "epoch": 0.037452494789751135, "grad_norm": 0.6403570273408729, "learning_rate": 9.998545508259143e-06, "loss": 0.5074, "step": 1222 }, { "epoch": 0.03748314331249234, "grad_norm": 1.7869141218941071, "learning_rate": 9.99853351308818e-06, "loss": 0.8228, "step": 1223 }, { "epoch": 0.03751379183523354, "grad_norm": 1.7031457826191256, "learning_rate": 9.998521468665388e-06, "loss": 0.7559, "step": 1224 }, { "epoch": 0.037544440357974745, "grad_norm": 1.6165643516566068, "learning_rate": 9.998509374990885e-06, "loss": 0.7653, "step": 1225 }, { "epoch": 0.03757508888071595, "grad_norm": 1.72953835012869, "learning_rate": 9.998497232064789e-06, "loss": 0.8475, "step": 1226 }, { "epoch": 0.03760573740345716, "grad_norm": 1.5333665205120008, "learning_rate": 9.998485039887222e-06, "loss": 0.8061, "step": 1227 }, { "epoch": 0.037636385926198355, "grad_norm": 1.8371189139156174, "learning_rate": 9.998472798458302e-06, "loss": 0.8344, "step": 1228 }, { "epoch": 0.03766703444893956, "grad_norm": 1.9334368280273213, "learning_rate": 9.998460507778152e-06, "loss": 0.8836, "step": 1229 }, { "epoch": 0.03769768297168077, "grad_norm": 1.494770092650075, "learning_rate": 9.99844816784689e-06, "loss": 0.7721, "step": 1230 }, { "epoch": 0.037728331494421966, "grad_norm": 0.5397298337466175, "learning_rate": 9.99843577866464e-06, "loss": 0.5006, "step": 1231 }, { "epoch": 0.03775898001716317, "grad_norm": 1.815277127495547, "learning_rate": 9.998423340231524e-06, "loss": 0.8228, "step": 1232 }, { "epoch": 0.03778962853990438, "grad_norm": 1.6938253444337605, "learning_rate": 9.998410852547663e-06, "loss": 0.7791, "step": 1233 }, { "epoch": 0.03782027706264558, "grad_norm": 1.5964747283815286, "learning_rate": 9.998398315613182e-06, "loss": 0.7574, "step": 1234 }, { "epoch": 0.03785092558538678, "grad_norm": 1.7310708881302708, "learning_rate": 9.998385729428202e-06, "loss": 0.83, "step": 1235 }, { "epoch": 0.03788157410812799, "grad_norm": 1.5519059317132615, "learning_rate": 9.998373093992851e-06, "loss": 0.8229, "step": 1236 }, { "epoch": 0.03791222263086919, "grad_norm": 1.9821678707674124, "learning_rate": 9.998360409307248e-06, "loss": 0.8326, "step": 1237 }, { "epoch": 0.0379428711536104, "grad_norm": 1.8282235751849092, "learning_rate": 9.998347675371523e-06, "loss": 0.7931, "step": 1238 }, { "epoch": 0.0379735196763516, "grad_norm": 1.6890929760253868, "learning_rate": 9.998334892185799e-06, "loss": 0.7524, "step": 1239 }, { "epoch": 0.0380041681990928, "grad_norm": 0.5387374431240131, "learning_rate": 9.998322059750203e-06, "loss": 0.4745, "step": 1240 }, { "epoch": 0.03803481672183401, "grad_norm": 1.8797677556410664, "learning_rate": 9.99830917806486e-06, "loss": 0.8161, "step": 1241 }, { "epoch": 0.038065465244575214, "grad_norm": 1.5938736850875737, "learning_rate": 9.998296247129897e-06, "loss": 0.7889, "step": 1242 }, { "epoch": 0.03809611376731641, "grad_norm": 1.7385181503444422, "learning_rate": 9.998283266945444e-06, "loss": 0.8377, "step": 1243 }, { "epoch": 0.03812676229005762, "grad_norm": 1.7334947227336748, "learning_rate": 9.998270237511627e-06, "loss": 0.7654, "step": 1244 }, { "epoch": 0.038157410812798824, "grad_norm": 1.9372788428538132, "learning_rate": 9.998257158828572e-06, "loss": 0.7214, "step": 1245 }, { "epoch": 0.03818805933554003, "grad_norm": 1.6396950146509555, "learning_rate": 9.998244030896413e-06, "loss": 0.7624, "step": 1246 }, { "epoch": 0.03821870785828123, "grad_norm": 0.5483108326466671, "learning_rate": 9.998230853715276e-06, "loss": 0.5393, "step": 1247 }, { "epoch": 0.038249356381022434, "grad_norm": 0.5286294590516066, "learning_rate": 9.99821762728529e-06, "loss": 0.4935, "step": 1248 }, { "epoch": 0.03828000490376364, "grad_norm": 1.697669914762773, "learning_rate": 9.998204351606591e-06, "loss": 0.8181, "step": 1249 }, { "epoch": 0.038310653426504845, "grad_norm": 1.520131459577211, "learning_rate": 9.998191026679302e-06, "loss": 0.6901, "step": 1250 }, { "epoch": 0.038341301949246044, "grad_norm": 1.7329097483905846, "learning_rate": 9.99817765250356e-06, "loss": 0.7455, "step": 1251 }, { "epoch": 0.03837195047198725, "grad_norm": 1.935818729499156, "learning_rate": 9.998164229079491e-06, "loss": 0.7804, "step": 1252 }, { "epoch": 0.038402598994728455, "grad_norm": 1.5901216239176532, "learning_rate": 9.998150756407236e-06, "loss": 0.7596, "step": 1253 }, { "epoch": 0.03843324751746966, "grad_norm": 1.5965555315638718, "learning_rate": 9.99813723448692e-06, "loss": 0.7564, "step": 1254 }, { "epoch": 0.03846389604021086, "grad_norm": 1.5823963315339074, "learning_rate": 9.99812366331868e-06, "loss": 0.8657, "step": 1255 }, { "epoch": 0.038494544562952066, "grad_norm": 0.5204330562643371, "learning_rate": 9.998110042902648e-06, "loss": 0.4998, "step": 1256 }, { "epoch": 0.03852519308569327, "grad_norm": 1.64374687157019, "learning_rate": 9.998096373238958e-06, "loss": 0.7311, "step": 1257 }, { "epoch": 0.03855584160843448, "grad_norm": 1.5471515646309055, "learning_rate": 9.998082654327748e-06, "loss": 0.6961, "step": 1258 }, { "epoch": 0.038586490131175676, "grad_norm": 1.4748313181090105, "learning_rate": 9.99806888616915e-06, "loss": 0.8393, "step": 1259 }, { "epoch": 0.03861713865391688, "grad_norm": 1.9364733486403443, "learning_rate": 9.998055068763299e-06, "loss": 0.8146, "step": 1260 }, { "epoch": 0.03864778717665809, "grad_norm": 1.5283621638963762, "learning_rate": 9.998041202110334e-06, "loss": 0.6799, "step": 1261 }, { "epoch": 0.038678435699399286, "grad_norm": 0.5230471416557303, "learning_rate": 9.99802728621039e-06, "loss": 0.5075, "step": 1262 }, { "epoch": 0.03870908422214049, "grad_norm": 1.9157562506234542, "learning_rate": 9.998013321063604e-06, "loss": 0.9216, "step": 1263 }, { "epoch": 0.0387397327448817, "grad_norm": 1.495107200244563, "learning_rate": 9.997999306670114e-06, "loss": 0.8099, "step": 1264 }, { "epoch": 0.0387703812676229, "grad_norm": 1.5934107017774324, "learning_rate": 9.997985243030058e-06, "loss": 0.813, "step": 1265 }, { "epoch": 0.0388010297903641, "grad_norm": 1.7384428923769968, "learning_rate": 9.997971130143575e-06, "loss": 0.8998, "step": 1266 }, { "epoch": 0.03883167831310531, "grad_norm": 1.8784609083267925, "learning_rate": 9.997956968010803e-06, "loss": 0.7622, "step": 1267 }, { "epoch": 0.03886232683584651, "grad_norm": 0.5230753941918078, "learning_rate": 9.997942756631883e-06, "loss": 0.4973, "step": 1268 }, { "epoch": 0.03889297535858772, "grad_norm": 1.6669621764204932, "learning_rate": 9.997928496006954e-06, "loss": 0.8317, "step": 1269 }, { "epoch": 0.03892362388132892, "grad_norm": 1.6152826092720192, "learning_rate": 9.997914186136157e-06, "loss": 0.8151, "step": 1270 }, { "epoch": 0.03895427240407012, "grad_norm": 2.081349695293963, "learning_rate": 9.997899827019632e-06, "loss": 0.8343, "step": 1271 }, { "epoch": 0.03898492092681133, "grad_norm": 1.702218377981303, "learning_rate": 9.997885418657522e-06, "loss": 0.7812, "step": 1272 }, { "epoch": 0.039015569449552534, "grad_norm": 1.623503868741839, "learning_rate": 9.997870961049968e-06, "loss": 0.8227, "step": 1273 }, { "epoch": 0.03904621797229373, "grad_norm": 1.9461479651912672, "learning_rate": 9.997856454197112e-06, "loss": 0.8461, "step": 1274 }, { "epoch": 0.03907686649503494, "grad_norm": 1.5905390668488009, "learning_rate": 9.997841898099098e-06, "loss": 0.807, "step": 1275 }, { "epoch": 0.039107515017776144, "grad_norm": 0.48072708610024517, "learning_rate": 9.99782729275607e-06, "loss": 0.477, "step": 1276 }, { "epoch": 0.03913816354051735, "grad_norm": 1.7809249725507832, "learning_rate": 9.997812638168169e-06, "loss": 0.7832, "step": 1277 }, { "epoch": 0.03916881206325855, "grad_norm": 1.8505583358819284, "learning_rate": 9.997797934335542e-06, "loss": 0.9105, "step": 1278 }, { "epoch": 0.039199460585999754, "grad_norm": 1.7764142287048286, "learning_rate": 9.997783181258335e-06, "loss": 0.7078, "step": 1279 }, { "epoch": 0.03923010910874096, "grad_norm": 1.7266252094259802, "learning_rate": 9.99776837893669e-06, "loss": 0.8231, "step": 1280 }, { "epoch": 0.039260757631482165, "grad_norm": 1.7321331250059988, "learning_rate": 9.997753527370756e-06, "loss": 0.7548, "step": 1281 }, { "epoch": 0.039291406154223364, "grad_norm": 1.643931269138987, "learning_rate": 9.997738626560676e-06, "loss": 0.7186, "step": 1282 }, { "epoch": 0.03932205467696457, "grad_norm": 1.8423257831848276, "learning_rate": 9.9977236765066e-06, "loss": 0.8046, "step": 1283 }, { "epoch": 0.039352703199705776, "grad_norm": 1.9956003586059134, "learning_rate": 9.99770867720867e-06, "loss": 0.7684, "step": 1284 }, { "epoch": 0.03938335172244698, "grad_norm": 1.7207080688573422, "learning_rate": 9.997693628667042e-06, "loss": 0.7624, "step": 1285 }, { "epoch": 0.03941400024518818, "grad_norm": 1.6532964362605778, "learning_rate": 9.997678530881858e-06, "loss": 0.6596, "step": 1286 }, { "epoch": 0.039444648767929386, "grad_norm": 1.6213801996157753, "learning_rate": 9.99766338385327e-06, "loss": 0.8454, "step": 1287 }, { "epoch": 0.03947529729067059, "grad_norm": 1.5168520296378738, "learning_rate": 9.997648187581425e-06, "loss": 0.9081, "step": 1288 }, { "epoch": 0.0395059458134118, "grad_norm": 0.5417395014644866, "learning_rate": 9.997632942066473e-06, "loss": 0.5013, "step": 1289 }, { "epoch": 0.039536594336152996, "grad_norm": 1.5722373006645627, "learning_rate": 9.997617647308566e-06, "loss": 0.7228, "step": 1290 }, { "epoch": 0.0395672428588942, "grad_norm": 1.6620363578364559, "learning_rate": 9.997602303307854e-06, "loss": 0.7073, "step": 1291 }, { "epoch": 0.03959789138163541, "grad_norm": 1.4005231474936544, "learning_rate": 9.997586910064488e-06, "loss": 0.756, "step": 1292 }, { "epoch": 0.039628539904376606, "grad_norm": 1.656180806731981, "learning_rate": 9.997571467578617e-06, "loss": 0.6951, "step": 1293 }, { "epoch": 0.03965918842711781, "grad_norm": 1.9046061183554535, "learning_rate": 9.997555975850398e-06, "loss": 0.6914, "step": 1294 }, { "epoch": 0.03968983694985902, "grad_norm": 1.9058332845884065, "learning_rate": 9.997540434879981e-06, "loss": 0.8226, "step": 1295 }, { "epoch": 0.03972048547260022, "grad_norm": 1.530409099646988, "learning_rate": 9.997524844667519e-06, "loss": 0.7689, "step": 1296 }, { "epoch": 0.03975113399534142, "grad_norm": 1.6794485757551023, "learning_rate": 9.997509205213166e-06, "loss": 0.632, "step": 1297 }, { "epoch": 0.03978178251808263, "grad_norm": 1.7924078568684174, "learning_rate": 9.997493516517076e-06, "loss": 0.8549, "step": 1298 }, { "epoch": 0.03981243104082383, "grad_norm": 1.7945456992672788, "learning_rate": 9.997477778579404e-06, "loss": 0.775, "step": 1299 }, { "epoch": 0.03984307956356504, "grad_norm": 1.7459349687830656, "learning_rate": 9.997461991400306e-06, "loss": 0.8177, "step": 1300 }, { "epoch": 0.03987372808630624, "grad_norm": 1.6297227843055364, "learning_rate": 9.997446154979936e-06, "loss": 0.8607, "step": 1301 }, { "epoch": 0.03990437660904744, "grad_norm": 0.5622450669216099, "learning_rate": 9.99743026931845e-06, "loss": 0.5185, "step": 1302 }, { "epoch": 0.03993502513178865, "grad_norm": 1.691150618255671, "learning_rate": 9.997414334416002e-06, "loss": 0.8306, "step": 1303 }, { "epoch": 0.039965673654529854, "grad_norm": 1.7369691714068876, "learning_rate": 9.997398350272755e-06, "loss": 0.8503, "step": 1304 }, { "epoch": 0.03999632217727105, "grad_norm": 1.6049637554418557, "learning_rate": 9.997382316888864e-06, "loss": 0.7237, "step": 1305 }, { "epoch": 0.04002697070001226, "grad_norm": 2.0797359306107244, "learning_rate": 9.997366234264484e-06, "loss": 0.8232, "step": 1306 }, { "epoch": 0.040057619222753464, "grad_norm": 2.003012288507845, "learning_rate": 9.997350102399777e-06, "loss": 0.7547, "step": 1307 }, { "epoch": 0.04008826774549467, "grad_norm": 1.7112232736927555, "learning_rate": 9.997333921294901e-06, "loss": 0.7354, "step": 1308 }, { "epoch": 0.04011891626823587, "grad_norm": 0.5274270299350793, "learning_rate": 9.997317690950015e-06, "loss": 0.4865, "step": 1309 }, { "epoch": 0.040149564790977074, "grad_norm": 1.8921609643928292, "learning_rate": 9.997301411365279e-06, "loss": 0.8214, "step": 1310 }, { "epoch": 0.04018021331371828, "grad_norm": 1.4373163594056046, "learning_rate": 9.997285082540854e-06, "loss": 0.8271, "step": 1311 }, { "epoch": 0.040210861836459486, "grad_norm": 1.9653152496453195, "learning_rate": 9.9972687044769e-06, "loss": 0.8834, "step": 1312 }, { "epoch": 0.040241510359200684, "grad_norm": 1.4777693996602277, "learning_rate": 9.997252277173579e-06, "loss": 0.7723, "step": 1313 }, { "epoch": 0.04027215888194189, "grad_norm": 1.6727655050669237, "learning_rate": 9.997235800631053e-06, "loss": 0.7824, "step": 1314 }, { "epoch": 0.040302807404683096, "grad_norm": 1.641655537947684, "learning_rate": 9.997219274849483e-06, "loss": 0.8258, "step": 1315 }, { "epoch": 0.0403334559274243, "grad_norm": 1.7375055378598794, "learning_rate": 9.997202699829035e-06, "loss": 0.8394, "step": 1316 }, { "epoch": 0.0403641044501655, "grad_norm": 1.9950139793804131, "learning_rate": 9.997186075569869e-06, "loss": 0.7665, "step": 1317 }, { "epoch": 0.040394752972906706, "grad_norm": 1.5258732712686571, "learning_rate": 9.99716940207215e-06, "loss": 0.8422, "step": 1318 }, { "epoch": 0.04042540149564791, "grad_norm": 1.6957874557600208, "learning_rate": 9.997152679336041e-06, "loss": 0.8341, "step": 1319 }, { "epoch": 0.04045605001838912, "grad_norm": 1.6626474287962052, "learning_rate": 9.99713590736171e-06, "loss": 0.7876, "step": 1320 }, { "epoch": 0.040486698541130316, "grad_norm": 1.6353102294579123, "learning_rate": 9.99711908614932e-06, "loss": 0.8289, "step": 1321 }, { "epoch": 0.04051734706387152, "grad_norm": 1.6516028743627378, "learning_rate": 9.997102215699037e-06, "loss": 0.8665, "step": 1322 }, { "epoch": 0.04054799558661273, "grad_norm": 0.5382360262261924, "learning_rate": 9.997085296011027e-06, "loss": 0.5146, "step": 1323 }, { "epoch": 0.040578644109353926, "grad_norm": 0.5091380174835185, "learning_rate": 9.997068327085458e-06, "loss": 0.4811, "step": 1324 }, { "epoch": 0.04060929263209513, "grad_norm": 1.655613619761504, "learning_rate": 9.997051308922495e-06, "loss": 0.866, "step": 1325 }, { "epoch": 0.04063994115483634, "grad_norm": 0.4891070007484205, "learning_rate": 9.997034241522308e-06, "loss": 0.5026, "step": 1326 }, { "epoch": 0.04067058967757754, "grad_norm": 2.232449951729007, "learning_rate": 9.997017124885063e-06, "loss": 0.7179, "step": 1327 }, { "epoch": 0.04070123820031874, "grad_norm": 1.7311497089177637, "learning_rate": 9.99699995901093e-06, "loss": 0.6802, "step": 1328 }, { "epoch": 0.04073188672305995, "grad_norm": 1.794321625893074, "learning_rate": 9.996982743900077e-06, "loss": 0.7848, "step": 1329 }, { "epoch": 0.04076253524580115, "grad_norm": 1.7365973247044473, "learning_rate": 9.996965479552675e-06, "loss": 0.8841, "step": 1330 }, { "epoch": 0.04079318376854236, "grad_norm": 1.5727511257762763, "learning_rate": 9.996948165968896e-06, "loss": 0.7541, "step": 1331 }, { "epoch": 0.04082383229128356, "grad_norm": 1.5267576083132837, "learning_rate": 9.996930803148905e-06, "loss": 0.7211, "step": 1332 }, { "epoch": 0.04085448081402476, "grad_norm": 1.6462789221810967, "learning_rate": 9.996913391092877e-06, "loss": 0.8067, "step": 1333 }, { "epoch": 0.04088512933676597, "grad_norm": 1.7182870492677897, "learning_rate": 9.996895929800986e-06, "loss": 0.8713, "step": 1334 }, { "epoch": 0.040915777859507174, "grad_norm": 1.5727043229055742, "learning_rate": 9.996878419273397e-06, "loss": 0.8101, "step": 1335 }, { "epoch": 0.04094642638224837, "grad_norm": 1.5244521158344502, "learning_rate": 9.996860859510286e-06, "loss": 0.792, "step": 1336 }, { "epoch": 0.04097707490498958, "grad_norm": 0.5986787082294759, "learning_rate": 9.99684325051183e-06, "loss": 0.511, "step": 1337 }, { "epoch": 0.041007723427730784, "grad_norm": 1.6803644231508201, "learning_rate": 9.996825592278197e-06, "loss": 0.7743, "step": 1338 }, { "epoch": 0.04103837195047199, "grad_norm": 1.5545449417716917, "learning_rate": 9.996807884809563e-06, "loss": 0.7865, "step": 1339 }, { "epoch": 0.04106902047321319, "grad_norm": 1.5226690987384999, "learning_rate": 9.996790128106101e-06, "loss": 0.6977, "step": 1340 }, { "epoch": 0.041099668995954394, "grad_norm": 1.6446477362034067, "learning_rate": 9.99677232216799e-06, "loss": 0.6986, "step": 1341 }, { "epoch": 0.0411303175186956, "grad_norm": 1.7073136024989228, "learning_rate": 9.996754466995401e-06, "loss": 0.718, "step": 1342 }, { "epoch": 0.041160966041436806, "grad_norm": 1.5541061678278802, "learning_rate": 9.996736562588513e-06, "loss": 0.8481, "step": 1343 }, { "epoch": 0.041191614564178004, "grad_norm": 1.8108783351424824, "learning_rate": 9.9967186089475e-06, "loss": 0.758, "step": 1344 }, { "epoch": 0.04122226308691921, "grad_norm": 1.613535776268603, "learning_rate": 9.996700606072542e-06, "loss": 0.7799, "step": 1345 }, { "epoch": 0.041252911609660416, "grad_norm": 1.9431073560773502, "learning_rate": 9.996682553963813e-06, "loss": 0.8623, "step": 1346 }, { "epoch": 0.04128356013240162, "grad_norm": 0.5127338659826863, "learning_rate": 9.996664452621492e-06, "loss": 0.4934, "step": 1347 }, { "epoch": 0.04131420865514282, "grad_norm": 1.69631626012103, "learning_rate": 9.996646302045758e-06, "loss": 0.8633, "step": 1348 }, { "epoch": 0.041344857177884026, "grad_norm": 1.7947029536619705, "learning_rate": 9.996628102236789e-06, "loss": 0.7873, "step": 1349 }, { "epoch": 0.04137550570062523, "grad_norm": 1.7486710859039536, "learning_rate": 9.996609853194766e-06, "loss": 0.8233, "step": 1350 }, { "epoch": 0.04140615422336644, "grad_norm": 1.543394235286066, "learning_rate": 9.996591554919868e-06, "loss": 0.7915, "step": 1351 }, { "epoch": 0.041436802746107636, "grad_norm": 1.5497099138581016, "learning_rate": 9.996573207412275e-06, "loss": 0.9145, "step": 1352 }, { "epoch": 0.04146745126884884, "grad_norm": 1.7561896939523323, "learning_rate": 9.996554810672165e-06, "loss": 0.7897, "step": 1353 }, { "epoch": 0.04149809979159005, "grad_norm": 1.6222680458662753, "learning_rate": 9.996536364699726e-06, "loss": 0.7886, "step": 1354 }, { "epoch": 0.041528748314331246, "grad_norm": 1.654486862836275, "learning_rate": 9.996517869495133e-06, "loss": 0.7994, "step": 1355 }, { "epoch": 0.04155939683707245, "grad_norm": 1.7988057846358405, "learning_rate": 9.996499325058572e-06, "loss": 0.7105, "step": 1356 }, { "epoch": 0.04159004535981366, "grad_norm": 1.9382398122816185, "learning_rate": 9.996480731390224e-06, "loss": 0.7996, "step": 1357 }, { "epoch": 0.04162069388255486, "grad_norm": 2.0667863885327162, "learning_rate": 9.996462088490273e-06, "loss": 0.7216, "step": 1358 }, { "epoch": 0.04165134240529606, "grad_norm": 1.6336972255021005, "learning_rate": 9.996443396358904e-06, "loss": 0.6988, "step": 1359 }, { "epoch": 0.04168199092803727, "grad_norm": 1.7818747533512502, "learning_rate": 9.9964246549963e-06, "loss": 0.7106, "step": 1360 }, { "epoch": 0.04171263945077847, "grad_norm": 1.8309543191229223, "learning_rate": 9.996405864402644e-06, "loss": 0.8046, "step": 1361 }, { "epoch": 0.04174328797351968, "grad_norm": 1.4779285558737236, "learning_rate": 9.996387024578122e-06, "loss": 0.7954, "step": 1362 }, { "epoch": 0.04177393649626088, "grad_norm": 1.632729401480267, "learning_rate": 9.996368135522922e-06, "loss": 0.7732, "step": 1363 }, { "epoch": 0.04180458501900208, "grad_norm": 0.5287503169178965, "learning_rate": 9.996349197237228e-06, "loss": 0.5064, "step": 1364 }, { "epoch": 0.04183523354174329, "grad_norm": 1.9212699877356305, "learning_rate": 9.996330209721226e-06, "loss": 0.8368, "step": 1365 }, { "epoch": 0.041865882064484494, "grad_norm": 1.6902943709384253, "learning_rate": 9.996311172975105e-06, "loss": 0.7429, "step": 1366 }, { "epoch": 0.04189653058722569, "grad_norm": 1.601132064527281, "learning_rate": 9.996292086999051e-06, "loss": 0.7587, "step": 1367 }, { "epoch": 0.0419271791099669, "grad_norm": 1.80454712026168, "learning_rate": 9.996272951793253e-06, "loss": 0.7496, "step": 1368 }, { "epoch": 0.041957827632708104, "grad_norm": 1.6347208934187754, "learning_rate": 9.9962537673579e-06, "loss": 0.7226, "step": 1369 }, { "epoch": 0.04198847615544931, "grad_norm": 1.5545843921881508, "learning_rate": 9.99623453369318e-06, "loss": 0.7919, "step": 1370 }, { "epoch": 0.04201912467819051, "grad_norm": 1.5317603666672175, "learning_rate": 9.996215250799282e-06, "loss": 0.8324, "step": 1371 }, { "epoch": 0.042049773200931714, "grad_norm": 0.579421262661551, "learning_rate": 9.996195918676397e-06, "loss": 0.5061, "step": 1372 }, { "epoch": 0.04208042172367292, "grad_norm": 0.5526155749373941, "learning_rate": 9.996176537324715e-06, "loss": 0.497, "step": 1373 }, { "epoch": 0.042111070246414126, "grad_norm": 1.6225069917222477, "learning_rate": 9.996157106744429e-06, "loss": 0.7157, "step": 1374 }, { "epoch": 0.042141718769155324, "grad_norm": 1.6977247514618712, "learning_rate": 9.996137626935727e-06, "loss": 0.7497, "step": 1375 }, { "epoch": 0.04217236729189653, "grad_norm": 1.5526252569470669, "learning_rate": 9.996118097898804e-06, "loss": 0.7843, "step": 1376 }, { "epoch": 0.042203015814637736, "grad_norm": 1.6099911855046767, "learning_rate": 9.99609851963385e-06, "loss": 0.7219, "step": 1377 }, { "epoch": 0.04223366433737894, "grad_norm": 1.8335235271005583, "learning_rate": 9.996078892141059e-06, "loss": 0.7049, "step": 1378 }, { "epoch": 0.04226431286012014, "grad_norm": 0.5280509012034322, "learning_rate": 9.996059215420625e-06, "loss": 0.5094, "step": 1379 }, { "epoch": 0.042294961382861346, "grad_norm": 1.6945902135978943, "learning_rate": 9.996039489472741e-06, "loss": 0.6922, "step": 1380 }, { "epoch": 0.04232560990560255, "grad_norm": 1.7306239828056067, "learning_rate": 9.996019714297601e-06, "loss": 0.8104, "step": 1381 }, { "epoch": 0.04235625842834376, "grad_norm": 1.6659541036101744, "learning_rate": 9.9959998898954e-06, "loss": 0.758, "step": 1382 }, { "epoch": 0.042386906951084956, "grad_norm": 1.6865266357679438, "learning_rate": 9.995980016266335e-06, "loss": 0.6668, "step": 1383 }, { "epoch": 0.04241755547382616, "grad_norm": 1.5891241981471373, "learning_rate": 9.995960093410601e-06, "loss": 0.8103, "step": 1384 }, { "epoch": 0.04244820399656737, "grad_norm": 0.6031932423368972, "learning_rate": 9.995940121328394e-06, "loss": 0.5078, "step": 1385 }, { "epoch": 0.042478852519308566, "grad_norm": 1.7486970985787869, "learning_rate": 9.995920100019909e-06, "loss": 0.8099, "step": 1386 }, { "epoch": 0.04250950104204977, "grad_norm": 1.6804536669378445, "learning_rate": 9.995900029485348e-06, "loss": 0.7051, "step": 1387 }, { "epoch": 0.04254014956479098, "grad_norm": 1.7597478861883864, "learning_rate": 9.995879909724903e-06, "loss": 0.7876, "step": 1388 }, { "epoch": 0.04257079808753218, "grad_norm": 1.7242220781472688, "learning_rate": 9.995859740738776e-06, "loss": 0.8324, "step": 1389 }, { "epoch": 0.04260144661027338, "grad_norm": 1.5835543569121433, "learning_rate": 9.995839522527165e-06, "loss": 0.8294, "step": 1390 }, { "epoch": 0.04263209513301459, "grad_norm": 0.5015446099786395, "learning_rate": 9.995819255090266e-06, "loss": 0.4952, "step": 1391 }, { "epoch": 0.04266274365575579, "grad_norm": 0.48657789321625, "learning_rate": 9.995798938428285e-06, "loss": 0.5004, "step": 1392 }, { "epoch": 0.042693392178497, "grad_norm": 1.723888141510831, "learning_rate": 9.995778572541419e-06, "loss": 0.7982, "step": 1393 }, { "epoch": 0.0427240407012382, "grad_norm": 1.7445725928625873, "learning_rate": 9.995758157429867e-06, "loss": 0.798, "step": 1394 }, { "epoch": 0.0427546892239794, "grad_norm": 1.7550658259104346, "learning_rate": 9.995737693093833e-06, "loss": 0.7861, "step": 1395 }, { "epoch": 0.04278533774672061, "grad_norm": 1.532862639024838, "learning_rate": 9.995717179533515e-06, "loss": 0.7227, "step": 1396 }, { "epoch": 0.042815986269461814, "grad_norm": 1.8442634853823148, "learning_rate": 9.99569661674912e-06, "loss": 0.7969, "step": 1397 }, { "epoch": 0.04284663479220301, "grad_norm": 1.8776121739263947, "learning_rate": 9.995676004740846e-06, "loss": 0.8863, "step": 1398 }, { "epoch": 0.04287728331494422, "grad_norm": 1.5381351789337707, "learning_rate": 9.9956553435089e-06, "loss": 0.7926, "step": 1399 }, { "epoch": 0.042907931837685424, "grad_norm": 1.6495044250908286, "learning_rate": 9.995634633053481e-06, "loss": 0.8326, "step": 1400 }, { "epoch": 0.04293858036042663, "grad_norm": 1.632289822279453, "learning_rate": 9.995613873374798e-06, "loss": 0.8251, "step": 1401 }, { "epoch": 0.04296922888316783, "grad_norm": 1.750340021920516, "learning_rate": 9.995593064473053e-06, "loss": 0.7815, "step": 1402 }, { "epoch": 0.042999877405909034, "grad_norm": 1.3698025709802137, "learning_rate": 9.99557220634845e-06, "loss": 0.6548, "step": 1403 }, { "epoch": 0.04303052592865024, "grad_norm": 0.5903766280143794, "learning_rate": 9.995551299001198e-06, "loss": 0.5244, "step": 1404 }, { "epoch": 0.043061174451391446, "grad_norm": 1.8915861854389446, "learning_rate": 9.9955303424315e-06, "loss": 0.8031, "step": 1405 }, { "epoch": 0.043091822974132644, "grad_norm": 1.8276250845597426, "learning_rate": 9.995509336639563e-06, "loss": 0.831, "step": 1406 }, { "epoch": 0.04312247149687385, "grad_norm": 1.632836633376533, "learning_rate": 9.995488281625594e-06, "loss": 0.6994, "step": 1407 }, { "epoch": 0.043153120019615056, "grad_norm": 1.661988845113715, "learning_rate": 9.995467177389801e-06, "loss": 0.7118, "step": 1408 }, { "epoch": 0.04318376854235626, "grad_norm": 1.6214963930319126, "learning_rate": 9.995446023932394e-06, "loss": 0.7771, "step": 1409 }, { "epoch": 0.04321441706509746, "grad_norm": 1.6110221675932583, "learning_rate": 9.995424821253577e-06, "loss": 0.8394, "step": 1410 }, { "epoch": 0.043245065587838666, "grad_norm": 1.4339076640578414, "learning_rate": 9.99540356935356e-06, "loss": 0.7087, "step": 1411 }, { "epoch": 0.04327571411057987, "grad_norm": 1.6562506461940898, "learning_rate": 9.995382268232556e-06, "loss": 0.8829, "step": 1412 }, { "epoch": 0.04330636263332108, "grad_norm": 1.5985458384116036, "learning_rate": 9.99536091789077e-06, "loss": 0.7518, "step": 1413 }, { "epoch": 0.043337011156062276, "grad_norm": 1.661328728303662, "learning_rate": 9.995339518328418e-06, "loss": 0.7932, "step": 1414 }, { "epoch": 0.04336765967880348, "grad_norm": 1.6899798505747414, "learning_rate": 9.995318069545706e-06, "loss": 0.8245, "step": 1415 }, { "epoch": 0.04339830820154469, "grad_norm": 0.5616618104971641, "learning_rate": 9.995296571542845e-06, "loss": 0.4872, "step": 1416 }, { "epoch": 0.04342895672428589, "grad_norm": 1.9150087222831051, "learning_rate": 9.995275024320051e-06, "loss": 0.8078, "step": 1417 }, { "epoch": 0.04345960524702709, "grad_norm": 1.6604813665309246, "learning_rate": 9.995253427877533e-06, "loss": 0.764, "step": 1418 }, { "epoch": 0.0434902537697683, "grad_norm": 1.838459162226352, "learning_rate": 9.995231782215506e-06, "loss": 0.7787, "step": 1419 }, { "epoch": 0.0435209022925095, "grad_norm": 0.5418433891798373, "learning_rate": 9.995210087334182e-06, "loss": 0.4893, "step": 1420 }, { "epoch": 0.0435515508152507, "grad_norm": 1.7080294637466262, "learning_rate": 9.995188343233775e-06, "loss": 0.7233, "step": 1421 }, { "epoch": 0.04358219933799191, "grad_norm": 1.7101213344823867, "learning_rate": 9.995166549914498e-06, "loss": 0.7305, "step": 1422 }, { "epoch": 0.04361284786073311, "grad_norm": 1.6756361555241797, "learning_rate": 9.995144707376568e-06, "loss": 0.8529, "step": 1423 }, { "epoch": 0.04364349638347432, "grad_norm": 1.5545463444094614, "learning_rate": 9.995122815620199e-06, "loss": 0.7372, "step": 1424 }, { "epoch": 0.04367414490621552, "grad_norm": 1.6201334590329097, "learning_rate": 9.995100874645605e-06, "loss": 0.8359, "step": 1425 }, { "epoch": 0.04370479342895672, "grad_norm": 0.6210085736175037, "learning_rate": 9.995078884453006e-06, "loss": 0.488, "step": 1426 }, { "epoch": 0.04373544195169793, "grad_norm": 1.7309860622308115, "learning_rate": 9.995056845042616e-06, "loss": 0.7583, "step": 1427 }, { "epoch": 0.043766090474439134, "grad_norm": 1.8621620362426794, "learning_rate": 9.995034756414655e-06, "loss": 0.8327, "step": 1428 }, { "epoch": 0.04379673899718033, "grad_norm": 1.5093949388013685, "learning_rate": 9.995012618569335e-06, "loss": 0.7929, "step": 1429 }, { "epoch": 0.04382738751992154, "grad_norm": 1.5438478399383209, "learning_rate": 9.99499043150688e-06, "loss": 0.7747, "step": 1430 }, { "epoch": 0.043858036042662744, "grad_norm": 1.6217836041953884, "learning_rate": 9.994968195227505e-06, "loss": 0.7974, "step": 1431 }, { "epoch": 0.04388868456540395, "grad_norm": 1.6747178664325038, "learning_rate": 9.994945909731432e-06, "loss": 0.7993, "step": 1432 }, { "epoch": 0.04391933308814515, "grad_norm": 0.5329478076039167, "learning_rate": 9.994923575018878e-06, "loss": 0.5153, "step": 1433 }, { "epoch": 0.043949981610886354, "grad_norm": 1.7187325991882694, "learning_rate": 9.994901191090063e-06, "loss": 0.8569, "step": 1434 }, { "epoch": 0.04398063013362756, "grad_norm": 0.5278164338992885, "learning_rate": 9.99487875794521e-06, "loss": 0.5083, "step": 1435 }, { "epoch": 0.044011278656368766, "grad_norm": 1.6498006920178183, "learning_rate": 9.994856275584537e-06, "loss": 0.8477, "step": 1436 }, { "epoch": 0.044041927179109965, "grad_norm": 1.4731355247391462, "learning_rate": 9.99483374400827e-06, "loss": 0.6945, "step": 1437 }, { "epoch": 0.04407257570185117, "grad_norm": 1.6881580181356763, "learning_rate": 9.994811163216625e-06, "loss": 0.7391, "step": 1438 }, { "epoch": 0.044103224224592376, "grad_norm": 1.7482806194414549, "learning_rate": 9.994788533209829e-06, "loss": 0.7868, "step": 1439 }, { "epoch": 0.04413387274733358, "grad_norm": 1.6647076760786987, "learning_rate": 9.994765853988105e-06, "loss": 0.8789, "step": 1440 }, { "epoch": 0.04416452127007478, "grad_norm": 1.4886097727828451, "learning_rate": 9.994743125551672e-06, "loss": 0.902, "step": 1441 }, { "epoch": 0.044195169792815986, "grad_norm": 1.4784749960315835, "learning_rate": 9.994720347900759e-06, "loss": 0.7536, "step": 1442 }, { "epoch": 0.04422581831555719, "grad_norm": 1.4450868782207995, "learning_rate": 9.994697521035588e-06, "loss": 0.7582, "step": 1443 }, { "epoch": 0.0442564668382984, "grad_norm": 1.700990287422924, "learning_rate": 9.994674644956385e-06, "loss": 0.7978, "step": 1444 }, { "epoch": 0.044287115361039596, "grad_norm": 1.5667780030655563, "learning_rate": 9.994651719663373e-06, "loss": 0.6614, "step": 1445 }, { "epoch": 0.0443177638837808, "grad_norm": 1.6111554857354942, "learning_rate": 9.994628745156782e-06, "loss": 0.8014, "step": 1446 }, { "epoch": 0.04434841240652201, "grad_norm": 1.56731969874899, "learning_rate": 9.994605721436836e-06, "loss": 0.6882, "step": 1447 }, { "epoch": 0.04437906092926321, "grad_norm": 1.6076062091319268, "learning_rate": 9.99458264850376e-06, "loss": 0.6878, "step": 1448 }, { "epoch": 0.04440970945200441, "grad_norm": 1.4971866986155857, "learning_rate": 9.994559526357785e-06, "loss": 0.7323, "step": 1449 }, { "epoch": 0.04444035797474562, "grad_norm": 1.7276370912735293, "learning_rate": 9.994536354999136e-06, "loss": 0.7219, "step": 1450 }, { "epoch": 0.04447100649748682, "grad_norm": 1.468479003452563, "learning_rate": 9.994513134428042e-06, "loss": 0.7072, "step": 1451 }, { "epoch": 0.04450165502022802, "grad_norm": 1.8771543293139419, "learning_rate": 9.994489864644733e-06, "loss": 0.8242, "step": 1452 }, { "epoch": 0.04453230354296923, "grad_norm": 1.3819295272593521, "learning_rate": 9.994466545649437e-06, "loss": 0.7487, "step": 1453 }, { "epoch": 0.04456295206571043, "grad_norm": 1.6298524276351591, "learning_rate": 9.994443177442386e-06, "loss": 0.6653, "step": 1454 }, { "epoch": 0.04459360058845164, "grad_norm": 1.5659074169115428, "learning_rate": 9.994419760023806e-06, "loss": 0.8131, "step": 1455 }, { "epoch": 0.04462424911119284, "grad_norm": 1.6530795701817331, "learning_rate": 9.994396293393932e-06, "loss": 0.7671, "step": 1456 }, { "epoch": 0.04465489763393404, "grad_norm": 1.4080027076536974, "learning_rate": 9.994372777552992e-06, "loss": 0.7904, "step": 1457 }, { "epoch": 0.04468554615667525, "grad_norm": 0.6490532579733646, "learning_rate": 9.99434921250122e-06, "loss": 0.5129, "step": 1458 }, { "epoch": 0.044716194679416454, "grad_norm": 1.431264680267745, "learning_rate": 9.994325598238847e-06, "loss": 0.7666, "step": 1459 }, { "epoch": 0.04474684320215765, "grad_norm": 1.5962471133597858, "learning_rate": 9.994301934766106e-06, "loss": 0.7392, "step": 1460 }, { "epoch": 0.04477749172489886, "grad_norm": 2.123442440028512, "learning_rate": 9.99427822208323e-06, "loss": 0.7186, "step": 1461 }, { "epoch": 0.044808140247640064, "grad_norm": 1.6023039683056646, "learning_rate": 9.994254460190453e-06, "loss": 0.7555, "step": 1462 }, { "epoch": 0.04483878877038127, "grad_norm": 1.7803491262183644, "learning_rate": 9.99423064908801e-06, "loss": 0.8123, "step": 1463 }, { "epoch": 0.04486943729312247, "grad_norm": 1.6852387461982767, "learning_rate": 9.994206788776133e-06, "loss": 0.8449, "step": 1464 }, { "epoch": 0.044900085815863675, "grad_norm": 1.7602742237039795, "learning_rate": 9.99418287925506e-06, "loss": 0.7656, "step": 1465 }, { "epoch": 0.04493073433860488, "grad_norm": 1.7775107486373007, "learning_rate": 9.994158920525024e-06, "loss": 0.7862, "step": 1466 }, { "epoch": 0.044961382861346086, "grad_norm": 1.670407745086068, "learning_rate": 9.994134912586265e-06, "loss": 0.7184, "step": 1467 }, { "epoch": 0.044992031384087285, "grad_norm": 1.759475270076223, "learning_rate": 9.994110855439014e-06, "loss": 0.7318, "step": 1468 }, { "epoch": 0.04502267990682849, "grad_norm": 1.869796014199928, "learning_rate": 9.994086749083512e-06, "loss": 0.8335, "step": 1469 }, { "epoch": 0.045053328429569696, "grad_norm": 0.6153727278416276, "learning_rate": 9.994062593519995e-06, "loss": 0.4841, "step": 1470 }, { "epoch": 0.0450839769523109, "grad_norm": 1.89355393762183, "learning_rate": 9.994038388748702e-06, "loss": 0.8502, "step": 1471 }, { "epoch": 0.0451146254750521, "grad_norm": 1.8047754424293803, "learning_rate": 9.994014134769872e-06, "loss": 0.7221, "step": 1472 }, { "epoch": 0.045145273997793306, "grad_norm": 1.5484613607653352, "learning_rate": 9.993989831583742e-06, "loss": 0.889, "step": 1473 }, { "epoch": 0.04517592252053451, "grad_norm": 1.632101658647336, "learning_rate": 9.99396547919055e-06, "loss": 0.7199, "step": 1474 }, { "epoch": 0.04520657104327572, "grad_norm": 1.7097268225006823, "learning_rate": 9.99394107759054e-06, "loss": 0.7728, "step": 1475 }, { "epoch": 0.045237219566016916, "grad_norm": 1.5655960366508277, "learning_rate": 9.993916626783952e-06, "loss": 0.7611, "step": 1476 }, { "epoch": 0.04526786808875812, "grad_norm": 1.7937283932607517, "learning_rate": 9.993892126771026e-06, "loss": 0.9381, "step": 1477 }, { "epoch": 0.04529851661149933, "grad_norm": 1.6833917465810957, "learning_rate": 9.993867577552003e-06, "loss": 0.7216, "step": 1478 }, { "epoch": 0.04532916513424053, "grad_norm": 1.717496586804676, "learning_rate": 9.993842979127124e-06, "loss": 0.8223, "step": 1479 }, { "epoch": 0.04535981365698173, "grad_norm": 1.605764781483956, "learning_rate": 9.993818331496632e-06, "loss": 0.7866, "step": 1480 }, { "epoch": 0.04539046217972294, "grad_norm": 1.8361098256426114, "learning_rate": 9.99379363466077e-06, "loss": 0.7942, "step": 1481 }, { "epoch": 0.04542111070246414, "grad_norm": 1.712200138326599, "learning_rate": 9.993768888619783e-06, "loss": 0.7276, "step": 1482 }, { "epoch": 0.04545175922520534, "grad_norm": 1.4061644346765314, "learning_rate": 9.993744093373915e-06, "loss": 0.7316, "step": 1483 }, { "epoch": 0.04548240774794655, "grad_norm": 0.5482680821082387, "learning_rate": 9.993719248923406e-06, "loss": 0.4992, "step": 1484 }, { "epoch": 0.04551305627068775, "grad_norm": 1.6867912466974055, "learning_rate": 9.993694355268504e-06, "loss": 0.7305, "step": 1485 }, { "epoch": 0.04554370479342896, "grad_norm": 1.640253520163276, "learning_rate": 9.993669412409455e-06, "loss": 0.7277, "step": 1486 }, { "epoch": 0.04557435331617016, "grad_norm": 1.6624894492427678, "learning_rate": 9.993644420346503e-06, "loss": 0.884, "step": 1487 }, { "epoch": 0.04560500183891136, "grad_norm": 1.4654392847698376, "learning_rate": 9.993619379079893e-06, "loss": 0.7723, "step": 1488 }, { "epoch": 0.04563565036165257, "grad_norm": 1.5491898319537882, "learning_rate": 9.993594288609876e-06, "loss": 0.7622, "step": 1489 }, { "epoch": 0.045666298884393774, "grad_norm": 1.728308327546118, "learning_rate": 9.993569148936695e-06, "loss": 0.76, "step": 1490 }, { "epoch": 0.04569694740713497, "grad_norm": 1.5369122699012425, "learning_rate": 9.993543960060601e-06, "loss": 0.7166, "step": 1491 }, { "epoch": 0.04572759592987618, "grad_norm": 1.6687331018418772, "learning_rate": 9.99351872198184e-06, "loss": 0.7864, "step": 1492 }, { "epoch": 0.045758244452617385, "grad_norm": 1.720256964326744, "learning_rate": 9.99349343470066e-06, "loss": 0.8468, "step": 1493 }, { "epoch": 0.04578889297535859, "grad_norm": 1.5600848648589252, "learning_rate": 9.993468098217313e-06, "loss": 0.8918, "step": 1494 }, { "epoch": 0.04581954149809979, "grad_norm": 1.7040276908610275, "learning_rate": 9.993442712532048e-06, "loss": 0.7746, "step": 1495 }, { "epoch": 0.045850190020840995, "grad_norm": 1.6650023386518529, "learning_rate": 9.993417277645114e-06, "loss": 0.7282, "step": 1496 }, { "epoch": 0.0458808385435822, "grad_norm": 1.6017678236087396, "learning_rate": 9.99339179355676e-06, "loss": 0.7393, "step": 1497 }, { "epoch": 0.045911487066323406, "grad_norm": 1.6745181670516938, "learning_rate": 9.993366260267243e-06, "loss": 0.7894, "step": 1498 }, { "epoch": 0.045942135589064605, "grad_norm": 1.5570740148030964, "learning_rate": 9.993340677776809e-06, "loss": 0.668, "step": 1499 }, { "epoch": 0.04597278411180581, "grad_norm": 0.5837289219067237, "learning_rate": 9.99331504608571e-06, "loss": 0.4745, "step": 1500 }, { "epoch": 0.046003432634547016, "grad_norm": 1.8828093851865382, "learning_rate": 9.993289365194201e-06, "loss": 0.7913, "step": 1501 }, { "epoch": 0.04603408115728822, "grad_norm": 1.5050148645377222, "learning_rate": 9.993263635102534e-06, "loss": 0.8248, "step": 1502 }, { "epoch": 0.04606472968002942, "grad_norm": 0.5219715529315029, "learning_rate": 9.993237855810964e-06, "loss": 0.4933, "step": 1503 }, { "epoch": 0.046095378202770626, "grad_norm": 1.7367655315877255, "learning_rate": 9.993212027319742e-06, "loss": 0.8433, "step": 1504 }, { "epoch": 0.04612602672551183, "grad_norm": 0.5148712449479944, "learning_rate": 9.993186149629127e-06, "loss": 0.4869, "step": 1505 }, { "epoch": 0.04615667524825304, "grad_norm": 1.5499941580594727, "learning_rate": 9.993160222739369e-06, "loss": 0.81, "step": 1506 }, { "epoch": 0.046187323770994236, "grad_norm": 1.6453557244814139, "learning_rate": 9.993134246650726e-06, "loss": 0.7225, "step": 1507 }, { "epoch": 0.04621797229373544, "grad_norm": 1.5004164851965778, "learning_rate": 9.993108221363454e-06, "loss": 0.7557, "step": 1508 }, { "epoch": 0.04624862081647665, "grad_norm": 1.7420450570077048, "learning_rate": 9.99308214687781e-06, "loss": 0.9695, "step": 1509 }, { "epoch": 0.04627926933921785, "grad_norm": 0.5622668503646434, "learning_rate": 9.99305602319405e-06, "loss": 0.5224, "step": 1510 }, { "epoch": 0.04630991786195905, "grad_norm": 1.57764064566429, "learning_rate": 9.99302985031243e-06, "loss": 0.7289, "step": 1511 }, { "epoch": 0.04634056638470026, "grad_norm": 1.5772637956464457, "learning_rate": 9.993003628233211e-06, "loss": 0.719, "step": 1512 }, { "epoch": 0.04637121490744146, "grad_norm": 1.6918174068642564, "learning_rate": 9.99297735695665e-06, "loss": 0.8474, "step": 1513 }, { "epoch": 0.04640186343018266, "grad_norm": 1.4808369211793324, "learning_rate": 9.992951036483003e-06, "loss": 0.7313, "step": 1514 }, { "epoch": 0.04643251195292387, "grad_norm": 0.5332413097354904, "learning_rate": 9.992924666812533e-06, "loss": 0.5009, "step": 1515 }, { "epoch": 0.04646316047566507, "grad_norm": 1.6345522890729975, "learning_rate": 9.9928982479455e-06, "loss": 0.7781, "step": 1516 }, { "epoch": 0.04649380899840628, "grad_norm": 1.5589921298101717, "learning_rate": 9.992871779882164e-06, "loss": 0.7242, "step": 1517 }, { "epoch": 0.04652445752114748, "grad_norm": 1.588166240448136, "learning_rate": 9.992845262622782e-06, "loss": 0.7213, "step": 1518 }, { "epoch": 0.04655510604388868, "grad_norm": 0.5232745006609435, "learning_rate": 9.99281869616762e-06, "loss": 0.5026, "step": 1519 }, { "epoch": 0.04658575456662989, "grad_norm": 1.8018002313271393, "learning_rate": 9.992792080516938e-06, "loss": 0.8252, "step": 1520 }, { "epoch": 0.046616403089371095, "grad_norm": 0.5019101120402668, "learning_rate": 9.992765415670998e-06, "loss": 0.5094, "step": 1521 }, { "epoch": 0.04664705161211229, "grad_norm": 1.5130653809234382, "learning_rate": 9.992738701630061e-06, "loss": 0.7326, "step": 1522 }, { "epoch": 0.0466777001348535, "grad_norm": 0.48974856682671003, "learning_rate": 9.992711938394394e-06, "loss": 0.4891, "step": 1523 }, { "epoch": 0.046708348657594705, "grad_norm": 1.676565406138487, "learning_rate": 9.992685125964259e-06, "loss": 0.7796, "step": 1524 }, { "epoch": 0.04673899718033591, "grad_norm": 1.80311249755592, "learning_rate": 9.992658264339918e-06, "loss": 0.8516, "step": 1525 }, { "epoch": 0.04676964570307711, "grad_norm": 1.6145584003998497, "learning_rate": 9.99263135352164e-06, "loss": 0.8381, "step": 1526 }, { "epoch": 0.046800294225818315, "grad_norm": 0.5078927871497232, "learning_rate": 9.992604393509687e-06, "loss": 0.4838, "step": 1527 }, { "epoch": 0.04683094274855952, "grad_norm": 0.5002519799644947, "learning_rate": 9.992577384304325e-06, "loss": 0.485, "step": 1528 }, { "epoch": 0.046861591271300726, "grad_norm": 1.9860384709802252, "learning_rate": 9.992550325905821e-06, "loss": 0.699, "step": 1529 }, { "epoch": 0.046892239794041925, "grad_norm": 1.5541575167053618, "learning_rate": 9.992523218314442e-06, "loss": 0.7598, "step": 1530 }, { "epoch": 0.04692288831678313, "grad_norm": 0.5142732037158645, "learning_rate": 9.992496061530454e-06, "loss": 0.5374, "step": 1531 }, { "epoch": 0.046953536839524336, "grad_norm": 1.6168603990205554, "learning_rate": 9.992468855554125e-06, "loss": 0.8455, "step": 1532 }, { "epoch": 0.04698418536226554, "grad_norm": 1.719235349224108, "learning_rate": 9.992441600385721e-06, "loss": 0.7933, "step": 1533 }, { "epoch": 0.04701483388500674, "grad_norm": 0.5001486819269273, "learning_rate": 9.992414296025514e-06, "loss": 0.5162, "step": 1534 }, { "epoch": 0.047045482407747946, "grad_norm": 1.6971618082612738, "learning_rate": 9.992386942473773e-06, "loss": 0.7616, "step": 1535 }, { "epoch": 0.04707613093048915, "grad_norm": 1.4913641588679143, "learning_rate": 9.992359539730763e-06, "loss": 0.8303, "step": 1536 }, { "epoch": 0.04710677945323036, "grad_norm": 1.5032191091744636, "learning_rate": 9.99233208779676e-06, "loss": 0.7437, "step": 1537 }, { "epoch": 0.047137427975971556, "grad_norm": 0.5155770874397317, "learning_rate": 9.99230458667203e-06, "loss": 0.5071, "step": 1538 }, { "epoch": 0.04716807649871276, "grad_norm": 1.7456182395770141, "learning_rate": 9.992277036356846e-06, "loss": 0.8151, "step": 1539 }, { "epoch": 0.04719872502145397, "grad_norm": 1.4678503809421795, "learning_rate": 9.99224943685148e-06, "loss": 0.7621, "step": 1540 }, { "epoch": 0.04722937354419517, "grad_norm": 1.7312804840790916, "learning_rate": 9.992221788156202e-06, "loss": 0.6965, "step": 1541 }, { "epoch": 0.04726002206693637, "grad_norm": 1.7144881486886203, "learning_rate": 9.992194090271285e-06, "loss": 0.7277, "step": 1542 }, { "epoch": 0.04729067058967758, "grad_norm": 1.658186871494617, "learning_rate": 9.992166343197002e-06, "loss": 0.7375, "step": 1543 }, { "epoch": 0.04732131911241878, "grad_norm": 1.525546475719328, "learning_rate": 9.992138546933629e-06, "loss": 0.7205, "step": 1544 }, { "epoch": 0.04735196763515998, "grad_norm": 1.6475771969439732, "learning_rate": 9.992110701481436e-06, "loss": 0.7717, "step": 1545 }, { "epoch": 0.04738261615790119, "grad_norm": 1.6324579903438068, "learning_rate": 9.9920828068407e-06, "loss": 0.8304, "step": 1546 }, { "epoch": 0.04741326468064239, "grad_norm": 1.8452747086127574, "learning_rate": 9.992054863011693e-06, "loss": 0.6849, "step": 1547 }, { "epoch": 0.0474439132033836, "grad_norm": 1.499732800862167, "learning_rate": 9.992026869994694e-06, "loss": 0.7488, "step": 1548 }, { "epoch": 0.0474745617261248, "grad_norm": 1.674917066800949, "learning_rate": 9.991998827789975e-06, "loss": 0.7641, "step": 1549 }, { "epoch": 0.047505210248866, "grad_norm": 1.4678306889410004, "learning_rate": 9.991970736397817e-06, "loss": 0.6796, "step": 1550 }, { "epoch": 0.04753585877160721, "grad_norm": 1.5444387638646244, "learning_rate": 9.991942595818491e-06, "loss": 0.6802, "step": 1551 }, { "epoch": 0.047566507294348415, "grad_norm": 1.5628599239535927, "learning_rate": 9.991914406052279e-06, "loss": 0.8277, "step": 1552 }, { "epoch": 0.04759715581708961, "grad_norm": 0.6146431088636145, "learning_rate": 9.991886167099456e-06, "loss": 0.4878, "step": 1553 }, { "epoch": 0.04762780433983082, "grad_norm": 1.5633624777552995, "learning_rate": 9.991857878960303e-06, "loss": 0.7132, "step": 1554 }, { "epoch": 0.047658452862572025, "grad_norm": 0.5931213403637277, "learning_rate": 9.991829541635097e-06, "loss": 0.5238, "step": 1555 }, { "epoch": 0.04768910138531323, "grad_norm": 0.5163688644394027, "learning_rate": 9.991801155124116e-06, "loss": 0.4878, "step": 1556 }, { "epoch": 0.04771974990805443, "grad_norm": 0.5138878652092083, "learning_rate": 9.991772719427642e-06, "loss": 0.5192, "step": 1557 }, { "epoch": 0.047750398430795635, "grad_norm": 1.7782711503602104, "learning_rate": 9.991744234545952e-06, "loss": 0.8217, "step": 1558 }, { "epoch": 0.04778104695353684, "grad_norm": 1.7287454835751117, "learning_rate": 9.991715700479333e-06, "loss": 0.8161, "step": 1559 }, { "epoch": 0.047811695476278046, "grad_norm": 1.5069159636855332, "learning_rate": 9.99168711722806e-06, "loss": 0.8219, "step": 1560 }, { "epoch": 0.047842343999019245, "grad_norm": 1.6686943626880935, "learning_rate": 9.991658484792416e-06, "loss": 0.7655, "step": 1561 }, { "epoch": 0.04787299252176045, "grad_norm": 1.5682617321240702, "learning_rate": 9.991629803172684e-06, "loss": 0.7852, "step": 1562 }, { "epoch": 0.047903641044501656, "grad_norm": 1.573943464961091, "learning_rate": 9.991601072369145e-06, "loss": 0.7041, "step": 1563 }, { "epoch": 0.04793428956724286, "grad_norm": 0.6193517423092403, "learning_rate": 9.991572292382086e-06, "loss": 0.4916, "step": 1564 }, { "epoch": 0.04796493808998406, "grad_norm": 1.6261900850044906, "learning_rate": 9.991543463211788e-06, "loss": 0.7474, "step": 1565 }, { "epoch": 0.047995586612725266, "grad_norm": 1.5392908424018066, "learning_rate": 9.991514584858534e-06, "loss": 0.718, "step": 1566 }, { "epoch": 0.04802623513546647, "grad_norm": 1.6849444014257267, "learning_rate": 9.991485657322609e-06, "loss": 0.7167, "step": 1567 }, { "epoch": 0.04805688365820768, "grad_norm": 0.5919075862831269, "learning_rate": 9.9914566806043e-06, "loss": 0.5156, "step": 1568 }, { "epoch": 0.048087532180948876, "grad_norm": 1.795267631834151, "learning_rate": 9.99142765470389e-06, "loss": 0.7863, "step": 1569 }, { "epoch": 0.04811818070369008, "grad_norm": 1.4668987315014343, "learning_rate": 9.991398579621668e-06, "loss": 0.6627, "step": 1570 }, { "epoch": 0.04814882922643129, "grad_norm": 0.539769993979188, "learning_rate": 9.991369455357918e-06, "loss": 0.4804, "step": 1571 }, { "epoch": 0.04817947774917249, "grad_norm": 0.54654599495026, "learning_rate": 9.991340281912927e-06, "loss": 0.5072, "step": 1572 }, { "epoch": 0.04821012627191369, "grad_norm": 1.8868509135923484, "learning_rate": 9.991311059286984e-06, "loss": 0.7203, "step": 1573 }, { "epoch": 0.0482407747946549, "grad_norm": 1.5679743745919692, "learning_rate": 9.991281787480377e-06, "loss": 0.7794, "step": 1574 }, { "epoch": 0.0482714233173961, "grad_norm": 0.5146529253000058, "learning_rate": 9.991252466493391e-06, "loss": 0.4853, "step": 1575 }, { "epoch": 0.0483020718401373, "grad_norm": 1.777877245076419, "learning_rate": 9.991223096326316e-06, "loss": 0.8224, "step": 1576 }, { "epoch": 0.04833272036287851, "grad_norm": 1.7707186151323324, "learning_rate": 9.991193676979448e-06, "loss": 0.7066, "step": 1577 }, { "epoch": 0.04836336888561971, "grad_norm": 1.7210754514233986, "learning_rate": 9.991164208453069e-06, "loss": 0.8158, "step": 1578 }, { "epoch": 0.04839401740836092, "grad_norm": 1.6516006711252949, "learning_rate": 9.99113469074747e-06, "loss": 0.8278, "step": 1579 }, { "epoch": 0.04842466593110212, "grad_norm": 1.7390763685257185, "learning_rate": 9.991105123862944e-06, "loss": 0.716, "step": 1580 }, { "epoch": 0.04845531445384332, "grad_norm": 1.623932786100328, "learning_rate": 9.991075507799786e-06, "loss": 0.7144, "step": 1581 }, { "epoch": 0.04848596297658453, "grad_norm": 1.7908108679236838, "learning_rate": 9.991045842558282e-06, "loss": 0.8755, "step": 1582 }, { "epoch": 0.048516611499325735, "grad_norm": 1.848232253248307, "learning_rate": 9.991016128138726e-06, "loss": 0.8622, "step": 1583 }, { "epoch": 0.04854726002206693, "grad_norm": 1.9043035075634227, "learning_rate": 9.990986364541411e-06, "loss": 0.8561, "step": 1584 }, { "epoch": 0.04857790854480814, "grad_norm": 1.5574432904640494, "learning_rate": 9.990956551766631e-06, "loss": 0.7579, "step": 1585 }, { "epoch": 0.048608557067549345, "grad_norm": 1.721860961930786, "learning_rate": 9.990926689814678e-06, "loss": 0.7613, "step": 1586 }, { "epoch": 0.04863920559029055, "grad_norm": 1.5354795820278755, "learning_rate": 9.990896778685847e-06, "loss": 0.7734, "step": 1587 }, { "epoch": 0.04866985411303175, "grad_norm": 1.7755511639664816, "learning_rate": 9.990866818380436e-06, "loss": 0.7807, "step": 1588 }, { "epoch": 0.048700502635772955, "grad_norm": 1.8031503502368282, "learning_rate": 9.990836808898736e-06, "loss": 0.8361, "step": 1589 }, { "epoch": 0.04873115115851416, "grad_norm": 1.4711893307271005, "learning_rate": 9.990806750241043e-06, "loss": 0.6812, "step": 1590 }, { "epoch": 0.048761799681255366, "grad_norm": 1.6888638962916078, "learning_rate": 9.990776642407653e-06, "loss": 0.7453, "step": 1591 }, { "epoch": 0.048792448203996565, "grad_norm": 1.7680237857068217, "learning_rate": 9.990746485398866e-06, "loss": 0.7532, "step": 1592 }, { "epoch": 0.04882309672673777, "grad_norm": 1.7504865907611094, "learning_rate": 9.990716279214976e-06, "loss": 0.8006, "step": 1593 }, { "epoch": 0.048853745249478976, "grad_norm": 1.6312594647799852, "learning_rate": 9.990686023856282e-06, "loss": 0.8423, "step": 1594 }, { "epoch": 0.04888439377222018, "grad_norm": 1.9723492739643445, "learning_rate": 9.990655719323082e-06, "loss": 0.8597, "step": 1595 }, { "epoch": 0.04891504229496138, "grad_norm": 1.5010535659686153, "learning_rate": 9.990625365615674e-06, "loss": 0.7287, "step": 1596 }, { "epoch": 0.048945690817702586, "grad_norm": 1.5377301534505243, "learning_rate": 9.990594962734357e-06, "loss": 0.6299, "step": 1597 }, { "epoch": 0.04897633934044379, "grad_norm": 1.5722398245704414, "learning_rate": 9.99056451067943e-06, "loss": 0.8546, "step": 1598 }, { "epoch": 0.049006987863185, "grad_norm": 2.417702289019093, "learning_rate": 9.990534009451195e-06, "loss": 0.8906, "step": 1599 }, { "epoch": 0.049037636385926196, "grad_norm": 1.6128566938685256, "learning_rate": 9.990503459049951e-06, "loss": 0.7862, "step": 1600 }, { "epoch": 0.0490682849086674, "grad_norm": 1.6368167419071447, "learning_rate": 9.990472859476002e-06, "loss": 0.708, "step": 1601 }, { "epoch": 0.04909893343140861, "grad_norm": 1.7783897766860688, "learning_rate": 9.990442210729646e-06, "loss": 0.7081, "step": 1602 }, { "epoch": 0.04912958195414981, "grad_norm": 3.418235404016945, "learning_rate": 9.990411512811185e-06, "loss": 0.8347, "step": 1603 }, { "epoch": 0.04916023047689101, "grad_norm": 1.4447037081514358, "learning_rate": 9.990380765720922e-06, "loss": 0.7039, "step": 1604 }, { "epoch": 0.04919087899963222, "grad_norm": 1.707204956440589, "learning_rate": 9.990349969459162e-06, "loss": 0.8181, "step": 1605 }, { "epoch": 0.04922152752237342, "grad_norm": 0.8651001543715358, "learning_rate": 9.990319124026205e-06, "loss": 0.4971, "step": 1606 }, { "epoch": 0.04925217604511462, "grad_norm": 1.7143304923686395, "learning_rate": 9.990288229422357e-06, "loss": 0.7813, "step": 1607 }, { "epoch": 0.04928282456785583, "grad_norm": 1.7063778017024993, "learning_rate": 9.990257285647923e-06, "loss": 0.8294, "step": 1608 }, { "epoch": 0.04931347309059703, "grad_norm": 1.6151160918113512, "learning_rate": 9.990226292703208e-06, "loss": 0.7767, "step": 1609 }, { "epoch": 0.04934412161333824, "grad_norm": 1.6605008816779898, "learning_rate": 9.990195250588515e-06, "loss": 0.8725, "step": 1610 }, { "epoch": 0.04937477013607944, "grad_norm": 0.5773611397993186, "learning_rate": 9.990164159304152e-06, "loss": 0.5042, "step": 1611 }, { "epoch": 0.04940541865882064, "grad_norm": 1.5586513240980657, "learning_rate": 9.990133018850427e-06, "loss": 0.7863, "step": 1612 }, { "epoch": 0.04943606718156185, "grad_norm": 1.4880679961388978, "learning_rate": 9.990101829227643e-06, "loss": 0.772, "step": 1613 }, { "epoch": 0.049466715704303055, "grad_norm": 1.8625754979514333, "learning_rate": 9.990070590436107e-06, "loss": 0.8632, "step": 1614 }, { "epoch": 0.049497364227044253, "grad_norm": 1.95694800994393, "learning_rate": 9.99003930247613e-06, "loss": 0.6918, "step": 1615 }, { "epoch": 0.04952801274978546, "grad_norm": 1.8836212204380256, "learning_rate": 9.99000796534802e-06, "loss": 0.717, "step": 1616 }, { "epoch": 0.049558661272526665, "grad_norm": 1.7556079997745913, "learning_rate": 9.989976579052082e-06, "loss": 0.8176, "step": 1617 }, { "epoch": 0.04958930979526787, "grad_norm": 1.6109398044638994, "learning_rate": 9.989945143588633e-06, "loss": 0.7508, "step": 1618 }, { "epoch": 0.04961995831800907, "grad_norm": 1.9023360936354197, "learning_rate": 9.989913658957973e-06, "loss": 0.7418, "step": 1619 }, { "epoch": 0.049650606840750275, "grad_norm": 1.528828361899304, "learning_rate": 9.989882125160419e-06, "loss": 0.7597, "step": 1620 }, { "epoch": 0.04968125536349148, "grad_norm": 1.597994195502266, "learning_rate": 9.98985054219628e-06, "loss": 0.7656, "step": 1621 }, { "epoch": 0.049711903886232686, "grad_norm": 1.6439468720298152, "learning_rate": 9.989818910065868e-06, "loss": 0.8683, "step": 1622 }, { "epoch": 0.049742552408973885, "grad_norm": 1.7594999939140974, "learning_rate": 9.989787228769489e-06, "loss": 0.6897, "step": 1623 }, { "epoch": 0.04977320093171509, "grad_norm": 1.8158373241234054, "learning_rate": 9.989755498307464e-06, "loss": 0.8581, "step": 1624 }, { "epoch": 0.049803849454456296, "grad_norm": 1.69935464075391, "learning_rate": 9.9897237186801e-06, "loss": 0.7892, "step": 1625 }, { "epoch": 0.0498344979771975, "grad_norm": 1.707322373272423, "learning_rate": 9.98969188988771e-06, "loss": 0.7764, "step": 1626 }, { "epoch": 0.0498651464999387, "grad_norm": 1.8397022284341973, "learning_rate": 9.989660011930612e-06, "loss": 0.8081, "step": 1627 }, { "epoch": 0.049895795022679906, "grad_norm": 1.4693961051619906, "learning_rate": 9.989628084809116e-06, "loss": 0.8115, "step": 1628 }, { "epoch": 0.04992644354542111, "grad_norm": 1.8589150603257953, "learning_rate": 9.989596108523537e-06, "loss": 0.7904, "step": 1629 }, { "epoch": 0.04995709206816232, "grad_norm": 1.7613869249138536, "learning_rate": 9.989564083074193e-06, "loss": 0.8256, "step": 1630 }, { "epoch": 0.049987740590903516, "grad_norm": 1.5998300873424727, "learning_rate": 9.989532008461396e-06, "loss": 0.8097, "step": 1631 }, { "epoch": 0.05001838911364472, "grad_norm": 0.8846944586167308, "learning_rate": 9.989499884685465e-06, "loss": 0.4995, "step": 1632 }, { "epoch": 0.05004903763638593, "grad_norm": 1.481155428532669, "learning_rate": 9.989467711746711e-06, "loss": 0.807, "step": 1633 }, { "epoch": 0.05007968615912713, "grad_norm": 1.719432099442601, "learning_rate": 9.98943548964546e-06, "loss": 0.8177, "step": 1634 }, { "epoch": 0.05011033468186833, "grad_norm": 0.6155577918937082, "learning_rate": 9.98940321838202e-06, "loss": 0.4996, "step": 1635 }, { "epoch": 0.05014098320460954, "grad_norm": 1.7208910890157505, "learning_rate": 9.989370897956718e-06, "loss": 0.7898, "step": 1636 }, { "epoch": 0.05017163172735074, "grad_norm": 1.5351054296520181, "learning_rate": 9.989338528369866e-06, "loss": 0.7914, "step": 1637 }, { "epoch": 0.05020228025009194, "grad_norm": 1.6363975450112125, "learning_rate": 9.989306109621783e-06, "loss": 0.7732, "step": 1638 }, { "epoch": 0.05023292877283315, "grad_norm": 1.6746875920595032, "learning_rate": 9.989273641712792e-06, "loss": 0.7434, "step": 1639 }, { "epoch": 0.05026357729557435, "grad_norm": 0.7795043767310161, "learning_rate": 9.989241124643212e-06, "loss": 0.5014, "step": 1640 }, { "epoch": 0.05029422581831556, "grad_norm": 0.7395490356802075, "learning_rate": 9.989208558413361e-06, "loss": 0.5128, "step": 1641 }, { "epoch": 0.05032487434105676, "grad_norm": 1.7282294579274042, "learning_rate": 9.989175943023563e-06, "loss": 0.809, "step": 1642 }, { "epoch": 0.050355522863797963, "grad_norm": 1.8865184021346655, "learning_rate": 9.989143278474136e-06, "loss": 0.7627, "step": 1643 }, { "epoch": 0.05038617138653917, "grad_norm": 1.6668384853272096, "learning_rate": 9.989110564765406e-06, "loss": 0.7924, "step": 1644 }, { "epoch": 0.050416819909280375, "grad_norm": 1.4001713635888018, "learning_rate": 9.989077801897691e-06, "loss": 0.7667, "step": 1645 }, { "epoch": 0.050447468432021574, "grad_norm": 1.679677070470053, "learning_rate": 9.989044989871317e-06, "loss": 0.7733, "step": 1646 }, { "epoch": 0.05047811695476278, "grad_norm": 1.818296334123281, "learning_rate": 9.989012128686609e-06, "loss": 0.9105, "step": 1647 }, { "epoch": 0.050508765477503985, "grad_norm": 1.473961897199017, "learning_rate": 9.988979218343884e-06, "loss": 0.7314, "step": 1648 }, { "epoch": 0.05053941400024519, "grad_norm": 1.7770271453068398, "learning_rate": 9.988946258843471e-06, "loss": 0.7941, "step": 1649 }, { "epoch": 0.05057006252298639, "grad_norm": 1.6199654655452735, "learning_rate": 9.988913250185695e-06, "loss": 0.8312, "step": 1650 }, { "epoch": 0.050600711045727595, "grad_norm": 1.546197984176314, "learning_rate": 9.98888019237088e-06, "loss": 0.6923, "step": 1651 }, { "epoch": 0.0506313595684688, "grad_norm": 1.5825999890668456, "learning_rate": 9.988847085399351e-06, "loss": 0.8196, "step": 1652 }, { "epoch": 0.050662008091210006, "grad_norm": 1.5907058619922483, "learning_rate": 9.988813929271438e-06, "loss": 0.7893, "step": 1653 }, { "epoch": 0.050692656613951205, "grad_norm": 1.7337880042305942, "learning_rate": 9.988780723987461e-06, "loss": 0.7543, "step": 1654 }, { "epoch": 0.05072330513669241, "grad_norm": 0.7325478488862522, "learning_rate": 9.988747469547752e-06, "loss": 0.4806, "step": 1655 }, { "epoch": 0.050753953659433616, "grad_norm": 1.6505482357688088, "learning_rate": 9.98871416595264e-06, "loss": 0.754, "step": 1656 }, { "epoch": 0.05078460218217482, "grad_norm": 1.6644141228536355, "learning_rate": 9.98868081320245e-06, "loss": 0.7262, "step": 1657 }, { "epoch": 0.05081525070491602, "grad_norm": 1.5701401046853347, "learning_rate": 9.988647411297512e-06, "loss": 0.845, "step": 1658 }, { "epoch": 0.050845899227657226, "grad_norm": 1.7507712047072503, "learning_rate": 9.988613960238152e-06, "loss": 0.774, "step": 1659 }, { "epoch": 0.05087654775039843, "grad_norm": 1.5129769032027482, "learning_rate": 9.988580460024705e-06, "loss": 0.7324, "step": 1660 }, { "epoch": 0.05090719627313964, "grad_norm": 1.645769753661642, "learning_rate": 9.988546910657497e-06, "loss": 0.7913, "step": 1661 }, { "epoch": 0.050937844795880836, "grad_norm": 1.6934763401334425, "learning_rate": 9.98851331213686e-06, "loss": 0.7609, "step": 1662 }, { "epoch": 0.05096849331862204, "grad_norm": 1.4622227758456825, "learning_rate": 9.988479664463127e-06, "loss": 0.7009, "step": 1663 }, { "epoch": 0.05099914184136325, "grad_norm": 1.5120964868737963, "learning_rate": 9.988445967636624e-06, "loss": 0.6576, "step": 1664 }, { "epoch": 0.05102979036410445, "grad_norm": 0.5696184737374042, "learning_rate": 9.988412221657689e-06, "loss": 0.5005, "step": 1665 }, { "epoch": 0.05106043888684565, "grad_norm": 1.7849402542100177, "learning_rate": 9.988378426526653e-06, "loss": 0.7991, "step": 1666 }, { "epoch": 0.05109108740958686, "grad_norm": 1.6589838859971273, "learning_rate": 9.988344582243845e-06, "loss": 0.6595, "step": 1667 }, { "epoch": 0.05112173593232806, "grad_norm": 1.414204915860292, "learning_rate": 9.988310688809603e-06, "loss": 0.7291, "step": 1668 }, { "epoch": 0.05115238445506927, "grad_norm": 1.7837900677092082, "learning_rate": 9.98827674622426e-06, "loss": 0.7088, "step": 1669 }, { "epoch": 0.05118303297781047, "grad_norm": 1.489710935468647, "learning_rate": 9.988242754488149e-06, "loss": 0.7524, "step": 1670 }, { "epoch": 0.051213681500551674, "grad_norm": 1.539627932286094, "learning_rate": 9.988208713601606e-06, "loss": 0.6554, "step": 1671 }, { "epoch": 0.05124433002329288, "grad_norm": 0.5078931028611001, "learning_rate": 9.988174623564967e-06, "loss": 0.4992, "step": 1672 }, { "epoch": 0.05127497854603408, "grad_norm": 1.7874613259758774, "learning_rate": 9.988140484378567e-06, "loss": 0.6961, "step": 1673 }, { "epoch": 0.051305627068775284, "grad_norm": 1.5537691008656407, "learning_rate": 9.988106296042741e-06, "loss": 0.6866, "step": 1674 }, { "epoch": 0.05133627559151649, "grad_norm": 1.5186244274255154, "learning_rate": 9.988072058557829e-06, "loss": 0.7413, "step": 1675 }, { "epoch": 0.051366924114257695, "grad_norm": 1.5851053463159994, "learning_rate": 9.988037771924167e-06, "loss": 0.7581, "step": 1676 }, { "epoch": 0.051397572636998894, "grad_norm": 1.7862836503076833, "learning_rate": 9.98800343614209e-06, "loss": 0.8344, "step": 1677 }, { "epoch": 0.0514282211597401, "grad_norm": 0.5255093019117535, "learning_rate": 9.987969051211942e-06, "loss": 0.4931, "step": 1678 }, { "epoch": 0.051458869682481305, "grad_norm": 1.7072293233200027, "learning_rate": 9.98793461713406e-06, "loss": 0.7005, "step": 1679 }, { "epoch": 0.05148951820522251, "grad_norm": 1.5868278099287232, "learning_rate": 9.987900133908777e-06, "loss": 0.7833, "step": 1680 }, { "epoch": 0.05152016672796371, "grad_norm": 1.628693187854298, "learning_rate": 9.98786560153644e-06, "loss": 0.7581, "step": 1681 }, { "epoch": 0.051550815250704915, "grad_norm": 1.6295727149551127, "learning_rate": 9.987831020017389e-06, "loss": 0.7966, "step": 1682 }, { "epoch": 0.05158146377344612, "grad_norm": 1.7534810570156372, "learning_rate": 9.98779638935196e-06, "loss": 0.7888, "step": 1683 }, { "epoch": 0.051612112296187326, "grad_norm": 1.4826871116569877, "learning_rate": 9.987761709540497e-06, "loss": 0.8282, "step": 1684 }, { "epoch": 0.051642760818928525, "grad_norm": 1.5546735621518886, "learning_rate": 9.987726980583343e-06, "loss": 0.7671, "step": 1685 }, { "epoch": 0.05167340934166973, "grad_norm": 1.3967686844139733, "learning_rate": 9.98769220248084e-06, "loss": 0.7507, "step": 1686 }, { "epoch": 0.051704057864410936, "grad_norm": 1.5072842392271986, "learning_rate": 9.987657375233329e-06, "loss": 0.7926, "step": 1687 }, { "epoch": 0.05173470638715214, "grad_norm": 1.8607218707997324, "learning_rate": 9.987622498841151e-06, "loss": 0.7603, "step": 1688 }, { "epoch": 0.05176535490989334, "grad_norm": 1.4503982966944828, "learning_rate": 9.987587573304655e-06, "loss": 0.7465, "step": 1689 }, { "epoch": 0.051796003432634546, "grad_norm": 1.514164265671312, "learning_rate": 9.987552598624182e-06, "loss": 0.8408, "step": 1690 }, { "epoch": 0.05182665195537575, "grad_norm": 1.5528845056527525, "learning_rate": 9.987517574800077e-06, "loss": 0.7374, "step": 1691 }, { "epoch": 0.05185730047811696, "grad_norm": 1.4127888026193895, "learning_rate": 9.987482501832686e-06, "loss": 0.7694, "step": 1692 }, { "epoch": 0.051887949000858156, "grad_norm": 1.5589603008037973, "learning_rate": 9.987447379722353e-06, "loss": 0.7497, "step": 1693 }, { "epoch": 0.05191859752359936, "grad_norm": 1.4907633715752602, "learning_rate": 9.987412208469424e-06, "loss": 0.7592, "step": 1694 }, { "epoch": 0.05194924604634057, "grad_norm": 1.6692570604840138, "learning_rate": 9.987376988074248e-06, "loss": 0.7146, "step": 1695 }, { "epoch": 0.051979894569081773, "grad_norm": 1.4232447509278332, "learning_rate": 9.987341718537169e-06, "loss": 0.715, "step": 1696 }, { "epoch": 0.05201054309182297, "grad_norm": 1.6127789085030457, "learning_rate": 9.987306399858537e-06, "loss": 0.7385, "step": 1697 }, { "epoch": 0.05204119161456418, "grad_norm": 1.6639059886458694, "learning_rate": 9.987271032038698e-06, "loss": 0.8268, "step": 1698 }, { "epoch": 0.052071840137305384, "grad_norm": 1.4994476775916026, "learning_rate": 9.987235615078003e-06, "loss": 0.7324, "step": 1699 }, { "epoch": 0.05210248866004659, "grad_norm": 1.707277554923166, "learning_rate": 9.987200148976798e-06, "loss": 0.7151, "step": 1700 }, { "epoch": 0.05213313718278779, "grad_norm": 1.7155405675817743, "learning_rate": 9.987164633735436e-06, "loss": 0.734, "step": 1701 }, { "epoch": 0.052163785705528994, "grad_norm": 1.7030566683028285, "learning_rate": 9.987129069354264e-06, "loss": 0.689, "step": 1702 }, { "epoch": 0.0521944342282702, "grad_norm": 0.5414862015209935, "learning_rate": 9.987093455833632e-06, "loss": 0.4873, "step": 1703 }, { "epoch": 0.0522250827510114, "grad_norm": 0.5551226875567424, "learning_rate": 9.987057793173893e-06, "loss": 0.4917, "step": 1704 }, { "epoch": 0.052255731273752604, "grad_norm": 1.7211846409368332, "learning_rate": 9.987022081375397e-06, "loss": 0.8307, "step": 1705 }, { "epoch": 0.05228637979649381, "grad_norm": 1.5847872932651563, "learning_rate": 9.986986320438496e-06, "loss": 0.7507, "step": 1706 }, { "epoch": 0.052317028319235015, "grad_norm": 1.6106229339760039, "learning_rate": 9.986950510363544e-06, "loss": 0.7041, "step": 1707 }, { "epoch": 0.052347676841976214, "grad_norm": 1.5691454999378969, "learning_rate": 9.986914651150894e-06, "loss": 0.8127, "step": 1708 }, { "epoch": 0.05237832536471742, "grad_norm": 1.63203465745586, "learning_rate": 9.986878742800895e-06, "loss": 0.6978, "step": 1709 }, { "epoch": 0.052408973887458625, "grad_norm": 0.6147856204144465, "learning_rate": 9.986842785313906e-06, "loss": 0.4914, "step": 1710 }, { "epoch": 0.05243962241019983, "grad_norm": 0.5768051406367004, "learning_rate": 9.986806778690277e-06, "loss": 0.4857, "step": 1711 }, { "epoch": 0.05247027093294103, "grad_norm": 1.7351111339171468, "learning_rate": 9.986770722930368e-06, "loss": 0.744, "step": 1712 }, { "epoch": 0.052500919455682235, "grad_norm": 1.7817053318343157, "learning_rate": 9.986734618034529e-06, "loss": 0.7167, "step": 1713 }, { "epoch": 0.05253156797842344, "grad_norm": 1.5204968867110444, "learning_rate": 9.986698464003117e-06, "loss": 0.7552, "step": 1714 }, { "epoch": 0.052562216501164646, "grad_norm": 1.573135142755208, "learning_rate": 9.986662260836491e-06, "loss": 0.7026, "step": 1715 }, { "epoch": 0.052592865023905845, "grad_norm": 0.6389044563887541, "learning_rate": 9.986626008535005e-06, "loss": 0.5163, "step": 1716 }, { "epoch": 0.05262351354664705, "grad_norm": 1.673064516995094, "learning_rate": 9.986589707099017e-06, "loss": 0.7671, "step": 1717 }, { "epoch": 0.052654162069388256, "grad_norm": 1.742167941197522, "learning_rate": 9.986553356528885e-06, "loss": 0.7284, "step": 1718 }, { "epoch": 0.05268481059212946, "grad_norm": 1.4376863996023586, "learning_rate": 9.986516956824967e-06, "loss": 0.593, "step": 1719 }, { "epoch": 0.05271545911487066, "grad_norm": 0.6752131714982433, "learning_rate": 9.98648050798762e-06, "loss": 0.5281, "step": 1720 }, { "epoch": 0.052746107637611866, "grad_norm": 1.708606562387132, "learning_rate": 9.986444010017207e-06, "loss": 0.792, "step": 1721 }, { "epoch": 0.05277675616035307, "grad_norm": 0.5071362814021425, "learning_rate": 9.986407462914086e-06, "loss": 0.4804, "step": 1722 }, { "epoch": 0.05280740468309428, "grad_norm": 0.497799753944869, "learning_rate": 9.986370866678614e-06, "loss": 0.4871, "step": 1723 }, { "epoch": 0.05283805320583548, "grad_norm": 1.6020892130771707, "learning_rate": 9.986334221311155e-06, "loss": 0.8127, "step": 1724 }, { "epoch": 0.05286870172857668, "grad_norm": 1.4790164780694641, "learning_rate": 9.986297526812068e-06, "loss": 0.8433, "step": 1725 }, { "epoch": 0.05289935025131789, "grad_norm": 1.5683524941079217, "learning_rate": 9.986260783181718e-06, "loss": 0.809, "step": 1726 }, { "epoch": 0.052929998774059094, "grad_norm": 0.7266815711675202, "learning_rate": 9.986223990420462e-06, "loss": 0.4958, "step": 1727 }, { "epoch": 0.05296064729680029, "grad_norm": 0.5946922413404221, "learning_rate": 9.986187148528668e-06, "loss": 0.4495, "step": 1728 }, { "epoch": 0.0529912958195415, "grad_norm": 1.42011226299563, "learning_rate": 9.986150257506696e-06, "loss": 0.7068, "step": 1729 }, { "epoch": 0.053021944342282704, "grad_norm": 1.6036216022849994, "learning_rate": 9.986113317354907e-06, "loss": 0.8148, "step": 1730 }, { "epoch": 0.05305259286502391, "grad_norm": 0.5012577133795125, "learning_rate": 9.986076328073669e-06, "loss": 0.4886, "step": 1731 }, { "epoch": 0.05308324138776511, "grad_norm": 1.6069752983392729, "learning_rate": 9.986039289663346e-06, "loss": 0.7882, "step": 1732 }, { "epoch": 0.053113889910506314, "grad_norm": 0.5008944731716587, "learning_rate": 9.986002202124302e-06, "loss": 0.4604, "step": 1733 }, { "epoch": 0.05314453843324752, "grad_norm": 0.5374866057877995, "learning_rate": 9.985965065456902e-06, "loss": 0.4915, "step": 1734 }, { "epoch": 0.05317518695598872, "grad_norm": 1.5092416635569008, "learning_rate": 9.985927879661513e-06, "loss": 0.7629, "step": 1735 }, { "epoch": 0.053205835478729924, "grad_norm": 1.6740481922431938, "learning_rate": 9.9858906447385e-06, "loss": 0.7814, "step": 1736 }, { "epoch": 0.05323648400147113, "grad_norm": 0.5054576920667494, "learning_rate": 9.985853360688232e-06, "loss": 0.4926, "step": 1737 }, { "epoch": 0.053267132524212335, "grad_norm": 1.5032931877197917, "learning_rate": 9.985816027511075e-06, "loss": 0.7589, "step": 1738 }, { "epoch": 0.053297781046953534, "grad_norm": 1.4245892864708607, "learning_rate": 9.985778645207397e-06, "loss": 0.7579, "step": 1739 }, { "epoch": 0.05332842956969474, "grad_norm": 1.9220328382478045, "learning_rate": 9.985741213777566e-06, "loss": 0.8531, "step": 1740 }, { "epoch": 0.053359078092435945, "grad_norm": 1.3990485704236013, "learning_rate": 9.985703733221951e-06, "loss": 0.7047, "step": 1741 }, { "epoch": 0.05338972661517715, "grad_norm": 1.537175049755796, "learning_rate": 9.985666203540923e-06, "loss": 0.7482, "step": 1742 }, { "epoch": 0.05342037513791835, "grad_norm": 1.7427271156156015, "learning_rate": 9.985628624734847e-06, "loss": 0.7919, "step": 1743 }, { "epoch": 0.053451023660659555, "grad_norm": 1.5597587176401995, "learning_rate": 9.985590996804099e-06, "loss": 0.8113, "step": 1744 }, { "epoch": 0.05348167218340076, "grad_norm": 0.5901441440460438, "learning_rate": 9.985553319749047e-06, "loss": 0.4857, "step": 1745 }, { "epoch": 0.053512320706141966, "grad_norm": 0.5461964901770641, "learning_rate": 9.985515593570061e-06, "loss": 0.5042, "step": 1746 }, { "epoch": 0.053542969228883165, "grad_norm": 1.788981616562603, "learning_rate": 9.985477818267517e-06, "loss": 0.7379, "step": 1747 }, { "epoch": 0.05357361775162437, "grad_norm": 1.859883247004284, "learning_rate": 9.985439993841783e-06, "loss": 0.818, "step": 1748 }, { "epoch": 0.053604266274365577, "grad_norm": 1.5562396285778513, "learning_rate": 9.985402120293232e-06, "loss": 0.7425, "step": 1749 }, { "epoch": 0.05363491479710678, "grad_norm": 1.7813374493132137, "learning_rate": 9.98536419762224e-06, "loss": 0.8041, "step": 1750 }, { "epoch": 0.05366556331984798, "grad_norm": 0.5326814090435376, "learning_rate": 9.985326225829179e-06, "loss": 0.4599, "step": 1751 }, { "epoch": 0.05369621184258919, "grad_norm": 2.596190917478796, "learning_rate": 9.985288204914424e-06, "loss": 0.7706, "step": 1752 }, { "epoch": 0.05372686036533039, "grad_norm": 1.5380800214602421, "learning_rate": 9.985250134878346e-06, "loss": 0.7838, "step": 1753 }, { "epoch": 0.0537575088880716, "grad_norm": 1.682627309552837, "learning_rate": 9.985212015721325e-06, "loss": 0.7719, "step": 1754 }, { "epoch": 0.0537881574108128, "grad_norm": 1.5583789776113346, "learning_rate": 9.985173847443733e-06, "loss": 0.8361, "step": 1755 }, { "epoch": 0.053818805933554, "grad_norm": 1.3937246651065567, "learning_rate": 9.985135630045948e-06, "loss": 0.7527, "step": 1756 }, { "epoch": 0.05384945445629521, "grad_norm": 1.5431864186842237, "learning_rate": 9.985097363528347e-06, "loss": 0.6584, "step": 1757 }, { "epoch": 0.053880102979036414, "grad_norm": 1.4232510630563193, "learning_rate": 9.985059047891305e-06, "loss": 0.5998, "step": 1758 }, { "epoch": 0.05391075150177761, "grad_norm": 1.686470998131619, "learning_rate": 9.9850206831352e-06, "loss": 0.7766, "step": 1759 }, { "epoch": 0.05394140002451882, "grad_norm": 1.7449409067064985, "learning_rate": 9.984982269260414e-06, "loss": 0.7722, "step": 1760 }, { "epoch": 0.053972048547260024, "grad_norm": 1.6050408566645016, "learning_rate": 9.984943806267319e-06, "loss": 0.7635, "step": 1761 }, { "epoch": 0.05400269707000123, "grad_norm": 1.6183800493794804, "learning_rate": 9.9849052941563e-06, "loss": 0.7856, "step": 1762 }, { "epoch": 0.05403334559274243, "grad_norm": 1.4695368810492722, "learning_rate": 9.984866732927731e-06, "loss": 0.7245, "step": 1763 }, { "epoch": 0.054063994115483634, "grad_norm": 0.5928176824815738, "learning_rate": 9.984828122581998e-06, "loss": 0.4899, "step": 1764 }, { "epoch": 0.05409464263822484, "grad_norm": 1.954822006391554, "learning_rate": 9.984789463119475e-06, "loss": 0.7754, "step": 1765 }, { "epoch": 0.05412529116096604, "grad_norm": 1.5765915590662998, "learning_rate": 9.984750754540547e-06, "loss": 0.7932, "step": 1766 }, { "epoch": 0.054155939683707244, "grad_norm": 1.670755652856968, "learning_rate": 9.984711996845596e-06, "loss": 0.8589, "step": 1767 }, { "epoch": 0.05418658820644845, "grad_norm": 0.5028199881491163, "learning_rate": 9.984673190034998e-06, "loss": 0.5101, "step": 1768 }, { "epoch": 0.054217236729189655, "grad_norm": 1.753455300180669, "learning_rate": 9.984634334109143e-06, "loss": 0.7127, "step": 1769 }, { "epoch": 0.054247885251930854, "grad_norm": 1.7480798942839832, "learning_rate": 9.984595429068411e-06, "loss": 0.7021, "step": 1770 }, { "epoch": 0.05427853377467206, "grad_norm": 0.5008270853026678, "learning_rate": 9.984556474913183e-06, "loss": 0.4699, "step": 1771 }, { "epoch": 0.054309182297413265, "grad_norm": 1.637742227421136, "learning_rate": 9.984517471643846e-06, "loss": 0.712, "step": 1772 }, { "epoch": 0.05433983082015447, "grad_norm": 1.8883571998348303, "learning_rate": 9.984478419260782e-06, "loss": 0.7355, "step": 1773 }, { "epoch": 0.05437047934289567, "grad_norm": 1.7044962253223015, "learning_rate": 9.984439317764378e-06, "loss": 0.7747, "step": 1774 }, { "epoch": 0.054401127865636875, "grad_norm": 0.5083626778345076, "learning_rate": 9.984400167155017e-06, "loss": 0.493, "step": 1775 }, { "epoch": 0.05443177638837808, "grad_norm": 1.5537346911030612, "learning_rate": 9.984360967433086e-06, "loss": 0.715, "step": 1776 }, { "epoch": 0.054462424911119287, "grad_norm": 1.6432394381292879, "learning_rate": 9.984321718598972e-06, "loss": 0.7713, "step": 1777 }, { "epoch": 0.054493073433860485, "grad_norm": 1.553096030182615, "learning_rate": 9.98428242065306e-06, "loss": 0.7938, "step": 1778 }, { "epoch": 0.05452372195660169, "grad_norm": 1.5729293245670015, "learning_rate": 9.98424307359574e-06, "loss": 0.7324, "step": 1779 }, { "epoch": 0.0545543704793429, "grad_norm": 1.442220391051885, "learning_rate": 9.984203677427393e-06, "loss": 0.7203, "step": 1780 }, { "epoch": 0.0545850190020841, "grad_norm": 1.4813785860453803, "learning_rate": 9.984164232148415e-06, "loss": 0.6332, "step": 1781 }, { "epoch": 0.0546156675248253, "grad_norm": 1.776307371956226, "learning_rate": 9.984124737759192e-06, "loss": 0.6677, "step": 1782 }, { "epoch": 0.05464631604756651, "grad_norm": 1.64038741749527, "learning_rate": 9.984085194260112e-06, "loss": 0.7416, "step": 1783 }, { "epoch": 0.05467696457030771, "grad_norm": 1.4707519209537734, "learning_rate": 9.984045601651566e-06, "loss": 0.8194, "step": 1784 }, { "epoch": 0.05470761309304892, "grad_norm": 1.6236418062442692, "learning_rate": 9.984005959933942e-06, "loss": 0.7895, "step": 1785 }, { "epoch": 0.05473826161579012, "grad_norm": 1.5723633387053468, "learning_rate": 9.983966269107634e-06, "loss": 0.759, "step": 1786 }, { "epoch": 0.05476891013853132, "grad_norm": 0.5181727035630732, "learning_rate": 9.98392652917303e-06, "loss": 0.4963, "step": 1787 }, { "epoch": 0.05479955866127253, "grad_norm": 1.5320697095607987, "learning_rate": 9.983886740130521e-06, "loss": 0.7837, "step": 1788 }, { "epoch": 0.054830207184013734, "grad_norm": 1.6803188404135478, "learning_rate": 9.983846901980505e-06, "loss": 0.7174, "step": 1789 }, { "epoch": 0.05486085570675493, "grad_norm": 1.5310712263304385, "learning_rate": 9.983807014723367e-06, "loss": 0.7362, "step": 1790 }, { "epoch": 0.05489150422949614, "grad_norm": 1.5930090194290953, "learning_rate": 9.983767078359505e-06, "loss": 0.8203, "step": 1791 }, { "epoch": 0.054922152752237344, "grad_norm": 1.6152724913239342, "learning_rate": 9.983727092889309e-06, "loss": 0.756, "step": 1792 }, { "epoch": 0.05495280127497855, "grad_norm": 1.6363487820675005, "learning_rate": 9.983687058313177e-06, "loss": 0.6636, "step": 1793 }, { "epoch": 0.05498344979771975, "grad_norm": 1.6988364724268028, "learning_rate": 9.9836469746315e-06, "loss": 0.8051, "step": 1794 }, { "epoch": 0.055014098320460954, "grad_norm": 1.4384806373699182, "learning_rate": 9.983606841844672e-06, "loss": 0.7385, "step": 1795 }, { "epoch": 0.05504474684320216, "grad_norm": 1.6551127972900572, "learning_rate": 9.983566659953094e-06, "loss": 0.7188, "step": 1796 }, { "epoch": 0.05507539536594336, "grad_norm": 0.5200306686870884, "learning_rate": 9.983526428957157e-06, "loss": 0.4774, "step": 1797 }, { "epoch": 0.055106043888684564, "grad_norm": 1.5471342338267902, "learning_rate": 9.98348614885726e-06, "loss": 0.7677, "step": 1798 }, { "epoch": 0.05513669241142577, "grad_norm": 1.6360282703096833, "learning_rate": 9.983445819653798e-06, "loss": 0.8186, "step": 1799 }, { "epoch": 0.055167340934166975, "grad_norm": 1.8233930476045126, "learning_rate": 9.983405441347171e-06, "loss": 0.6789, "step": 1800 }, { "epoch": 0.055197989456908174, "grad_norm": 1.5030621983045191, "learning_rate": 9.983365013937774e-06, "loss": 0.7577, "step": 1801 }, { "epoch": 0.05522863797964938, "grad_norm": 1.5260026416842731, "learning_rate": 9.983324537426007e-06, "loss": 0.7667, "step": 1802 }, { "epoch": 0.055259286502390585, "grad_norm": 1.5057150500816916, "learning_rate": 9.983284011812267e-06, "loss": 0.6857, "step": 1803 }, { "epoch": 0.05528993502513179, "grad_norm": 1.6056386003960113, "learning_rate": 9.983243437096955e-06, "loss": 0.7239, "step": 1804 }, { "epoch": 0.05532058354787299, "grad_norm": 1.6052158554043365, "learning_rate": 9.983202813280472e-06, "loss": 0.7777, "step": 1805 }, { "epoch": 0.055351232070614195, "grad_norm": 1.6818682418651043, "learning_rate": 9.983162140363214e-06, "loss": 0.5889, "step": 1806 }, { "epoch": 0.0553818805933554, "grad_norm": 1.6082125253442625, "learning_rate": 9.983121418345587e-06, "loss": 0.6899, "step": 1807 }, { "epoch": 0.05541252911609661, "grad_norm": 0.5291878722310691, "learning_rate": 9.983080647227987e-06, "loss": 0.4724, "step": 1808 }, { "epoch": 0.055443177638837805, "grad_norm": 1.5342105325013762, "learning_rate": 9.98303982701082e-06, "loss": 0.8025, "step": 1809 }, { "epoch": 0.05547382616157901, "grad_norm": 2.0378929380817987, "learning_rate": 9.982998957694487e-06, "loss": 0.7265, "step": 1810 }, { "epoch": 0.05550447468432022, "grad_norm": 0.48896882037838596, "learning_rate": 9.98295803927939e-06, "loss": 0.5186, "step": 1811 }, { "epoch": 0.05553512320706142, "grad_norm": 1.5270848111329527, "learning_rate": 9.982917071765932e-06, "loss": 0.8533, "step": 1812 }, { "epoch": 0.05556577172980262, "grad_norm": 1.478307910966331, "learning_rate": 9.982876055154518e-06, "loss": 0.8184, "step": 1813 }, { "epoch": 0.05559642025254383, "grad_norm": 0.5079361309571758, "learning_rate": 9.98283498944555e-06, "loss": 0.4997, "step": 1814 }, { "epoch": 0.05562706877528503, "grad_norm": 2.182294166614875, "learning_rate": 9.982793874639436e-06, "loss": 0.7717, "step": 1815 }, { "epoch": 0.05565771729802624, "grad_norm": 1.772745667100513, "learning_rate": 9.982752710736577e-06, "loss": 0.8588, "step": 1816 }, { "epoch": 0.05568836582076744, "grad_norm": 1.402077905536804, "learning_rate": 9.982711497737382e-06, "loss": 0.7315, "step": 1817 }, { "epoch": 0.05571901434350864, "grad_norm": 0.46779764565080617, "learning_rate": 9.982670235642255e-06, "loss": 0.4601, "step": 1818 }, { "epoch": 0.05574966286624985, "grad_norm": 1.5391730270631192, "learning_rate": 9.982628924451603e-06, "loss": 0.7028, "step": 1819 }, { "epoch": 0.055780311388991054, "grad_norm": 1.645628018176252, "learning_rate": 9.982587564165835e-06, "loss": 0.7517, "step": 1820 }, { "epoch": 0.05581095991173225, "grad_norm": 0.49216546830911506, "learning_rate": 9.982546154785355e-06, "loss": 0.4999, "step": 1821 }, { "epoch": 0.05584160843447346, "grad_norm": 1.7981320728332442, "learning_rate": 9.982504696310574e-06, "loss": 0.7674, "step": 1822 }, { "epoch": 0.055872256957214664, "grad_norm": 1.5787220256853671, "learning_rate": 9.982463188741897e-06, "loss": 0.7478, "step": 1823 }, { "epoch": 0.05590290547995587, "grad_norm": 0.4938809725977553, "learning_rate": 9.982421632079738e-06, "loss": 0.4924, "step": 1824 }, { "epoch": 0.05593355400269707, "grad_norm": 1.6308399937505058, "learning_rate": 9.982380026324505e-06, "loss": 0.7552, "step": 1825 }, { "epoch": 0.055964202525438274, "grad_norm": 0.4994076251937569, "learning_rate": 9.982338371476604e-06, "loss": 0.5131, "step": 1826 }, { "epoch": 0.05599485104817948, "grad_norm": 1.4145605635894427, "learning_rate": 9.982296667536449e-06, "loss": 0.7523, "step": 1827 }, { "epoch": 0.05602549957092068, "grad_norm": 1.530722538991202, "learning_rate": 9.98225491450445e-06, "loss": 0.7192, "step": 1828 }, { "epoch": 0.056056148093661884, "grad_norm": 1.634634909523429, "learning_rate": 9.98221311238102e-06, "loss": 0.809, "step": 1829 }, { "epoch": 0.05608679661640309, "grad_norm": 1.5926846069393463, "learning_rate": 9.982171261166568e-06, "loss": 0.6968, "step": 1830 }, { "epoch": 0.056117445139144295, "grad_norm": 1.5968682386585638, "learning_rate": 9.982129360861507e-06, "loss": 0.81, "step": 1831 }, { "epoch": 0.056148093661885494, "grad_norm": 1.8631560689520088, "learning_rate": 9.982087411466253e-06, "loss": 0.775, "step": 1832 }, { "epoch": 0.0561787421846267, "grad_norm": 1.5829909313734092, "learning_rate": 9.982045412981217e-06, "loss": 0.8472, "step": 1833 }, { "epoch": 0.056209390707367905, "grad_norm": 1.5110513849191942, "learning_rate": 9.982003365406812e-06, "loss": 0.7088, "step": 1834 }, { "epoch": 0.05624003923010911, "grad_norm": 1.5491146706624688, "learning_rate": 9.981961268743453e-06, "loss": 0.7072, "step": 1835 }, { "epoch": 0.05627068775285031, "grad_norm": 1.4126322005727614, "learning_rate": 9.981919122991554e-06, "loss": 0.6672, "step": 1836 }, { "epoch": 0.056301336275591515, "grad_norm": 1.6047072587211784, "learning_rate": 9.981876928151532e-06, "loss": 0.8743, "step": 1837 }, { "epoch": 0.05633198479833272, "grad_norm": 1.6523917560997028, "learning_rate": 9.9818346842238e-06, "loss": 0.8433, "step": 1838 }, { "epoch": 0.05636263332107393, "grad_norm": 1.679916635912449, "learning_rate": 9.98179239120878e-06, "loss": 0.7508, "step": 1839 }, { "epoch": 0.056393281843815125, "grad_norm": 1.4748021699557985, "learning_rate": 9.981750049106882e-06, "loss": 0.7139, "step": 1840 }, { "epoch": 0.05642393036655633, "grad_norm": 1.640980551440036, "learning_rate": 9.981707657918529e-06, "loss": 0.7456, "step": 1841 }, { "epoch": 0.05645457888929754, "grad_norm": 1.6016076772152663, "learning_rate": 9.981665217644134e-06, "loss": 0.7313, "step": 1842 }, { "epoch": 0.05648522741203874, "grad_norm": 1.524728137702015, "learning_rate": 9.981622728284117e-06, "loss": 0.7134, "step": 1843 }, { "epoch": 0.05651587593477994, "grad_norm": 1.614745668809681, "learning_rate": 9.981580189838896e-06, "loss": 0.7678, "step": 1844 }, { "epoch": 0.05654652445752115, "grad_norm": 1.569907018902672, "learning_rate": 9.981537602308892e-06, "loss": 0.9047, "step": 1845 }, { "epoch": 0.05657717298026235, "grad_norm": 1.5823474727461697, "learning_rate": 9.981494965694522e-06, "loss": 0.73, "step": 1846 }, { "epoch": 0.05660782150300356, "grad_norm": 1.6091837355224619, "learning_rate": 9.981452279996208e-06, "loss": 0.8319, "step": 1847 }, { "epoch": 0.05663847002574476, "grad_norm": 1.5988165345941996, "learning_rate": 9.981409545214371e-06, "loss": 0.773, "step": 1848 }, { "epoch": 0.05666911854848596, "grad_norm": 1.713768850915778, "learning_rate": 9.981366761349431e-06, "loss": 0.8476, "step": 1849 }, { "epoch": 0.05669976707122717, "grad_norm": 1.6845708179130143, "learning_rate": 9.981323928401809e-06, "loss": 0.6519, "step": 1850 }, { "epoch": 0.056730415593968374, "grad_norm": 1.6576612034493081, "learning_rate": 9.981281046371928e-06, "loss": 0.7696, "step": 1851 }, { "epoch": 0.05676106411670957, "grad_norm": 1.6724628170714604, "learning_rate": 9.981238115260212e-06, "loss": 0.745, "step": 1852 }, { "epoch": 0.05679171263945078, "grad_norm": 1.6165089694105188, "learning_rate": 9.981195135067081e-06, "loss": 0.835, "step": 1853 }, { "epoch": 0.056822361162191984, "grad_norm": 1.6529525244535281, "learning_rate": 9.981152105792959e-06, "loss": 0.6999, "step": 1854 }, { "epoch": 0.05685300968493319, "grad_norm": 1.832314804682502, "learning_rate": 9.981109027438273e-06, "loss": 0.786, "step": 1855 }, { "epoch": 0.05688365820767439, "grad_norm": 1.5217277810475778, "learning_rate": 9.981065900003444e-06, "loss": 0.7334, "step": 1856 }, { "epoch": 0.056914306730415594, "grad_norm": 1.822436718039519, "learning_rate": 9.981022723488897e-06, "loss": 0.8176, "step": 1857 }, { "epoch": 0.0569449552531568, "grad_norm": 1.5087438008348288, "learning_rate": 9.980979497895061e-06, "loss": 0.7717, "step": 1858 }, { "epoch": 0.056975603775898, "grad_norm": 1.4384585222223674, "learning_rate": 9.980936223222358e-06, "loss": 0.6251, "step": 1859 }, { "epoch": 0.057006252298639204, "grad_norm": 1.5893567249284244, "learning_rate": 9.980892899471216e-06, "loss": 0.722, "step": 1860 }, { "epoch": 0.05703690082138041, "grad_norm": 1.550091728895623, "learning_rate": 9.980849526642063e-06, "loss": 0.734, "step": 1861 }, { "epoch": 0.057067549344121615, "grad_norm": 1.613917614144211, "learning_rate": 9.980806104735325e-06, "loss": 0.6685, "step": 1862 }, { "epoch": 0.057098197866862814, "grad_norm": 1.5385713493409514, "learning_rate": 9.980762633751429e-06, "loss": 0.702, "step": 1863 }, { "epoch": 0.05712884638960402, "grad_norm": 1.6061683435870215, "learning_rate": 9.980719113690805e-06, "loss": 0.7417, "step": 1864 }, { "epoch": 0.057159494912345225, "grad_norm": 0.5252915347936102, "learning_rate": 9.980675544553881e-06, "loss": 0.4833, "step": 1865 }, { "epoch": 0.05719014343508643, "grad_norm": 1.4495292115763776, "learning_rate": 9.980631926341086e-06, "loss": 0.7004, "step": 1866 }, { "epoch": 0.05722079195782763, "grad_norm": 1.5311584565074754, "learning_rate": 9.980588259052853e-06, "loss": 0.773, "step": 1867 }, { "epoch": 0.057251440480568835, "grad_norm": 1.709438608933789, "learning_rate": 9.980544542689606e-06, "loss": 0.7626, "step": 1868 }, { "epoch": 0.05728208900331004, "grad_norm": 1.4564927221654589, "learning_rate": 9.98050077725178e-06, "loss": 0.6438, "step": 1869 }, { "epoch": 0.05731273752605125, "grad_norm": 1.4347155023667129, "learning_rate": 9.980456962739808e-06, "loss": 0.7326, "step": 1870 }, { "epoch": 0.057343386048792445, "grad_norm": 1.647747852506988, "learning_rate": 9.980413099154116e-06, "loss": 0.872, "step": 1871 }, { "epoch": 0.05737403457153365, "grad_norm": 1.836014225240604, "learning_rate": 9.98036918649514e-06, "loss": 0.7457, "step": 1872 }, { "epoch": 0.05740468309427486, "grad_norm": 1.9657631601525845, "learning_rate": 9.980325224763315e-06, "loss": 0.8472, "step": 1873 }, { "epoch": 0.05743533161701606, "grad_norm": 1.5615759564871092, "learning_rate": 9.980281213959069e-06, "loss": 0.8674, "step": 1874 }, { "epoch": 0.05746598013975726, "grad_norm": 1.595394831576239, "learning_rate": 9.980237154082838e-06, "loss": 0.8034, "step": 1875 }, { "epoch": 0.05749662866249847, "grad_norm": 1.5973615386348863, "learning_rate": 9.980193045135056e-06, "loss": 0.8167, "step": 1876 }, { "epoch": 0.05752727718523967, "grad_norm": 1.4257258866306253, "learning_rate": 9.980148887116158e-06, "loss": 0.7716, "step": 1877 }, { "epoch": 0.05755792570798088, "grad_norm": 1.6489971408405375, "learning_rate": 9.980104680026579e-06, "loss": 0.7772, "step": 1878 }, { "epoch": 0.05758857423072208, "grad_norm": 1.7273348226609218, "learning_rate": 9.980060423866756e-06, "loss": 0.773, "step": 1879 }, { "epoch": 0.05761922275346328, "grad_norm": 1.592229629202806, "learning_rate": 9.98001611863712e-06, "loss": 0.6838, "step": 1880 }, { "epoch": 0.05764987127620449, "grad_norm": 1.5563808339680854, "learning_rate": 9.979971764338112e-06, "loss": 0.7589, "step": 1881 }, { "epoch": 0.057680519798945694, "grad_norm": 1.5919520498286528, "learning_rate": 9.97992736097017e-06, "loss": 0.7354, "step": 1882 }, { "epoch": 0.05771116832168689, "grad_norm": 2.2135323740584854, "learning_rate": 9.979882908533728e-06, "loss": 0.8767, "step": 1883 }, { "epoch": 0.0577418168444281, "grad_norm": 1.6775124294934314, "learning_rate": 9.979838407029226e-06, "loss": 0.787, "step": 1884 }, { "epoch": 0.057772465367169304, "grad_norm": 1.4876336768822038, "learning_rate": 9.9797938564571e-06, "loss": 0.8497, "step": 1885 }, { "epoch": 0.05780311388991051, "grad_norm": 1.5885357036909498, "learning_rate": 9.979749256817794e-06, "loss": 0.7143, "step": 1886 }, { "epoch": 0.05783376241265171, "grad_norm": 1.6931427582560934, "learning_rate": 9.979704608111742e-06, "loss": 0.7102, "step": 1887 }, { "epoch": 0.057864410935392914, "grad_norm": 0.5494740101648646, "learning_rate": 9.97965991033939e-06, "loss": 0.4805, "step": 1888 }, { "epoch": 0.05789505945813412, "grad_norm": 1.7331481837275555, "learning_rate": 9.979615163501172e-06, "loss": 0.7763, "step": 1889 }, { "epoch": 0.05792570798087532, "grad_norm": 1.6617876781452454, "learning_rate": 9.979570367597532e-06, "loss": 0.6952, "step": 1890 }, { "epoch": 0.057956356503616524, "grad_norm": 1.770503737169132, "learning_rate": 9.97952552262891e-06, "loss": 0.7833, "step": 1891 }, { "epoch": 0.05798700502635773, "grad_norm": 0.5062635885687481, "learning_rate": 9.97948062859575e-06, "loss": 0.476, "step": 1892 }, { "epoch": 0.058017653549098935, "grad_norm": 1.4034733334050082, "learning_rate": 9.979435685498496e-06, "loss": 0.6651, "step": 1893 }, { "epoch": 0.058048302071840134, "grad_norm": 1.5574119773059296, "learning_rate": 9.979390693337585e-06, "loss": 0.7364, "step": 1894 }, { "epoch": 0.05807895059458134, "grad_norm": 1.5830486338691714, "learning_rate": 9.979345652113464e-06, "loss": 0.6927, "step": 1895 }, { "epoch": 0.058109599117322545, "grad_norm": 1.5268833307839642, "learning_rate": 9.979300561826576e-06, "loss": 0.7186, "step": 1896 }, { "epoch": 0.05814024764006375, "grad_norm": 1.3703925226156228, "learning_rate": 9.979255422477366e-06, "loss": 0.7192, "step": 1897 }, { "epoch": 0.05817089616280495, "grad_norm": 1.6355100206817725, "learning_rate": 9.979210234066278e-06, "loss": 0.884, "step": 1898 }, { "epoch": 0.058201544685546155, "grad_norm": 0.5581848288722439, "learning_rate": 9.979164996593757e-06, "loss": 0.4891, "step": 1899 }, { "epoch": 0.05823219320828736, "grad_norm": 1.4619067919912367, "learning_rate": 9.979119710060252e-06, "loss": 0.7968, "step": 1900 }, { "epoch": 0.05826284173102857, "grad_norm": 1.6527291512458255, "learning_rate": 9.979074374466203e-06, "loss": 0.7698, "step": 1901 }, { "epoch": 0.058293490253769766, "grad_norm": 1.8016341102171338, "learning_rate": 9.979028989812064e-06, "loss": 0.7204, "step": 1902 }, { "epoch": 0.05832413877651097, "grad_norm": 1.6668899879280203, "learning_rate": 9.978983556098274e-06, "loss": 0.8034, "step": 1903 }, { "epoch": 0.05835478729925218, "grad_norm": 0.5550269358074226, "learning_rate": 9.978938073325288e-06, "loss": 0.5194, "step": 1904 }, { "epoch": 0.05838543582199338, "grad_norm": 1.6284747364413554, "learning_rate": 9.97889254149355e-06, "loss": 0.8133, "step": 1905 }, { "epoch": 0.05841608434473458, "grad_norm": 1.4930069549127418, "learning_rate": 9.978846960603512e-06, "loss": 0.7434, "step": 1906 }, { "epoch": 0.05844673286747579, "grad_norm": 1.7883525996718985, "learning_rate": 9.97880133065562e-06, "loss": 0.7257, "step": 1907 }, { "epoch": 0.05847738139021699, "grad_norm": 1.6704261598499814, "learning_rate": 9.978755651650322e-06, "loss": 0.8542, "step": 1908 }, { "epoch": 0.0585080299129582, "grad_norm": 1.4370444988320863, "learning_rate": 9.978709923588074e-06, "loss": 0.7507, "step": 1909 }, { "epoch": 0.0585386784356994, "grad_norm": 1.736090622608554, "learning_rate": 9.978664146469323e-06, "loss": 0.7835, "step": 1910 }, { "epoch": 0.0585693269584406, "grad_norm": 1.5803388223395243, "learning_rate": 9.978618320294518e-06, "loss": 0.7506, "step": 1911 }, { "epoch": 0.05859997548118181, "grad_norm": 1.7609429428766095, "learning_rate": 9.978572445064114e-06, "loss": 0.7717, "step": 1912 }, { "epoch": 0.058630624003923014, "grad_norm": 1.77417856174638, "learning_rate": 9.978526520778564e-06, "loss": 0.8253, "step": 1913 }, { "epoch": 0.05866127252666421, "grad_norm": 1.5177380951153072, "learning_rate": 9.978480547438317e-06, "loss": 0.717, "step": 1914 }, { "epoch": 0.05869192104940542, "grad_norm": 1.5665868042613589, "learning_rate": 9.978434525043825e-06, "loss": 0.7486, "step": 1915 }, { "epoch": 0.058722569572146624, "grad_norm": 1.58285746704552, "learning_rate": 9.978388453595547e-06, "loss": 0.8323, "step": 1916 }, { "epoch": 0.05875321809488783, "grad_norm": 1.4611009576070293, "learning_rate": 9.978342333093932e-06, "loss": 0.662, "step": 1917 }, { "epoch": 0.05878386661762903, "grad_norm": 1.6692404818348932, "learning_rate": 9.978296163539436e-06, "loss": 0.7359, "step": 1918 }, { "epoch": 0.058814515140370234, "grad_norm": 1.615866064863875, "learning_rate": 9.978249944932515e-06, "loss": 0.745, "step": 1919 }, { "epoch": 0.05884516366311144, "grad_norm": 0.525262308984349, "learning_rate": 9.978203677273623e-06, "loss": 0.5043, "step": 1920 }, { "epoch": 0.058875812185852645, "grad_norm": 1.7431781138999887, "learning_rate": 9.97815736056322e-06, "loss": 0.8015, "step": 1921 }, { "epoch": 0.058906460708593844, "grad_norm": 1.7114115351618886, "learning_rate": 9.978110994801754e-06, "loss": 0.793, "step": 1922 }, { "epoch": 0.05893710923133505, "grad_norm": 1.5040333647453994, "learning_rate": 9.978064579989688e-06, "loss": 0.8277, "step": 1923 }, { "epoch": 0.058967757754076255, "grad_norm": 1.4673032099823644, "learning_rate": 9.97801811612748e-06, "loss": 0.8189, "step": 1924 }, { "epoch": 0.058998406276817454, "grad_norm": 1.6613155598079077, "learning_rate": 9.977971603215583e-06, "loss": 0.7217, "step": 1925 }, { "epoch": 0.05902905479955866, "grad_norm": 1.784174577844942, "learning_rate": 9.97792504125446e-06, "loss": 0.8398, "step": 1926 }, { "epoch": 0.059059703322299865, "grad_norm": 1.5658647088380262, "learning_rate": 9.977878430244566e-06, "loss": 0.7939, "step": 1927 }, { "epoch": 0.05909035184504107, "grad_norm": 0.4996372769607213, "learning_rate": 9.977831770186364e-06, "loss": 0.5056, "step": 1928 }, { "epoch": 0.05912100036778227, "grad_norm": 0.49110507956185445, "learning_rate": 9.977785061080312e-06, "loss": 0.4744, "step": 1929 }, { "epoch": 0.059151648890523476, "grad_norm": 0.4987605310036383, "learning_rate": 9.97773830292687e-06, "loss": 0.4913, "step": 1930 }, { "epoch": 0.05918229741326468, "grad_norm": 1.4562651606960908, "learning_rate": 9.977691495726498e-06, "loss": 0.8194, "step": 1931 }, { "epoch": 0.05921294593600589, "grad_norm": 1.5905182216653473, "learning_rate": 9.977644639479658e-06, "loss": 0.7772, "step": 1932 }, { "epoch": 0.059243594458747086, "grad_norm": 1.6221861518024447, "learning_rate": 9.977597734186813e-06, "loss": 0.8708, "step": 1933 }, { "epoch": 0.05927424298148829, "grad_norm": 1.732555117614081, "learning_rate": 9.977550779848422e-06, "loss": 0.8988, "step": 1934 }, { "epoch": 0.0593048915042295, "grad_norm": 1.4423002158861338, "learning_rate": 9.977503776464952e-06, "loss": 0.7428, "step": 1935 }, { "epoch": 0.0593355400269707, "grad_norm": 1.5217823458785054, "learning_rate": 9.977456724036862e-06, "loss": 0.7442, "step": 1936 }, { "epoch": 0.0593661885497119, "grad_norm": 1.4123392075351056, "learning_rate": 9.977409622564619e-06, "loss": 0.7829, "step": 1937 }, { "epoch": 0.05939683707245311, "grad_norm": 1.5263750360936146, "learning_rate": 9.977362472048685e-06, "loss": 0.7502, "step": 1938 }, { "epoch": 0.05942748559519431, "grad_norm": 1.64540378864104, "learning_rate": 9.977315272489523e-06, "loss": 0.8554, "step": 1939 }, { "epoch": 0.05945813411793552, "grad_norm": 0.6105712992364507, "learning_rate": 9.9772680238876e-06, "loss": 0.501, "step": 1940 }, { "epoch": 0.05948878264067672, "grad_norm": 1.7297015364344552, "learning_rate": 9.977220726243384e-06, "loss": 0.801, "step": 1941 }, { "epoch": 0.05951943116341792, "grad_norm": 1.497344718572308, "learning_rate": 9.977173379557338e-06, "loss": 0.754, "step": 1942 }, { "epoch": 0.05955007968615913, "grad_norm": 1.4658576760718314, "learning_rate": 9.97712598382993e-06, "loss": 0.676, "step": 1943 }, { "epoch": 0.059580728208900334, "grad_norm": 1.4511573337218349, "learning_rate": 9.977078539061625e-06, "loss": 0.7224, "step": 1944 }, { "epoch": 0.05961137673164153, "grad_norm": 1.477621015809709, "learning_rate": 9.977031045252892e-06, "loss": 0.7176, "step": 1945 }, { "epoch": 0.05964202525438274, "grad_norm": 1.4945806266078348, "learning_rate": 9.976983502404199e-06, "loss": 0.8357, "step": 1946 }, { "epoch": 0.059672673777123944, "grad_norm": 1.488701617813769, "learning_rate": 9.976935910516015e-06, "loss": 0.7056, "step": 1947 }, { "epoch": 0.05970332229986515, "grad_norm": 1.478422058943943, "learning_rate": 9.976888269588806e-06, "loss": 0.6784, "step": 1948 }, { "epoch": 0.05973397082260635, "grad_norm": 1.5445529074443165, "learning_rate": 9.976840579623045e-06, "loss": 0.8546, "step": 1949 }, { "epoch": 0.059764619345347554, "grad_norm": 1.4284957911700247, "learning_rate": 9.9767928406192e-06, "loss": 0.7297, "step": 1950 }, { "epoch": 0.05979526786808876, "grad_norm": 1.4375071525710972, "learning_rate": 9.976745052577741e-06, "loss": 0.7564, "step": 1951 }, { "epoch": 0.059825916390829965, "grad_norm": 0.5945446047006581, "learning_rate": 9.97669721549914e-06, "loss": 0.5121, "step": 1952 }, { "epoch": 0.059856564913571164, "grad_norm": 1.535823964234296, "learning_rate": 9.97664932938387e-06, "loss": 0.6956, "step": 1953 }, { "epoch": 0.05988721343631237, "grad_norm": 1.4947042812865337, "learning_rate": 9.9766013942324e-06, "loss": 0.7709, "step": 1954 }, { "epoch": 0.059917861959053575, "grad_norm": 2.0006574364808016, "learning_rate": 9.9765534100452e-06, "loss": 0.7327, "step": 1955 }, { "epoch": 0.059948510481794774, "grad_norm": 0.512480541672164, "learning_rate": 9.97650537682275e-06, "loss": 0.4898, "step": 1956 }, { "epoch": 0.05997915900453598, "grad_norm": 1.614709307618773, "learning_rate": 9.976457294565515e-06, "loss": 0.7977, "step": 1957 }, { "epoch": 0.060009807527277186, "grad_norm": 0.5243431438678429, "learning_rate": 9.976409163273977e-06, "loss": 0.4848, "step": 1958 }, { "epoch": 0.06004045605001839, "grad_norm": 1.5375107117502296, "learning_rate": 9.976360982948605e-06, "loss": 0.783, "step": 1959 }, { "epoch": 0.06007110457275959, "grad_norm": 1.6748676553849664, "learning_rate": 9.976312753589874e-06, "loss": 0.8292, "step": 1960 }, { "epoch": 0.060101753095500796, "grad_norm": 0.5227008438910874, "learning_rate": 9.976264475198261e-06, "loss": 0.4995, "step": 1961 }, { "epoch": 0.060132401618242, "grad_norm": 1.495077353843014, "learning_rate": 9.976216147774242e-06, "loss": 0.7893, "step": 1962 }, { "epoch": 0.06016305014098321, "grad_norm": 0.49258976346237937, "learning_rate": 9.97616777131829e-06, "loss": 0.5111, "step": 1963 }, { "epoch": 0.060193698663724406, "grad_norm": 1.6108630688564982, "learning_rate": 9.976119345830885e-06, "loss": 0.8205, "step": 1964 }, { "epoch": 0.06022434718646561, "grad_norm": 1.5682388878003528, "learning_rate": 9.976070871312502e-06, "loss": 0.721, "step": 1965 }, { "epoch": 0.06025499570920682, "grad_norm": 1.8959642666894987, "learning_rate": 9.976022347763621e-06, "loss": 0.8284, "step": 1966 }, { "epoch": 0.06028564423194802, "grad_norm": 1.6492021444104787, "learning_rate": 9.975973775184718e-06, "loss": 0.7874, "step": 1967 }, { "epoch": 0.06031629275468922, "grad_norm": 1.6653648478094483, "learning_rate": 9.975925153576271e-06, "loss": 0.8824, "step": 1968 }, { "epoch": 0.06034694127743043, "grad_norm": 1.5398752588028592, "learning_rate": 9.97587648293876e-06, "loss": 0.737, "step": 1969 }, { "epoch": 0.06037758980017163, "grad_norm": 1.8254579222258562, "learning_rate": 9.975827763272667e-06, "loss": 0.8136, "step": 1970 }, { "epoch": 0.06040823832291284, "grad_norm": 1.7516377328410786, "learning_rate": 9.975778994578469e-06, "loss": 0.788, "step": 1971 }, { "epoch": 0.06043888684565404, "grad_norm": 1.700758385201495, "learning_rate": 9.975730176856648e-06, "loss": 0.7008, "step": 1972 }, { "epoch": 0.06046953536839524, "grad_norm": 1.59651982518107, "learning_rate": 9.975681310107683e-06, "loss": 0.7346, "step": 1973 }, { "epoch": 0.06050018389113645, "grad_norm": 0.6449975762822089, "learning_rate": 9.975632394332057e-06, "loss": 0.4843, "step": 1974 }, { "epoch": 0.060530832413877654, "grad_norm": 1.6266215306582217, "learning_rate": 9.975583429530255e-06, "loss": 0.7924, "step": 1975 }, { "epoch": 0.06056148093661885, "grad_norm": 1.71273516595213, "learning_rate": 9.975534415702753e-06, "loss": 0.763, "step": 1976 }, { "epoch": 0.06059212945936006, "grad_norm": 1.5424797512061492, "learning_rate": 9.97548535285004e-06, "loss": 0.8609, "step": 1977 }, { "epoch": 0.060622777982101264, "grad_norm": 1.7250599505788689, "learning_rate": 9.975436240972594e-06, "loss": 0.7855, "step": 1978 }, { "epoch": 0.06065342650484247, "grad_norm": 1.5902299892475589, "learning_rate": 9.975387080070904e-06, "loss": 0.693, "step": 1979 }, { "epoch": 0.06068407502758367, "grad_norm": 0.5348938895503836, "learning_rate": 9.975337870145451e-06, "loss": 0.5065, "step": 1980 }, { "epoch": 0.060714723550324874, "grad_norm": 1.5788575913639602, "learning_rate": 9.975288611196721e-06, "loss": 0.7718, "step": 1981 }, { "epoch": 0.06074537207306608, "grad_norm": 1.578350875361505, "learning_rate": 9.975239303225199e-06, "loss": 0.8144, "step": 1982 }, { "epoch": 0.060776020595807285, "grad_norm": 0.5296969480354558, "learning_rate": 9.975189946231372e-06, "loss": 0.4836, "step": 1983 }, { "epoch": 0.060806669118548484, "grad_norm": 1.4818710621787496, "learning_rate": 9.975140540215725e-06, "loss": 0.7301, "step": 1984 }, { "epoch": 0.06083731764128969, "grad_norm": 1.594481590931772, "learning_rate": 9.975091085178745e-06, "loss": 0.8104, "step": 1985 }, { "epoch": 0.060867966164030896, "grad_norm": 1.5737847781102596, "learning_rate": 9.975041581120922e-06, "loss": 0.7024, "step": 1986 }, { "epoch": 0.060898614686772094, "grad_norm": 1.5729157885885399, "learning_rate": 9.974992028042738e-06, "loss": 0.7593, "step": 1987 }, { "epoch": 0.0609292632095133, "grad_norm": 0.519460435952789, "learning_rate": 9.974942425944687e-06, "loss": 0.4868, "step": 1988 }, { "epoch": 0.060959911732254506, "grad_norm": 1.777366082891446, "learning_rate": 9.974892774827254e-06, "loss": 0.7335, "step": 1989 }, { "epoch": 0.06099056025499571, "grad_norm": 1.5425269881868782, "learning_rate": 9.974843074690929e-06, "loss": 0.8397, "step": 1990 }, { "epoch": 0.06102120877773691, "grad_norm": 1.5433822090302778, "learning_rate": 9.974793325536206e-06, "loss": 0.7429, "step": 1991 }, { "epoch": 0.061051857300478116, "grad_norm": 1.4877663084640549, "learning_rate": 9.974743527363569e-06, "loss": 0.8583, "step": 1992 }, { "epoch": 0.06108250582321932, "grad_norm": 1.6371003542631917, "learning_rate": 9.97469368017351e-06, "loss": 0.8783, "step": 1993 }, { "epoch": 0.06111315434596053, "grad_norm": 1.624271181891992, "learning_rate": 9.974643783966522e-06, "loss": 0.7206, "step": 1994 }, { "epoch": 0.061143802868701726, "grad_norm": 1.5138623139062823, "learning_rate": 9.974593838743097e-06, "loss": 0.7574, "step": 1995 }, { "epoch": 0.06117445139144293, "grad_norm": 1.6735992802063953, "learning_rate": 9.974543844503726e-06, "loss": 0.7609, "step": 1996 }, { "epoch": 0.06120509991418414, "grad_norm": 1.5541168414669935, "learning_rate": 9.9744938012489e-06, "loss": 0.7645, "step": 1997 }, { "epoch": 0.06123574843692534, "grad_norm": 1.5100750943977053, "learning_rate": 9.974443708979116e-06, "loss": 0.6966, "step": 1998 }, { "epoch": 0.06126639695966654, "grad_norm": 0.5664807093032703, "learning_rate": 9.974393567694864e-06, "loss": 0.5189, "step": 1999 }, { "epoch": 0.06129704548240775, "grad_norm": 2.6716826713566557, "learning_rate": 9.97434337739664e-06, "loss": 0.8215, "step": 2000 }, { "epoch": 0.06132769400514895, "grad_norm": 1.688810634890798, "learning_rate": 9.974293138084939e-06, "loss": 0.7149, "step": 2001 }, { "epoch": 0.06135834252789016, "grad_norm": 1.5854550095225919, "learning_rate": 9.974242849760253e-06, "loss": 0.6964, "step": 2002 }, { "epoch": 0.06138899105063136, "grad_norm": 1.4586450063192458, "learning_rate": 9.97419251242308e-06, "loss": 0.7076, "step": 2003 }, { "epoch": 0.06141963957337256, "grad_norm": 1.51241084581744, "learning_rate": 9.974142126073915e-06, "loss": 0.7308, "step": 2004 }, { "epoch": 0.06145028809611377, "grad_norm": 1.5663599097064307, "learning_rate": 9.974091690713256e-06, "loss": 0.7983, "step": 2005 }, { "epoch": 0.061480936618854974, "grad_norm": 1.5518451746913906, "learning_rate": 9.974041206341599e-06, "loss": 0.7586, "step": 2006 }, { "epoch": 0.06151158514159617, "grad_norm": 0.5473580006967738, "learning_rate": 9.97399067295944e-06, "loss": 0.4943, "step": 2007 }, { "epoch": 0.06154223366433738, "grad_norm": 1.552419447890971, "learning_rate": 9.97394009056728e-06, "loss": 0.7729, "step": 2008 }, { "epoch": 0.061572882187078584, "grad_norm": 1.7622265272961026, "learning_rate": 9.973889459165615e-06, "loss": 0.7535, "step": 2009 }, { "epoch": 0.06160353070981979, "grad_norm": 1.5960854269048896, "learning_rate": 9.973838778754944e-06, "loss": 0.7315, "step": 2010 }, { "epoch": 0.06163417923256099, "grad_norm": 1.5663269613982334, "learning_rate": 9.973788049335768e-06, "loss": 0.8592, "step": 2011 }, { "epoch": 0.061664827755302194, "grad_norm": 1.6370059289246246, "learning_rate": 9.973737270908584e-06, "loss": 0.7972, "step": 2012 }, { "epoch": 0.0616954762780434, "grad_norm": 1.4708948280659118, "learning_rate": 9.973686443473895e-06, "loss": 0.6881, "step": 2013 }, { "epoch": 0.061726124800784606, "grad_norm": 1.4212874813265672, "learning_rate": 9.973635567032201e-06, "loss": 0.6687, "step": 2014 }, { "epoch": 0.061756773323525804, "grad_norm": 0.5056327722250269, "learning_rate": 9.973584641584005e-06, "loss": 0.4933, "step": 2015 }, { "epoch": 0.06178742184626701, "grad_norm": 0.48093346843899454, "learning_rate": 9.973533667129804e-06, "loss": 0.4854, "step": 2016 }, { "epoch": 0.061818070369008216, "grad_norm": 1.5119849515742627, "learning_rate": 9.973482643670106e-06, "loss": 0.7676, "step": 2017 }, { "epoch": 0.061848718891749414, "grad_norm": 1.4967882633177165, "learning_rate": 9.973431571205408e-06, "loss": 0.7116, "step": 2018 }, { "epoch": 0.06187936741449062, "grad_norm": 1.507667728374246, "learning_rate": 9.973380449736218e-06, "loss": 0.8163, "step": 2019 }, { "epoch": 0.061910015937231826, "grad_norm": 1.5461857351147237, "learning_rate": 9.973329279263038e-06, "loss": 0.7294, "step": 2020 }, { "epoch": 0.06194066445997303, "grad_norm": 1.4922737235775938, "learning_rate": 9.97327805978637e-06, "loss": 0.7903, "step": 2021 }, { "epoch": 0.06197131298271423, "grad_norm": 1.49687010697193, "learning_rate": 9.973226791306723e-06, "loss": 0.6731, "step": 2022 }, { "epoch": 0.062001961505455436, "grad_norm": 1.9483341846094495, "learning_rate": 9.9731754738246e-06, "loss": 0.9354, "step": 2023 }, { "epoch": 0.06203261002819664, "grad_norm": 1.778165013120364, "learning_rate": 9.973124107340506e-06, "loss": 0.7434, "step": 2024 }, { "epoch": 0.06206325855093785, "grad_norm": 1.442929571283632, "learning_rate": 9.973072691854949e-06, "loss": 0.7431, "step": 2025 }, { "epoch": 0.062093907073679046, "grad_norm": 1.6421458240718125, "learning_rate": 9.97302122736843e-06, "loss": 0.8272, "step": 2026 }, { "epoch": 0.06212455559642025, "grad_norm": 1.6323139458697458, "learning_rate": 9.972969713881466e-06, "loss": 0.815, "step": 2027 }, { "epoch": 0.06215520411916146, "grad_norm": 1.5246123234322948, "learning_rate": 9.972918151394556e-06, "loss": 0.7072, "step": 2028 }, { "epoch": 0.06218585264190266, "grad_norm": 1.6698496385885706, "learning_rate": 9.972866539908212e-06, "loss": 0.7912, "step": 2029 }, { "epoch": 0.06221650116464386, "grad_norm": 0.560781586239256, "learning_rate": 9.97281487942294e-06, "loss": 0.4969, "step": 2030 }, { "epoch": 0.06224714968738507, "grad_norm": 1.3946586062454416, "learning_rate": 9.972763169939252e-06, "loss": 0.7194, "step": 2031 }, { "epoch": 0.06227779821012627, "grad_norm": 1.5032468561699235, "learning_rate": 9.972711411457657e-06, "loss": 0.7512, "step": 2032 }, { "epoch": 0.06230844673286748, "grad_norm": 1.4493395257296702, "learning_rate": 9.972659603978664e-06, "loss": 0.6957, "step": 2033 }, { "epoch": 0.06233909525560868, "grad_norm": 0.5264427065391163, "learning_rate": 9.972607747502782e-06, "loss": 0.5072, "step": 2034 }, { "epoch": 0.06236974377834988, "grad_norm": 1.4562437368107957, "learning_rate": 9.972555842030525e-06, "loss": 0.7758, "step": 2035 }, { "epoch": 0.06240039230109109, "grad_norm": 1.550467109373207, "learning_rate": 9.972503887562403e-06, "loss": 0.6399, "step": 2036 }, { "epoch": 0.062431040823832294, "grad_norm": 1.7637536598149155, "learning_rate": 9.972451884098927e-06, "loss": 0.8251, "step": 2037 }, { "epoch": 0.06246168934657349, "grad_norm": 0.49139470495028637, "learning_rate": 9.97239983164061e-06, "loss": 0.4802, "step": 2038 }, { "epoch": 0.0624923378693147, "grad_norm": 1.3419296048074432, "learning_rate": 9.972347730187967e-06, "loss": 0.787, "step": 2039 }, { "epoch": 0.0625229863920559, "grad_norm": 1.5107765632865338, "learning_rate": 9.972295579741508e-06, "loss": 0.7199, "step": 2040 }, { "epoch": 0.06255363491479711, "grad_norm": 1.423351390017544, "learning_rate": 9.972243380301749e-06, "loss": 0.7421, "step": 2041 }, { "epoch": 0.06258428343753832, "grad_norm": 1.6210189678180273, "learning_rate": 9.972191131869204e-06, "loss": 0.8404, "step": 2042 }, { "epoch": 0.06261493196027952, "grad_norm": 2.614659169095516, "learning_rate": 9.972138834444387e-06, "loss": 0.7508, "step": 2043 }, { "epoch": 0.06264558048302071, "grad_norm": 1.7881752821580055, "learning_rate": 9.972086488027815e-06, "loss": 0.7847, "step": 2044 }, { "epoch": 0.06267622900576192, "grad_norm": 1.7704812349486736, "learning_rate": 9.97203409262e-06, "loss": 0.7325, "step": 2045 }, { "epoch": 0.06270687752850312, "grad_norm": 1.6050000224323873, "learning_rate": 9.971981648221463e-06, "loss": 0.6767, "step": 2046 }, { "epoch": 0.06273752605124433, "grad_norm": 1.6539988819542932, "learning_rate": 9.97192915483272e-06, "loss": 0.7786, "step": 2047 }, { "epoch": 0.06276817457398554, "grad_norm": 1.5101944603358344, "learning_rate": 9.971876612454285e-06, "loss": 0.7751, "step": 2048 }, { "epoch": 0.06279882309672674, "grad_norm": 0.5442729865161412, "learning_rate": 9.971824021086677e-06, "loss": 0.499, "step": 2049 }, { "epoch": 0.06282947161946795, "grad_norm": 1.6833668423223123, "learning_rate": 9.971771380730418e-06, "loss": 0.796, "step": 2050 }, { "epoch": 0.06286012014220914, "grad_norm": 1.6682852591673156, "learning_rate": 9.97171869138602e-06, "loss": 0.697, "step": 2051 }, { "epoch": 0.06289076866495034, "grad_norm": 1.4723559686322054, "learning_rate": 9.971665953054007e-06, "loss": 0.8187, "step": 2052 }, { "epoch": 0.06292141718769155, "grad_norm": 1.5560435017630598, "learning_rate": 9.971613165734897e-06, "loss": 0.7572, "step": 2053 }, { "epoch": 0.06295206571043276, "grad_norm": 1.433773994229561, "learning_rate": 9.971560329429211e-06, "loss": 0.691, "step": 2054 }, { "epoch": 0.06298271423317396, "grad_norm": 1.4633092756141275, "learning_rate": 9.971507444137469e-06, "loss": 0.7357, "step": 2055 }, { "epoch": 0.06301336275591517, "grad_norm": 1.4986449981250511, "learning_rate": 9.971454509860192e-06, "loss": 0.7734, "step": 2056 }, { "epoch": 0.06304401127865637, "grad_norm": 1.7118925505764098, "learning_rate": 9.971401526597902e-06, "loss": 0.7523, "step": 2057 }, { "epoch": 0.06307465980139758, "grad_norm": 1.658159366846096, "learning_rate": 9.97134849435112e-06, "loss": 0.7249, "step": 2058 }, { "epoch": 0.06310530832413877, "grad_norm": 0.5746089866354207, "learning_rate": 9.97129541312037e-06, "loss": 0.4955, "step": 2059 }, { "epoch": 0.06313595684687998, "grad_norm": 1.5301886528919812, "learning_rate": 9.971242282906174e-06, "loss": 0.6824, "step": 2060 }, { "epoch": 0.06316660536962118, "grad_norm": 1.550465470110752, "learning_rate": 9.971189103709056e-06, "loss": 0.7722, "step": 2061 }, { "epoch": 0.06319725389236239, "grad_norm": 1.704843519666009, "learning_rate": 9.97113587552954e-06, "loss": 0.8454, "step": 2062 }, { "epoch": 0.06322790241510359, "grad_norm": 1.5379112754656155, "learning_rate": 9.97108259836815e-06, "loss": 0.7943, "step": 2063 }, { "epoch": 0.0632585509378448, "grad_norm": 1.389221355725189, "learning_rate": 9.971029272225411e-06, "loss": 0.7398, "step": 2064 }, { "epoch": 0.063289199460586, "grad_norm": 1.7117032047075924, "learning_rate": 9.970975897101849e-06, "loss": 0.7917, "step": 2065 }, { "epoch": 0.06331984798332721, "grad_norm": 1.4300142521010877, "learning_rate": 9.97092247299799e-06, "loss": 0.8035, "step": 2066 }, { "epoch": 0.0633504965060684, "grad_norm": 2.986621479116047, "learning_rate": 9.97086899991436e-06, "loss": 0.7836, "step": 2067 }, { "epoch": 0.06338114502880961, "grad_norm": 1.5948572157389802, "learning_rate": 9.970815477851485e-06, "loss": 0.789, "step": 2068 }, { "epoch": 0.06341179355155081, "grad_norm": 1.4059269001198131, "learning_rate": 9.970761906809893e-06, "loss": 0.6874, "step": 2069 }, { "epoch": 0.06344244207429202, "grad_norm": 1.5504828007656575, "learning_rate": 9.970708286790114e-06, "loss": 0.7255, "step": 2070 }, { "epoch": 0.06347309059703322, "grad_norm": 0.5354416953826695, "learning_rate": 9.970654617792672e-06, "loss": 0.4908, "step": 2071 }, { "epoch": 0.06350373911977443, "grad_norm": 0.5153148857273552, "learning_rate": 9.9706008998181e-06, "loss": 0.4937, "step": 2072 }, { "epoch": 0.06353438764251564, "grad_norm": 1.6742094524304845, "learning_rate": 9.970547132866925e-06, "loss": 0.7876, "step": 2073 }, { "epoch": 0.06356503616525684, "grad_norm": 0.4935313494232724, "learning_rate": 9.970493316939678e-06, "loss": 0.474, "step": 2074 }, { "epoch": 0.06359568468799803, "grad_norm": 1.707255760964894, "learning_rate": 9.970439452036888e-06, "loss": 0.8423, "step": 2075 }, { "epoch": 0.06362633321073924, "grad_norm": 1.406876352131693, "learning_rate": 9.970385538159086e-06, "loss": 0.7004, "step": 2076 }, { "epoch": 0.06365698173348044, "grad_norm": 1.5968637639896126, "learning_rate": 9.970331575306804e-06, "loss": 0.7858, "step": 2077 }, { "epoch": 0.06368763025622165, "grad_norm": 2.083757363001128, "learning_rate": 9.970277563480573e-06, "loss": 0.6907, "step": 2078 }, { "epoch": 0.06371827877896286, "grad_norm": 1.5657151061961336, "learning_rate": 9.970223502680926e-06, "loss": 0.8194, "step": 2079 }, { "epoch": 0.06374892730170406, "grad_norm": 1.6818165858551162, "learning_rate": 9.970169392908396e-06, "loss": 0.8655, "step": 2080 }, { "epoch": 0.06377957582444527, "grad_norm": 1.797481244743933, "learning_rate": 9.970115234163513e-06, "loss": 0.7341, "step": 2081 }, { "epoch": 0.06381022434718646, "grad_norm": 0.7597313154184668, "learning_rate": 9.970061026446813e-06, "loss": 0.4987, "step": 2082 }, { "epoch": 0.06384087286992766, "grad_norm": 1.5804823942709347, "learning_rate": 9.970006769758832e-06, "loss": 0.7011, "step": 2083 }, { "epoch": 0.06387152139266887, "grad_norm": 1.7325755908166844, "learning_rate": 9.969952464100102e-06, "loss": 0.7983, "step": 2084 }, { "epoch": 0.06390216991541008, "grad_norm": 1.544271321440353, "learning_rate": 9.969898109471159e-06, "loss": 0.6683, "step": 2085 }, { "epoch": 0.06393281843815128, "grad_norm": 0.5372875253010454, "learning_rate": 9.969843705872537e-06, "loss": 0.524, "step": 2086 }, { "epoch": 0.06396346696089249, "grad_norm": 1.5489320795452894, "learning_rate": 9.969789253304775e-06, "loss": 0.8184, "step": 2087 }, { "epoch": 0.06399411548363369, "grad_norm": 1.6173918190043088, "learning_rate": 9.969734751768407e-06, "loss": 0.7805, "step": 2088 }, { "epoch": 0.0640247640063749, "grad_norm": 1.6496991007966697, "learning_rate": 9.969680201263972e-06, "loss": 0.7682, "step": 2089 }, { "epoch": 0.06405541252911609, "grad_norm": 1.6524033410927212, "learning_rate": 9.969625601792005e-06, "loss": 0.7177, "step": 2090 }, { "epoch": 0.0640860610518573, "grad_norm": 1.4857128941950144, "learning_rate": 9.969570953353044e-06, "loss": 0.7156, "step": 2091 }, { "epoch": 0.0641167095745985, "grad_norm": 1.7725895474431455, "learning_rate": 9.969516255947633e-06, "loss": 0.7907, "step": 2092 }, { "epoch": 0.06414735809733971, "grad_norm": 1.6808794881551072, "learning_rate": 9.969461509576303e-06, "loss": 0.6844, "step": 2093 }, { "epoch": 0.06417800662008091, "grad_norm": 0.6612700023381914, "learning_rate": 9.9694067142396e-06, "loss": 0.4795, "step": 2094 }, { "epoch": 0.06420865514282212, "grad_norm": 1.5117522074981637, "learning_rate": 9.96935186993806e-06, "loss": 0.8516, "step": 2095 }, { "epoch": 0.06423930366556332, "grad_norm": 1.5972864648526663, "learning_rate": 9.969296976672224e-06, "loss": 0.8436, "step": 2096 }, { "epoch": 0.06426995218830453, "grad_norm": 1.7999572216113564, "learning_rate": 9.969242034442634e-06, "loss": 0.8348, "step": 2097 }, { "epoch": 0.06430060071104572, "grad_norm": 1.534255528190107, "learning_rate": 9.96918704324983e-06, "loss": 0.7466, "step": 2098 }, { "epoch": 0.06433124923378693, "grad_norm": 1.6516314037753332, "learning_rate": 9.969132003094357e-06, "loss": 0.7458, "step": 2099 }, { "epoch": 0.06436189775652813, "grad_norm": 1.6958496087699366, "learning_rate": 9.969076913976755e-06, "loss": 0.8594, "step": 2100 }, { "epoch": 0.06439254627926934, "grad_norm": 1.6282849471275878, "learning_rate": 9.969021775897563e-06, "loss": 0.754, "step": 2101 }, { "epoch": 0.06442319480201054, "grad_norm": 1.5046547729413484, "learning_rate": 9.968966588857331e-06, "loss": 0.7913, "step": 2102 }, { "epoch": 0.06445384332475175, "grad_norm": 1.6790385191385557, "learning_rate": 9.968911352856598e-06, "loss": 0.7523, "step": 2103 }, { "epoch": 0.06448449184749296, "grad_norm": 1.6491678489195332, "learning_rate": 9.968856067895913e-06, "loss": 0.7162, "step": 2104 }, { "epoch": 0.06451514037023416, "grad_norm": 1.452475913305421, "learning_rate": 9.968800733975816e-06, "loss": 0.6929, "step": 2105 }, { "epoch": 0.06454578889297535, "grad_norm": 1.6044846395342462, "learning_rate": 9.968745351096854e-06, "loss": 0.7511, "step": 2106 }, { "epoch": 0.06457643741571656, "grad_norm": 1.5856254763926731, "learning_rate": 9.968689919259572e-06, "loss": 0.748, "step": 2107 }, { "epoch": 0.06460708593845776, "grad_norm": 1.6049939812902017, "learning_rate": 9.968634438464517e-06, "loss": 0.7446, "step": 2108 }, { "epoch": 0.06463773446119897, "grad_norm": 1.4640258090992575, "learning_rate": 9.968578908712236e-06, "loss": 0.6848, "step": 2109 }, { "epoch": 0.06466838298394018, "grad_norm": 1.4724620840515699, "learning_rate": 9.968523330003276e-06, "loss": 0.8018, "step": 2110 }, { "epoch": 0.06469903150668138, "grad_norm": 1.497905714291856, "learning_rate": 9.968467702338186e-06, "loss": 0.7433, "step": 2111 }, { "epoch": 0.06472968002942259, "grad_norm": 1.6046775696045688, "learning_rate": 9.968412025717511e-06, "loss": 0.8056, "step": 2112 }, { "epoch": 0.06476032855216378, "grad_norm": 1.4911499854525303, "learning_rate": 9.968356300141802e-06, "loss": 0.7451, "step": 2113 }, { "epoch": 0.06479097707490498, "grad_norm": 0.5456425235495902, "learning_rate": 9.968300525611605e-06, "loss": 0.4725, "step": 2114 }, { "epoch": 0.06482162559764619, "grad_norm": 1.329635396460703, "learning_rate": 9.968244702127473e-06, "loss": 0.8366, "step": 2115 }, { "epoch": 0.0648522741203874, "grad_norm": 1.5679308459305878, "learning_rate": 9.968188829689955e-06, "loss": 0.7933, "step": 2116 }, { "epoch": 0.0648829226431286, "grad_norm": 0.48241076365183766, "learning_rate": 9.968132908299602e-06, "loss": 0.4451, "step": 2117 }, { "epoch": 0.06491357116586981, "grad_norm": 1.6794807441901756, "learning_rate": 9.968076937956962e-06, "loss": 0.7367, "step": 2118 }, { "epoch": 0.06494421968861101, "grad_norm": 1.484072308087597, "learning_rate": 9.968020918662591e-06, "loss": 0.7439, "step": 2119 }, { "epoch": 0.06497486821135222, "grad_norm": 1.5639185376487368, "learning_rate": 9.967964850417039e-06, "loss": 0.8234, "step": 2120 }, { "epoch": 0.06500551673409341, "grad_norm": 1.4588314006082408, "learning_rate": 9.967908733220854e-06, "loss": 0.778, "step": 2121 }, { "epoch": 0.06503616525683462, "grad_norm": 1.6329920776617517, "learning_rate": 9.967852567074598e-06, "loss": 0.7713, "step": 2122 }, { "epoch": 0.06506681377957582, "grad_norm": 1.583090835964378, "learning_rate": 9.967796351978817e-06, "loss": 0.8068, "step": 2123 }, { "epoch": 0.06509746230231703, "grad_norm": 1.6686451172804817, "learning_rate": 9.967740087934069e-06, "loss": 0.8265, "step": 2124 }, { "epoch": 0.06512811082505823, "grad_norm": 0.7073855471181907, "learning_rate": 9.967683774940905e-06, "loss": 0.4696, "step": 2125 }, { "epoch": 0.06515875934779944, "grad_norm": 1.5319524524654766, "learning_rate": 9.967627412999883e-06, "loss": 0.7534, "step": 2126 }, { "epoch": 0.06518940787054064, "grad_norm": 1.4703431175798478, "learning_rate": 9.967571002111558e-06, "loss": 0.735, "step": 2127 }, { "epoch": 0.06522005639328185, "grad_norm": 1.5806506747863496, "learning_rate": 9.967514542276484e-06, "loss": 0.7376, "step": 2128 }, { "epoch": 0.06525070491602304, "grad_norm": 0.5186892097130504, "learning_rate": 9.967458033495219e-06, "loss": 0.4944, "step": 2129 }, { "epoch": 0.06528135343876425, "grad_norm": 1.7165598877741544, "learning_rate": 9.967401475768316e-06, "loss": 0.8539, "step": 2130 }, { "epoch": 0.06531200196150545, "grad_norm": 1.7010263260424143, "learning_rate": 9.967344869096338e-06, "loss": 0.709, "step": 2131 }, { "epoch": 0.06534265048424666, "grad_norm": 1.5734627898997604, "learning_rate": 9.96728821347984e-06, "loss": 0.8793, "step": 2132 }, { "epoch": 0.06537329900698786, "grad_norm": 1.6448186821146893, "learning_rate": 9.96723150891938e-06, "loss": 0.6245, "step": 2133 }, { "epoch": 0.06540394752972907, "grad_norm": 1.5486037701042743, "learning_rate": 9.967174755415516e-06, "loss": 0.8421, "step": 2134 }, { "epoch": 0.06543459605247028, "grad_norm": 1.5502085364143505, "learning_rate": 9.96711795296881e-06, "loss": 0.8207, "step": 2135 }, { "epoch": 0.06546524457521148, "grad_norm": 1.3981481266785785, "learning_rate": 9.967061101579818e-06, "loss": 0.7442, "step": 2136 }, { "epoch": 0.06549589309795267, "grad_norm": 1.6201747414647898, "learning_rate": 9.967004201249105e-06, "loss": 0.7553, "step": 2137 }, { "epoch": 0.06552654162069388, "grad_norm": 1.7596843101337891, "learning_rate": 9.966947251977226e-06, "loss": 0.7259, "step": 2138 }, { "epoch": 0.06555719014343508, "grad_norm": 1.7131321117826857, "learning_rate": 9.966890253764746e-06, "loss": 0.7493, "step": 2139 }, { "epoch": 0.06558783866617629, "grad_norm": 1.4158833240292164, "learning_rate": 9.966833206612225e-06, "loss": 0.7495, "step": 2140 }, { "epoch": 0.0656184871889175, "grad_norm": 1.7282802288897057, "learning_rate": 9.966776110520224e-06, "loss": 0.771, "step": 2141 }, { "epoch": 0.0656491357116587, "grad_norm": 1.499027742439698, "learning_rate": 9.96671896548931e-06, "loss": 0.8131, "step": 2142 }, { "epoch": 0.06567978423439991, "grad_norm": 1.5079791788739618, "learning_rate": 9.966661771520042e-06, "loss": 0.8152, "step": 2143 }, { "epoch": 0.0657104327571411, "grad_norm": 1.5460839119650887, "learning_rate": 9.966604528612986e-06, "loss": 0.7397, "step": 2144 }, { "epoch": 0.0657410812798823, "grad_norm": 1.4448274340363363, "learning_rate": 9.966547236768703e-06, "loss": 0.735, "step": 2145 }, { "epoch": 0.06577172980262351, "grad_norm": 1.5160695132946647, "learning_rate": 9.96648989598776e-06, "loss": 0.814, "step": 2146 }, { "epoch": 0.06580237832536472, "grad_norm": 1.7736518942076887, "learning_rate": 9.966432506270723e-06, "loss": 0.7821, "step": 2147 }, { "epoch": 0.06583302684810592, "grad_norm": 1.5252516544396308, "learning_rate": 9.966375067618152e-06, "loss": 0.782, "step": 2148 }, { "epoch": 0.06586367537084713, "grad_norm": 1.6830247115835513, "learning_rate": 9.96631758003062e-06, "loss": 0.7894, "step": 2149 }, { "epoch": 0.06589432389358833, "grad_norm": 1.6134036401438803, "learning_rate": 9.966260043508688e-06, "loss": 0.7996, "step": 2150 }, { "epoch": 0.06592497241632954, "grad_norm": 1.459814700880731, "learning_rate": 9.966202458052927e-06, "loss": 0.8254, "step": 2151 }, { "epoch": 0.06595562093907073, "grad_norm": 1.5435398373192233, "learning_rate": 9.966144823663903e-06, "loss": 0.7591, "step": 2152 }, { "epoch": 0.06598626946181194, "grad_norm": 0.8698212477536403, "learning_rate": 9.966087140342182e-06, "loss": 0.5035, "step": 2153 }, { "epoch": 0.06601691798455314, "grad_norm": 1.610405069082461, "learning_rate": 9.966029408088333e-06, "loss": 0.8123, "step": 2154 }, { "epoch": 0.06604756650729435, "grad_norm": 1.599838142901773, "learning_rate": 9.965971626902928e-06, "loss": 0.6525, "step": 2155 }, { "epoch": 0.06607821503003555, "grad_norm": 1.817537128171504, "learning_rate": 9.965913796786532e-06, "loss": 0.8536, "step": 2156 }, { "epoch": 0.06610886355277676, "grad_norm": 1.6306308385893569, "learning_rate": 9.965855917739718e-06, "loss": 0.8574, "step": 2157 }, { "epoch": 0.06613951207551796, "grad_norm": 0.5391545488469883, "learning_rate": 9.965797989763053e-06, "loss": 0.4609, "step": 2158 }, { "epoch": 0.06617016059825917, "grad_norm": 1.5543686996383446, "learning_rate": 9.965740012857113e-06, "loss": 0.7548, "step": 2159 }, { "epoch": 0.06620080912100036, "grad_norm": 1.6267070049814063, "learning_rate": 9.965681987022463e-06, "loss": 0.7462, "step": 2160 }, { "epoch": 0.06623145764374157, "grad_norm": 1.6465814369777636, "learning_rate": 9.96562391225968e-06, "loss": 0.7674, "step": 2161 }, { "epoch": 0.06626210616648277, "grad_norm": 1.6397608055142163, "learning_rate": 9.965565788569333e-06, "loss": 0.6924, "step": 2162 }, { "epoch": 0.06629275468922398, "grad_norm": 1.427541011666792, "learning_rate": 9.965507615951997e-06, "loss": 0.8097, "step": 2163 }, { "epoch": 0.06632340321196518, "grad_norm": 1.6918277708892688, "learning_rate": 9.965449394408243e-06, "loss": 0.8769, "step": 2164 }, { "epoch": 0.06635405173470639, "grad_norm": 1.630622138326441, "learning_rate": 9.965391123938645e-06, "loss": 0.7927, "step": 2165 }, { "epoch": 0.0663847002574476, "grad_norm": 0.6859441146135584, "learning_rate": 9.96533280454378e-06, "loss": 0.4898, "step": 2166 }, { "epoch": 0.0664153487801888, "grad_norm": 1.668208613010347, "learning_rate": 9.965274436224217e-06, "loss": 0.7905, "step": 2167 }, { "epoch": 0.06644599730293, "grad_norm": 1.5575230517251784, "learning_rate": 9.965216018980537e-06, "loss": 0.7394, "step": 2168 }, { "epoch": 0.0664766458256712, "grad_norm": 1.8391975661194555, "learning_rate": 9.965157552813313e-06, "loss": 0.7686, "step": 2169 }, { "epoch": 0.0665072943484124, "grad_norm": 1.4403364415829025, "learning_rate": 9.96509903772312e-06, "loss": 0.7808, "step": 2170 }, { "epoch": 0.06653794287115361, "grad_norm": 1.4205805097804594, "learning_rate": 9.96504047371054e-06, "loss": 0.7645, "step": 2171 }, { "epoch": 0.06656859139389482, "grad_norm": 0.5105616156355444, "learning_rate": 9.96498186077614e-06, "loss": 0.4821, "step": 2172 }, { "epoch": 0.06659923991663602, "grad_norm": 1.5459643234522062, "learning_rate": 9.964923198920507e-06, "loss": 0.7628, "step": 2173 }, { "epoch": 0.06662988843937723, "grad_norm": 1.5035653208590243, "learning_rate": 9.964864488144215e-06, "loss": 0.7567, "step": 2174 }, { "epoch": 0.06666053696211842, "grad_norm": 1.504974661031507, "learning_rate": 9.964805728447842e-06, "loss": 0.7644, "step": 2175 }, { "epoch": 0.06669118548485962, "grad_norm": 1.6659481292237945, "learning_rate": 9.964746919831969e-06, "loss": 0.6948, "step": 2176 }, { "epoch": 0.06672183400760083, "grad_norm": 1.5193613696230903, "learning_rate": 9.964688062297173e-06, "loss": 0.7582, "step": 2177 }, { "epoch": 0.06675248253034204, "grad_norm": 1.5745298038147968, "learning_rate": 9.964629155844034e-06, "loss": 0.7691, "step": 2178 }, { "epoch": 0.06678313105308324, "grad_norm": 1.5179006867531017, "learning_rate": 9.964570200473136e-06, "loss": 0.7919, "step": 2179 }, { "epoch": 0.06681377957582445, "grad_norm": 1.333373396825772, "learning_rate": 9.964511196185058e-06, "loss": 0.6081, "step": 2180 }, { "epoch": 0.06684442809856565, "grad_norm": 0.7075424368192897, "learning_rate": 9.964452142980379e-06, "loss": 0.4992, "step": 2181 }, { "epoch": 0.06687507662130686, "grad_norm": 1.6504850254844678, "learning_rate": 9.964393040859683e-06, "loss": 0.7732, "step": 2182 }, { "epoch": 0.06690572514404805, "grad_norm": 1.4676974878440803, "learning_rate": 9.964333889823555e-06, "loss": 0.6487, "step": 2183 }, { "epoch": 0.06693637366678926, "grad_norm": 1.4773732364163847, "learning_rate": 9.964274689872571e-06, "loss": 0.6937, "step": 2184 }, { "epoch": 0.06696702218953046, "grad_norm": 1.4074425395485315, "learning_rate": 9.96421544100732e-06, "loss": 0.7115, "step": 2185 }, { "epoch": 0.06699767071227167, "grad_norm": 1.5671791151949905, "learning_rate": 9.964156143228386e-06, "loss": 0.7401, "step": 2186 }, { "epoch": 0.06702831923501287, "grad_norm": 1.4121622867750387, "learning_rate": 9.964096796536349e-06, "loss": 0.7966, "step": 2187 }, { "epoch": 0.06705896775775408, "grad_norm": 1.665090246459269, "learning_rate": 9.964037400931798e-06, "loss": 0.8635, "step": 2188 }, { "epoch": 0.06708961628049528, "grad_norm": 1.558246693685213, "learning_rate": 9.963977956415315e-06, "loss": 0.7831, "step": 2189 }, { "epoch": 0.06712026480323649, "grad_norm": 1.466033615923509, "learning_rate": 9.963918462987488e-06, "loss": 0.7375, "step": 2190 }, { "epoch": 0.06715091332597768, "grad_norm": 1.335850504870168, "learning_rate": 9.9638589206489e-06, "loss": 0.6336, "step": 2191 }, { "epoch": 0.06718156184871889, "grad_norm": 1.4499797968184351, "learning_rate": 9.963799329400142e-06, "loss": 0.6622, "step": 2192 }, { "epoch": 0.0672122103714601, "grad_norm": 1.4507951266658978, "learning_rate": 9.9637396892418e-06, "loss": 0.7587, "step": 2193 }, { "epoch": 0.0672428588942013, "grad_norm": 1.6805372915915493, "learning_rate": 9.963680000174458e-06, "loss": 0.6987, "step": 2194 }, { "epoch": 0.0672735074169425, "grad_norm": 1.4645254106909897, "learning_rate": 9.96362026219871e-06, "loss": 0.7231, "step": 2195 }, { "epoch": 0.06730415593968371, "grad_norm": 1.5951560139148615, "learning_rate": 9.96356047531514e-06, "loss": 0.7572, "step": 2196 }, { "epoch": 0.06733480446242492, "grad_norm": 1.4925064258331937, "learning_rate": 9.96350063952434e-06, "loss": 0.742, "step": 2197 }, { "epoch": 0.06736545298516612, "grad_norm": 1.5595479616718342, "learning_rate": 9.963440754826897e-06, "loss": 0.7172, "step": 2198 }, { "epoch": 0.06739610150790731, "grad_norm": 2.3115590298901827, "learning_rate": 9.9633808212234e-06, "loss": 0.8114, "step": 2199 }, { "epoch": 0.06742675003064852, "grad_norm": 0.7004493538622574, "learning_rate": 9.963320838714445e-06, "loss": 0.4906, "step": 2200 }, { "epoch": 0.06745739855338972, "grad_norm": 1.4213986844634123, "learning_rate": 9.96326080730062e-06, "loss": 0.8239, "step": 2201 }, { "epoch": 0.06748804707613093, "grad_norm": 1.4546849079212285, "learning_rate": 9.963200726982515e-06, "loss": 0.7847, "step": 2202 }, { "epoch": 0.06751869559887214, "grad_norm": 0.5386427935561255, "learning_rate": 9.963140597760723e-06, "loss": 0.4875, "step": 2203 }, { "epoch": 0.06754934412161334, "grad_norm": 1.6918762531278517, "learning_rate": 9.963080419635838e-06, "loss": 0.744, "step": 2204 }, { "epoch": 0.06757999264435455, "grad_norm": 1.4777190907712032, "learning_rate": 9.963020192608452e-06, "loss": 0.7457, "step": 2205 }, { "epoch": 0.06761064116709574, "grad_norm": 1.4997045825408364, "learning_rate": 9.962959916679158e-06, "loss": 0.835, "step": 2206 }, { "epoch": 0.06764128968983694, "grad_norm": 1.4597602395494962, "learning_rate": 9.962899591848549e-06, "loss": 0.7512, "step": 2207 }, { "epoch": 0.06767193821257815, "grad_norm": 1.6382573127598854, "learning_rate": 9.962839218117222e-06, "loss": 0.7739, "step": 2208 }, { "epoch": 0.06770258673531936, "grad_norm": 1.619871611786346, "learning_rate": 9.962778795485768e-06, "loss": 0.6896, "step": 2209 }, { "epoch": 0.06773323525806056, "grad_norm": 1.6171491737813048, "learning_rate": 9.962718323954787e-06, "loss": 0.6718, "step": 2210 }, { "epoch": 0.06776388378080177, "grad_norm": 0.8014233351026867, "learning_rate": 9.96265780352487e-06, "loss": 0.4785, "step": 2211 }, { "epoch": 0.06779453230354297, "grad_norm": 1.5984726088552175, "learning_rate": 9.962597234196621e-06, "loss": 0.7039, "step": 2212 }, { "epoch": 0.06782518082628418, "grad_norm": 0.6743791542576247, "learning_rate": 9.962536615970626e-06, "loss": 0.4737, "step": 2213 }, { "epoch": 0.06785582934902537, "grad_norm": 1.5527768358629832, "learning_rate": 9.962475948847492e-06, "loss": 0.8081, "step": 2214 }, { "epoch": 0.06788647787176658, "grad_norm": 1.7397623061681808, "learning_rate": 9.962415232827811e-06, "loss": 0.7081, "step": 2215 }, { "epoch": 0.06791712639450778, "grad_norm": 1.6288253062402762, "learning_rate": 9.962354467912183e-06, "loss": 0.6778, "step": 2216 }, { "epoch": 0.06794777491724899, "grad_norm": 1.7124410241438663, "learning_rate": 9.962293654101207e-06, "loss": 0.8059, "step": 2217 }, { "epoch": 0.0679784234399902, "grad_norm": 1.6584733027512866, "learning_rate": 9.962232791395483e-06, "loss": 0.8388, "step": 2218 }, { "epoch": 0.0680090719627314, "grad_norm": 1.607822880463562, "learning_rate": 9.962171879795607e-06, "loss": 0.7459, "step": 2219 }, { "epoch": 0.0680397204854726, "grad_norm": 0.6815736528557949, "learning_rate": 9.962110919302184e-06, "loss": 0.4656, "step": 2220 }, { "epoch": 0.06807036900821381, "grad_norm": 1.368710965260855, "learning_rate": 9.962049909915812e-06, "loss": 0.7812, "step": 2221 }, { "epoch": 0.068101017530955, "grad_norm": 1.6241676946634096, "learning_rate": 9.961988851637094e-06, "loss": 0.6765, "step": 2222 }, { "epoch": 0.06813166605369621, "grad_norm": 1.576210010915642, "learning_rate": 9.961927744466628e-06, "loss": 0.8207, "step": 2223 }, { "epoch": 0.06816231457643741, "grad_norm": 1.6398300104299985, "learning_rate": 9.96186658840502e-06, "loss": 0.8076, "step": 2224 }, { "epoch": 0.06819296309917862, "grad_norm": 1.4675778077456951, "learning_rate": 9.96180538345287e-06, "loss": 0.7424, "step": 2225 }, { "epoch": 0.06822361162191982, "grad_norm": 1.6176477655648553, "learning_rate": 9.961744129610781e-06, "loss": 0.7696, "step": 2226 }, { "epoch": 0.06825426014466103, "grad_norm": 0.5647623076180577, "learning_rate": 9.961682826879359e-06, "loss": 0.5028, "step": 2227 }, { "epoch": 0.06828490866740224, "grad_norm": 1.5334188455381583, "learning_rate": 9.961621475259208e-06, "loss": 0.7192, "step": 2228 }, { "epoch": 0.06831555719014344, "grad_norm": 1.5318792470469798, "learning_rate": 9.961560074750929e-06, "loss": 0.7618, "step": 2229 }, { "epoch": 0.06834620571288463, "grad_norm": 1.6864430740777774, "learning_rate": 9.96149862535513e-06, "loss": 0.6909, "step": 2230 }, { "epoch": 0.06837685423562584, "grad_norm": 1.6957620874921502, "learning_rate": 9.961437127072415e-06, "loss": 0.7368, "step": 2231 }, { "epoch": 0.06840750275836704, "grad_norm": 1.69503263962174, "learning_rate": 9.961375579903392e-06, "loss": 0.8012, "step": 2232 }, { "epoch": 0.06843815128110825, "grad_norm": 1.6522695886121557, "learning_rate": 9.961313983848665e-06, "loss": 0.7441, "step": 2233 }, { "epoch": 0.06846879980384946, "grad_norm": 1.5261072487063139, "learning_rate": 9.96125233890884e-06, "loss": 0.7495, "step": 2234 }, { "epoch": 0.06849944832659066, "grad_norm": 1.60995085468579, "learning_rate": 9.961190645084529e-06, "loss": 0.7468, "step": 2235 }, { "epoch": 0.06853009684933187, "grad_norm": 1.6887762508844755, "learning_rate": 9.961128902376335e-06, "loss": 0.7344, "step": 2236 }, { "epoch": 0.06856074537207306, "grad_norm": 1.4166286740800171, "learning_rate": 9.96106711078487e-06, "loss": 0.8912, "step": 2237 }, { "epoch": 0.06859139389481426, "grad_norm": 1.2850390148526718, "learning_rate": 9.961005270310742e-06, "loss": 0.6608, "step": 2238 }, { "epoch": 0.06862204241755547, "grad_norm": 1.621885493553626, "learning_rate": 9.96094338095456e-06, "loss": 0.7084, "step": 2239 }, { "epoch": 0.06865269094029668, "grad_norm": 1.5059183962353004, "learning_rate": 9.960881442716931e-06, "loss": 0.8406, "step": 2240 }, { "epoch": 0.06868333946303788, "grad_norm": 1.5009294295724855, "learning_rate": 9.96081945559847e-06, "loss": 0.7712, "step": 2241 }, { "epoch": 0.06871398798577909, "grad_norm": 1.596156560230167, "learning_rate": 9.960757419599785e-06, "loss": 0.7892, "step": 2242 }, { "epoch": 0.0687446365085203, "grad_norm": 1.4383442030391265, "learning_rate": 9.960695334721489e-06, "loss": 0.7079, "step": 2243 }, { "epoch": 0.0687752850312615, "grad_norm": 1.498892857869939, "learning_rate": 9.960633200964192e-06, "loss": 0.7677, "step": 2244 }, { "epoch": 0.06880593355400269, "grad_norm": 1.446564458008656, "learning_rate": 9.960571018328505e-06, "loss": 0.7841, "step": 2245 }, { "epoch": 0.0688365820767439, "grad_norm": 1.3465539249173133, "learning_rate": 9.960508786815045e-06, "loss": 0.6842, "step": 2246 }, { "epoch": 0.0688672305994851, "grad_norm": 1.4835517579545012, "learning_rate": 9.96044650642442e-06, "loss": 0.8029, "step": 2247 }, { "epoch": 0.06889787912222631, "grad_norm": 1.4127670497014815, "learning_rate": 9.96038417715725e-06, "loss": 0.7409, "step": 2248 }, { "epoch": 0.06892852764496751, "grad_norm": 1.6130104183239422, "learning_rate": 9.960321799014142e-06, "loss": 0.7617, "step": 2249 }, { "epoch": 0.06895917616770872, "grad_norm": 1.455345175303625, "learning_rate": 9.960259371995715e-06, "loss": 0.6927, "step": 2250 }, { "epoch": 0.06898982469044992, "grad_norm": 1.417954344534955, "learning_rate": 9.960196896102585e-06, "loss": 0.7347, "step": 2251 }, { "epoch": 0.06902047321319113, "grad_norm": 0.7354190713390101, "learning_rate": 9.960134371335364e-06, "loss": 0.5055, "step": 2252 }, { "epoch": 0.06905112173593232, "grad_norm": 2.464260786410831, "learning_rate": 9.960071797694671e-06, "loss": 0.7328, "step": 2253 }, { "epoch": 0.06908177025867353, "grad_norm": 1.5824765668565617, "learning_rate": 9.960009175181122e-06, "loss": 0.7854, "step": 2254 }, { "epoch": 0.06911241878141473, "grad_norm": 1.396539075482736, "learning_rate": 9.959946503795333e-06, "loss": 0.6403, "step": 2255 }, { "epoch": 0.06914306730415594, "grad_norm": 1.687345897424611, "learning_rate": 9.959883783537922e-06, "loss": 0.7292, "step": 2256 }, { "epoch": 0.06917371582689714, "grad_norm": 0.49887067227482823, "learning_rate": 9.959821014409506e-06, "loss": 0.4868, "step": 2257 }, { "epoch": 0.06920436434963835, "grad_norm": 1.4752545880663905, "learning_rate": 9.959758196410705e-06, "loss": 0.7206, "step": 2258 }, { "epoch": 0.06923501287237956, "grad_norm": 1.405964935727789, "learning_rate": 9.959695329542138e-06, "loss": 0.7757, "step": 2259 }, { "epoch": 0.06926566139512076, "grad_norm": 1.6225511198386022, "learning_rate": 9.959632413804424e-06, "loss": 0.8417, "step": 2260 }, { "epoch": 0.06929630991786195, "grad_norm": 1.699315570915936, "learning_rate": 9.959569449198183e-06, "loss": 0.8395, "step": 2261 }, { "epoch": 0.06932695844060316, "grad_norm": 1.518082266258255, "learning_rate": 9.959506435724036e-06, "loss": 0.7422, "step": 2262 }, { "epoch": 0.06935760696334436, "grad_norm": 1.5754059822459061, "learning_rate": 9.959443373382602e-06, "loss": 0.7354, "step": 2263 }, { "epoch": 0.06938825548608557, "grad_norm": 1.5214332059852722, "learning_rate": 9.959380262174502e-06, "loss": 0.7767, "step": 2264 }, { "epoch": 0.06941890400882678, "grad_norm": 1.5737802081582704, "learning_rate": 9.959317102100362e-06, "loss": 0.7342, "step": 2265 }, { "epoch": 0.06944955253156798, "grad_norm": 1.8387332649976296, "learning_rate": 9.9592538931608e-06, "loss": 0.7508, "step": 2266 }, { "epoch": 0.06948020105430919, "grad_norm": 1.4800213989474693, "learning_rate": 9.959190635356441e-06, "loss": 0.8557, "step": 2267 }, { "epoch": 0.06951084957705038, "grad_norm": 1.4187709113650164, "learning_rate": 9.959127328687908e-06, "loss": 0.7196, "step": 2268 }, { "epoch": 0.06954149809979158, "grad_norm": 1.5174554864539616, "learning_rate": 9.959063973155824e-06, "loss": 0.7826, "step": 2269 }, { "epoch": 0.06957214662253279, "grad_norm": 1.4623892329178947, "learning_rate": 9.959000568760815e-06, "loss": 0.7149, "step": 2270 }, { "epoch": 0.069602795145274, "grad_norm": 1.5051705640222712, "learning_rate": 9.958937115503505e-06, "loss": 0.7796, "step": 2271 }, { "epoch": 0.0696334436680152, "grad_norm": 0.7156124129420741, "learning_rate": 9.958873613384516e-06, "loss": 0.485, "step": 2272 }, { "epoch": 0.06966409219075641, "grad_norm": 1.437252707557331, "learning_rate": 9.958810062404479e-06, "loss": 0.7391, "step": 2273 }, { "epoch": 0.06969474071349761, "grad_norm": 1.3226931522025864, "learning_rate": 9.958746462564017e-06, "loss": 0.7041, "step": 2274 }, { "epoch": 0.06972538923623882, "grad_norm": 1.3636997979603214, "learning_rate": 9.958682813863758e-06, "loss": 0.7361, "step": 2275 }, { "epoch": 0.06975603775898001, "grad_norm": 1.5689381961217415, "learning_rate": 9.958619116304327e-06, "loss": 0.7845, "step": 2276 }, { "epoch": 0.06978668628172122, "grad_norm": 1.3130613539847868, "learning_rate": 9.958555369886354e-06, "loss": 0.6884, "step": 2277 }, { "epoch": 0.06981733480446242, "grad_norm": 1.6651384302996424, "learning_rate": 9.958491574610467e-06, "loss": 0.8544, "step": 2278 }, { "epoch": 0.06984798332720363, "grad_norm": 1.945768317732513, "learning_rate": 9.958427730477292e-06, "loss": 0.6758, "step": 2279 }, { "epoch": 0.06987863184994483, "grad_norm": 1.334776730903076, "learning_rate": 9.958363837487462e-06, "loss": 0.7152, "step": 2280 }, { "epoch": 0.06990928037268604, "grad_norm": 0.5901189995578463, "learning_rate": 9.958299895641603e-06, "loss": 0.4783, "step": 2281 }, { "epoch": 0.06993992889542724, "grad_norm": 1.4620058239221703, "learning_rate": 9.958235904940346e-06, "loss": 0.8445, "step": 2282 }, { "epoch": 0.06997057741816845, "grad_norm": 0.5581667318162339, "learning_rate": 9.958171865384322e-06, "loss": 0.4832, "step": 2283 }, { "epoch": 0.07000122594090964, "grad_norm": 1.5048928819588396, "learning_rate": 9.958107776974164e-06, "loss": 0.8315, "step": 2284 }, { "epoch": 0.07003187446365085, "grad_norm": 0.5105370557169423, "learning_rate": 9.958043639710501e-06, "loss": 0.4876, "step": 2285 }, { "epoch": 0.07006252298639205, "grad_norm": 1.4578410310401482, "learning_rate": 9.957979453593964e-06, "loss": 0.6751, "step": 2286 }, { "epoch": 0.07009317150913326, "grad_norm": 1.6805393766477554, "learning_rate": 9.957915218625188e-06, "loss": 0.9074, "step": 2287 }, { "epoch": 0.07012382003187446, "grad_norm": 1.5730866667458743, "learning_rate": 9.957850934804805e-06, "loss": 0.7677, "step": 2288 }, { "epoch": 0.07015446855461567, "grad_norm": 1.5314101124463753, "learning_rate": 9.957786602133448e-06, "loss": 0.6521, "step": 2289 }, { "epoch": 0.07018511707735688, "grad_norm": 1.3711793324683692, "learning_rate": 9.95772222061175e-06, "loss": 0.759, "step": 2290 }, { "epoch": 0.07021576560009808, "grad_norm": 0.6115605172706645, "learning_rate": 9.957657790240347e-06, "loss": 0.4833, "step": 2291 }, { "epoch": 0.07024641412283927, "grad_norm": 1.6099573585640043, "learning_rate": 9.957593311019875e-06, "loss": 0.7728, "step": 2292 }, { "epoch": 0.07027706264558048, "grad_norm": 1.3724962301684203, "learning_rate": 9.957528782950965e-06, "loss": 0.6074, "step": 2293 }, { "epoch": 0.07030771116832168, "grad_norm": 1.9758182656550285, "learning_rate": 9.957464206034258e-06, "loss": 0.8259, "step": 2294 }, { "epoch": 0.07033835969106289, "grad_norm": 1.6432621748048144, "learning_rate": 9.957399580270386e-06, "loss": 0.7142, "step": 2295 }, { "epoch": 0.0703690082138041, "grad_norm": 1.4856900015820431, "learning_rate": 9.957334905659987e-06, "loss": 0.8157, "step": 2296 }, { "epoch": 0.0703996567365453, "grad_norm": 1.6135069333050573, "learning_rate": 9.9572701822037e-06, "loss": 0.7519, "step": 2297 }, { "epoch": 0.07043030525928651, "grad_norm": 1.62088871947506, "learning_rate": 9.957205409902163e-06, "loss": 0.7951, "step": 2298 }, { "epoch": 0.07046095378202771, "grad_norm": 1.3343833951718267, "learning_rate": 9.95714058875601e-06, "loss": 0.7087, "step": 2299 }, { "epoch": 0.0704916023047689, "grad_norm": 0.550817552136799, "learning_rate": 9.957075718765882e-06, "loss": 0.4962, "step": 2300 }, { "epoch": 0.07052225082751011, "grad_norm": 1.5009531716789164, "learning_rate": 9.95701079993242e-06, "loss": 0.8241, "step": 2301 }, { "epoch": 0.07055289935025132, "grad_norm": 1.6226112610911156, "learning_rate": 9.956945832256264e-06, "loss": 0.737, "step": 2302 }, { "epoch": 0.07058354787299252, "grad_norm": 0.49527290088977194, "learning_rate": 9.956880815738051e-06, "loss": 0.4794, "step": 2303 }, { "epoch": 0.07061419639573373, "grad_norm": 1.5807700083120413, "learning_rate": 9.956815750378423e-06, "loss": 0.7466, "step": 2304 }, { "epoch": 0.07064484491847493, "grad_norm": 1.565795917813256, "learning_rate": 9.956750636178021e-06, "loss": 0.7629, "step": 2305 }, { "epoch": 0.07067549344121614, "grad_norm": 1.614523868212177, "learning_rate": 9.956685473137486e-06, "loss": 0.8038, "step": 2306 }, { "epoch": 0.07070614196395733, "grad_norm": 1.7166726489421895, "learning_rate": 9.956620261257462e-06, "loss": 0.7924, "step": 2307 }, { "epoch": 0.07073679048669854, "grad_norm": 0.5103907415763846, "learning_rate": 9.95655500053859e-06, "loss": 0.4937, "step": 2308 }, { "epoch": 0.07076743900943974, "grad_norm": 1.5505432356024322, "learning_rate": 9.956489690981513e-06, "loss": 0.7959, "step": 2309 }, { "epoch": 0.07079808753218095, "grad_norm": 1.549097698734928, "learning_rate": 9.956424332586876e-06, "loss": 0.7813, "step": 2310 }, { "epoch": 0.07082873605492215, "grad_norm": 1.579163687662132, "learning_rate": 9.956358925355321e-06, "loss": 0.6945, "step": 2311 }, { "epoch": 0.07085938457766336, "grad_norm": 1.656670036150392, "learning_rate": 9.956293469287494e-06, "loss": 0.8283, "step": 2312 }, { "epoch": 0.07089003310040456, "grad_norm": 0.4965704180018229, "learning_rate": 9.956227964384038e-06, "loss": 0.4816, "step": 2313 }, { "epoch": 0.07092068162314577, "grad_norm": 0.5350419152640112, "learning_rate": 9.9561624106456e-06, "loss": 0.4846, "step": 2314 }, { "epoch": 0.07095133014588696, "grad_norm": 1.6119520896641555, "learning_rate": 9.956096808072827e-06, "loss": 0.727, "step": 2315 }, { "epoch": 0.07098197866862817, "grad_norm": 1.6432377449364521, "learning_rate": 9.956031156666364e-06, "loss": 0.7423, "step": 2316 }, { "epoch": 0.07101262719136937, "grad_norm": 0.4749535533542855, "learning_rate": 9.955965456426856e-06, "loss": 0.4812, "step": 2317 }, { "epoch": 0.07104327571411058, "grad_norm": 1.570383494758829, "learning_rate": 9.955899707354954e-06, "loss": 0.9048, "step": 2318 }, { "epoch": 0.07107392423685178, "grad_norm": 2.091567580256955, "learning_rate": 9.955833909451304e-06, "loss": 0.882, "step": 2319 }, { "epoch": 0.07110457275959299, "grad_norm": 1.5124392434772669, "learning_rate": 9.955768062716553e-06, "loss": 0.777, "step": 2320 }, { "epoch": 0.0711352212823342, "grad_norm": 1.674564515483792, "learning_rate": 9.955702167151355e-06, "loss": 0.7655, "step": 2321 }, { "epoch": 0.0711658698050754, "grad_norm": 1.7671932445811052, "learning_rate": 9.955636222756353e-06, "loss": 0.8184, "step": 2322 }, { "epoch": 0.0711965183278166, "grad_norm": 1.3457271810530034, "learning_rate": 9.955570229532198e-06, "loss": 0.6989, "step": 2323 }, { "epoch": 0.0712271668505578, "grad_norm": 1.415283018822255, "learning_rate": 9.955504187479542e-06, "loss": 0.6869, "step": 2324 }, { "epoch": 0.071257815373299, "grad_norm": 1.4872945509645354, "learning_rate": 9.955438096599038e-06, "loss": 0.7739, "step": 2325 }, { "epoch": 0.07128846389604021, "grad_norm": 1.531571706797035, "learning_rate": 9.955371956891334e-06, "loss": 0.6943, "step": 2326 }, { "epoch": 0.07131911241878142, "grad_norm": 1.6401191198850842, "learning_rate": 9.95530576835708e-06, "loss": 0.7865, "step": 2327 }, { "epoch": 0.07134976094152262, "grad_norm": 1.6821275204330286, "learning_rate": 9.955239530996932e-06, "loss": 0.7639, "step": 2328 }, { "epoch": 0.07138040946426383, "grad_norm": 1.543797378920591, "learning_rate": 9.95517324481154e-06, "loss": 0.8088, "step": 2329 }, { "epoch": 0.07141105798700503, "grad_norm": 1.3931699417136183, "learning_rate": 9.95510690980156e-06, "loss": 0.6666, "step": 2330 }, { "epoch": 0.07144170650974623, "grad_norm": 0.5428930970915117, "learning_rate": 9.955040525967643e-06, "loss": 0.4688, "step": 2331 }, { "epoch": 0.07147235503248743, "grad_norm": 1.4423120106952252, "learning_rate": 9.954974093310443e-06, "loss": 0.7318, "step": 2332 }, { "epoch": 0.07150300355522864, "grad_norm": 1.6199796532423811, "learning_rate": 9.954907611830615e-06, "loss": 0.8667, "step": 2333 }, { "epoch": 0.07153365207796984, "grad_norm": 1.7154240664312752, "learning_rate": 9.954841081528817e-06, "loss": 0.7789, "step": 2334 }, { "epoch": 0.07156430060071105, "grad_norm": 1.5584008505883817, "learning_rate": 9.954774502405699e-06, "loss": 0.886, "step": 2335 }, { "epoch": 0.07159494912345225, "grad_norm": 1.6994534082892232, "learning_rate": 9.954707874461921e-06, "loss": 0.7563, "step": 2336 }, { "epoch": 0.07162559764619346, "grad_norm": 1.544504692358871, "learning_rate": 9.95464119769814e-06, "loss": 0.7149, "step": 2337 }, { "epoch": 0.07165624616893465, "grad_norm": 1.6944880071250432, "learning_rate": 9.954574472115011e-06, "loss": 0.7712, "step": 2338 }, { "epoch": 0.07168689469167586, "grad_norm": 1.5089466051576883, "learning_rate": 9.954507697713192e-06, "loss": 0.7649, "step": 2339 }, { "epoch": 0.07171754321441706, "grad_norm": 1.454378541422721, "learning_rate": 9.95444087449334e-06, "loss": 0.7491, "step": 2340 }, { "epoch": 0.07174819173715827, "grad_norm": 1.3646120064199574, "learning_rate": 9.954374002456116e-06, "loss": 0.7344, "step": 2341 }, { "epoch": 0.07177884025989947, "grad_norm": 1.4771917157762369, "learning_rate": 9.954307081602176e-06, "loss": 0.6736, "step": 2342 }, { "epoch": 0.07180948878264068, "grad_norm": 1.544109368474685, "learning_rate": 9.954240111932182e-06, "loss": 0.7671, "step": 2343 }, { "epoch": 0.07184013730538188, "grad_norm": 1.5776155488468508, "learning_rate": 9.954173093446792e-06, "loss": 0.8408, "step": 2344 }, { "epoch": 0.07187078582812309, "grad_norm": 1.4438646853980555, "learning_rate": 9.954106026146667e-06, "loss": 0.6693, "step": 2345 }, { "epoch": 0.07190143435086428, "grad_norm": 1.6101333322840656, "learning_rate": 9.954038910032468e-06, "loss": 0.7512, "step": 2346 }, { "epoch": 0.07193208287360549, "grad_norm": 1.7327882642598493, "learning_rate": 9.953971745104855e-06, "loss": 0.803, "step": 2347 }, { "epoch": 0.0719627313963467, "grad_norm": 1.5354505920787913, "learning_rate": 9.95390453136449e-06, "loss": 0.7569, "step": 2348 }, { "epoch": 0.0719933799190879, "grad_norm": 1.4838043340580038, "learning_rate": 9.953837268812039e-06, "loss": 0.6154, "step": 2349 }, { "epoch": 0.0720240284418291, "grad_norm": 1.4500421199837135, "learning_rate": 9.95376995744816e-06, "loss": 0.7151, "step": 2350 }, { "epoch": 0.07205467696457031, "grad_norm": 1.5744530030897534, "learning_rate": 9.95370259727352e-06, "loss": 0.7931, "step": 2351 }, { "epoch": 0.07208532548731152, "grad_norm": 1.5521069867977502, "learning_rate": 9.95363518828878e-06, "loss": 0.7458, "step": 2352 }, { "epoch": 0.07211597401005272, "grad_norm": 0.49669587336637083, "learning_rate": 9.953567730494604e-06, "loss": 0.4819, "step": 2353 }, { "epoch": 0.07214662253279391, "grad_norm": 1.3509973958422652, "learning_rate": 9.953500223891657e-06, "loss": 0.6583, "step": 2354 }, { "epoch": 0.07217727105553512, "grad_norm": 0.5112086128971605, "learning_rate": 9.953432668480607e-06, "loss": 0.4664, "step": 2355 }, { "epoch": 0.07220791957827633, "grad_norm": 1.4918366153978313, "learning_rate": 9.953365064262117e-06, "loss": 0.7806, "step": 2356 }, { "epoch": 0.07223856810101753, "grad_norm": 1.478903474966207, "learning_rate": 9.953297411236853e-06, "loss": 0.7361, "step": 2357 }, { "epoch": 0.07226921662375874, "grad_norm": 1.6244042223129052, "learning_rate": 9.953229709405483e-06, "loss": 0.7457, "step": 2358 }, { "epoch": 0.07229986514649994, "grad_norm": 1.3986306153061534, "learning_rate": 9.953161958768673e-06, "loss": 0.6625, "step": 2359 }, { "epoch": 0.07233051366924115, "grad_norm": 0.5575134365723506, "learning_rate": 9.95309415932709e-06, "loss": 0.5001, "step": 2360 }, { "epoch": 0.07236116219198235, "grad_norm": 1.4915733000708922, "learning_rate": 9.953026311081404e-06, "loss": 0.7202, "step": 2361 }, { "epoch": 0.07239181071472355, "grad_norm": 1.5926380992445512, "learning_rate": 9.952958414032283e-06, "loss": 0.7835, "step": 2362 }, { "epoch": 0.07242245923746475, "grad_norm": 1.4025711219632684, "learning_rate": 9.952890468180396e-06, "loss": 0.7554, "step": 2363 }, { "epoch": 0.07245310776020596, "grad_norm": 1.5563297275718724, "learning_rate": 9.95282247352641e-06, "loss": 0.8154, "step": 2364 }, { "epoch": 0.07248375628294716, "grad_norm": 1.599423302563011, "learning_rate": 9.952754430070997e-06, "loss": 0.6117, "step": 2365 }, { "epoch": 0.07251440480568837, "grad_norm": 1.4149689636608627, "learning_rate": 9.952686337814827e-06, "loss": 0.8239, "step": 2366 }, { "epoch": 0.07254505332842957, "grad_norm": 1.3792179660124886, "learning_rate": 9.952618196758574e-06, "loss": 0.7045, "step": 2367 }, { "epoch": 0.07257570185117078, "grad_norm": 1.7763344267010734, "learning_rate": 9.952550006902905e-06, "loss": 0.6535, "step": 2368 }, { "epoch": 0.07260635037391197, "grad_norm": 1.5721298294150785, "learning_rate": 9.952481768248495e-06, "loss": 0.8082, "step": 2369 }, { "epoch": 0.07263699889665318, "grad_norm": 1.4572914195667632, "learning_rate": 9.952413480796013e-06, "loss": 0.7341, "step": 2370 }, { "epoch": 0.07266764741939438, "grad_norm": 1.7346884793418167, "learning_rate": 9.952345144546135e-06, "loss": 0.7774, "step": 2371 }, { "epoch": 0.07269829594213559, "grad_norm": 1.662493131719946, "learning_rate": 9.952276759499531e-06, "loss": 0.771, "step": 2372 }, { "epoch": 0.0727289444648768, "grad_norm": 1.580579722713544, "learning_rate": 9.95220832565688e-06, "loss": 0.7547, "step": 2373 }, { "epoch": 0.072759592987618, "grad_norm": 0.5316211831383242, "learning_rate": 9.95213984301885e-06, "loss": 0.4883, "step": 2374 }, { "epoch": 0.0727902415103592, "grad_norm": 0.5695758944120248, "learning_rate": 9.952071311586123e-06, "loss": 0.4938, "step": 2375 }, { "epoch": 0.07282089003310041, "grad_norm": 1.414564797506994, "learning_rate": 9.952002731359368e-06, "loss": 0.6887, "step": 2376 }, { "epoch": 0.0728515385558416, "grad_norm": 1.462666469210569, "learning_rate": 9.951934102339263e-06, "loss": 0.7164, "step": 2377 }, { "epoch": 0.07288218707858281, "grad_norm": 1.7775668571252206, "learning_rate": 9.951865424526486e-06, "loss": 0.7998, "step": 2378 }, { "epoch": 0.07291283560132401, "grad_norm": 1.6996127106298209, "learning_rate": 9.95179669792171e-06, "loss": 0.7805, "step": 2379 }, { "epoch": 0.07294348412406522, "grad_norm": 0.5316304801431238, "learning_rate": 9.951727922525615e-06, "loss": 0.4876, "step": 2380 }, { "epoch": 0.07297413264680642, "grad_norm": 1.4322583608409947, "learning_rate": 9.951659098338878e-06, "loss": 0.7079, "step": 2381 }, { "epoch": 0.07300478116954763, "grad_norm": 1.461643156028268, "learning_rate": 9.951590225362176e-06, "loss": 0.8191, "step": 2382 }, { "epoch": 0.07303542969228884, "grad_norm": 1.5364475213541864, "learning_rate": 9.95152130359619e-06, "loss": 0.8208, "step": 2383 }, { "epoch": 0.07306607821503004, "grad_norm": 1.6830754208785157, "learning_rate": 9.951452333041596e-06, "loss": 0.6865, "step": 2384 }, { "epoch": 0.07309672673777123, "grad_norm": 0.4983723043143504, "learning_rate": 9.951383313699077e-06, "loss": 0.4782, "step": 2385 }, { "epoch": 0.07312737526051244, "grad_norm": 1.6986107513151762, "learning_rate": 9.951314245569311e-06, "loss": 0.8361, "step": 2386 }, { "epoch": 0.07315802378325365, "grad_norm": 1.6267098236233484, "learning_rate": 9.951245128652978e-06, "loss": 0.7969, "step": 2387 }, { "epoch": 0.07318867230599485, "grad_norm": 1.40476093855487, "learning_rate": 9.95117596295076e-06, "loss": 0.6935, "step": 2388 }, { "epoch": 0.07321932082873606, "grad_norm": 1.419718983518666, "learning_rate": 9.951106748463339e-06, "loss": 0.752, "step": 2389 }, { "epoch": 0.07324996935147726, "grad_norm": 1.4303846516820158, "learning_rate": 9.951037485191395e-06, "loss": 0.7853, "step": 2390 }, { "epoch": 0.07328061787421847, "grad_norm": 0.5119084169868066, "learning_rate": 9.950968173135614e-06, "loss": 0.5004, "step": 2391 }, { "epoch": 0.07331126639695967, "grad_norm": 1.3494854894684387, "learning_rate": 9.950898812296676e-06, "loss": 0.705, "step": 2392 }, { "epoch": 0.07334191491970087, "grad_norm": 1.8455448578867302, "learning_rate": 9.950829402675264e-06, "loss": 0.7982, "step": 2393 }, { "epoch": 0.07337256344244207, "grad_norm": 0.4835327806636155, "learning_rate": 9.950759944272066e-06, "loss": 0.4865, "step": 2394 }, { "epoch": 0.07340321196518328, "grad_norm": 1.3406067580300478, "learning_rate": 9.950690437087763e-06, "loss": 0.7368, "step": 2395 }, { "epoch": 0.07343386048792448, "grad_norm": 1.6873111208425078, "learning_rate": 9.950620881123039e-06, "loss": 0.8639, "step": 2396 }, { "epoch": 0.07346450901066569, "grad_norm": 1.46732557788398, "learning_rate": 9.950551276378579e-06, "loss": 0.7036, "step": 2397 }, { "epoch": 0.0734951575334069, "grad_norm": 1.5762994318622063, "learning_rate": 9.950481622855073e-06, "loss": 0.8115, "step": 2398 }, { "epoch": 0.0735258060561481, "grad_norm": 1.558871453615948, "learning_rate": 9.950411920553205e-06, "loss": 0.7644, "step": 2399 }, { "epoch": 0.07355645457888929, "grad_norm": 1.4489891503377246, "learning_rate": 9.950342169473661e-06, "loss": 0.797, "step": 2400 }, { "epoch": 0.0735871031016305, "grad_norm": 1.5551813894849296, "learning_rate": 9.950272369617132e-06, "loss": 0.6655, "step": 2401 }, { "epoch": 0.0736177516243717, "grad_norm": 0.5246682743553378, "learning_rate": 9.9502025209843e-06, "loss": 0.4826, "step": 2402 }, { "epoch": 0.07364840014711291, "grad_norm": 1.3890689268778826, "learning_rate": 9.950132623575855e-06, "loss": 0.7916, "step": 2403 }, { "epoch": 0.07367904866985411, "grad_norm": 1.7246515124400885, "learning_rate": 9.950062677392488e-06, "loss": 0.7617, "step": 2404 }, { "epoch": 0.07370969719259532, "grad_norm": 1.4353254146537708, "learning_rate": 9.949992682434887e-06, "loss": 0.7241, "step": 2405 }, { "epoch": 0.07374034571533652, "grad_norm": 0.47563199864881694, "learning_rate": 9.949922638703742e-06, "loss": 0.4789, "step": 2406 }, { "epoch": 0.07377099423807773, "grad_norm": 1.5639774443719316, "learning_rate": 9.949852546199741e-06, "loss": 0.8525, "step": 2407 }, { "epoch": 0.07380164276081892, "grad_norm": 1.4980461204109117, "learning_rate": 9.949782404923579e-06, "loss": 0.7709, "step": 2408 }, { "epoch": 0.07383229128356013, "grad_norm": 1.4903451558256395, "learning_rate": 9.949712214875942e-06, "loss": 0.8231, "step": 2409 }, { "epoch": 0.07386293980630133, "grad_norm": 1.5198152063954642, "learning_rate": 9.949641976057525e-06, "loss": 0.731, "step": 2410 }, { "epoch": 0.07389358832904254, "grad_norm": 1.51867317525501, "learning_rate": 9.94957168846902e-06, "loss": 0.8981, "step": 2411 }, { "epoch": 0.07392423685178375, "grad_norm": 1.3190331935529958, "learning_rate": 9.949501352111118e-06, "loss": 0.7815, "step": 2412 }, { "epoch": 0.07395488537452495, "grad_norm": 1.304699621232466, "learning_rate": 9.949430966984512e-06, "loss": 0.7387, "step": 2413 }, { "epoch": 0.07398553389726616, "grad_norm": 1.5903329420702756, "learning_rate": 9.949360533089898e-06, "loss": 0.8757, "step": 2414 }, { "epoch": 0.07401618242000736, "grad_norm": 1.505747612578389, "learning_rate": 9.949290050427967e-06, "loss": 0.793, "step": 2415 }, { "epoch": 0.07404683094274855, "grad_norm": 0.5318983464310043, "learning_rate": 9.949219518999416e-06, "loss": 0.502, "step": 2416 }, { "epoch": 0.07407747946548976, "grad_norm": 1.5710206880873934, "learning_rate": 9.949148938804938e-06, "loss": 0.7556, "step": 2417 }, { "epoch": 0.07410812798823097, "grad_norm": 1.4647499774260078, "learning_rate": 9.949078309845229e-06, "loss": 0.7778, "step": 2418 }, { "epoch": 0.07413877651097217, "grad_norm": 1.40602683569368, "learning_rate": 9.949007632120986e-06, "loss": 0.7034, "step": 2419 }, { "epoch": 0.07416942503371338, "grad_norm": 1.610382831136568, "learning_rate": 9.948936905632905e-06, "loss": 0.7772, "step": 2420 }, { "epoch": 0.07420007355645458, "grad_norm": 1.4945051916895455, "learning_rate": 9.94886613038168e-06, "loss": 0.7323, "step": 2421 }, { "epoch": 0.07423072207919579, "grad_norm": 1.395511644399987, "learning_rate": 9.948795306368012e-06, "loss": 0.6179, "step": 2422 }, { "epoch": 0.074261370601937, "grad_norm": 1.5991685243835319, "learning_rate": 9.948724433592599e-06, "loss": 0.6929, "step": 2423 }, { "epoch": 0.07429201912467819, "grad_norm": 1.5126153607165622, "learning_rate": 9.948653512056136e-06, "loss": 0.7467, "step": 2424 }, { "epoch": 0.07432266764741939, "grad_norm": 1.3429933526005713, "learning_rate": 9.948582541759324e-06, "loss": 0.7215, "step": 2425 }, { "epoch": 0.0743533161701606, "grad_norm": 0.5135704753584204, "learning_rate": 9.948511522702864e-06, "loss": 0.4932, "step": 2426 }, { "epoch": 0.0743839646929018, "grad_norm": 1.6105665383191592, "learning_rate": 9.94844045488745e-06, "loss": 0.8753, "step": 2427 }, { "epoch": 0.07441461321564301, "grad_norm": 1.3975212137110826, "learning_rate": 9.94836933831379e-06, "loss": 0.7301, "step": 2428 }, { "epoch": 0.07444526173838421, "grad_norm": 1.7605116456976595, "learning_rate": 9.94829817298258e-06, "loss": 0.7658, "step": 2429 }, { "epoch": 0.07447591026112542, "grad_norm": 1.5445142590023533, "learning_rate": 9.94822695889452e-06, "loss": 0.9522, "step": 2430 }, { "epoch": 0.07450655878386661, "grad_norm": 1.4832581238365543, "learning_rate": 9.948155696050316e-06, "loss": 0.8121, "step": 2431 }, { "epoch": 0.07453720730660782, "grad_norm": 1.774479548502581, "learning_rate": 9.948084384450667e-06, "loss": 0.8607, "step": 2432 }, { "epoch": 0.07456785582934902, "grad_norm": 1.4889241362085384, "learning_rate": 9.948013024096277e-06, "loss": 0.857, "step": 2433 }, { "epoch": 0.07459850435209023, "grad_norm": 1.462007232306308, "learning_rate": 9.947941614987848e-06, "loss": 0.7935, "step": 2434 }, { "epoch": 0.07462915287483143, "grad_norm": 3.549465159299555, "learning_rate": 9.947870157126085e-06, "loss": 0.717, "step": 2435 }, { "epoch": 0.07465980139757264, "grad_norm": 1.5528448507071282, "learning_rate": 9.94779865051169e-06, "loss": 0.7343, "step": 2436 }, { "epoch": 0.07469044992031385, "grad_norm": 0.5186111105737298, "learning_rate": 9.947727095145371e-06, "loss": 0.4758, "step": 2437 }, { "epoch": 0.07472109844305505, "grad_norm": 1.4477710220427649, "learning_rate": 9.94765549102783e-06, "loss": 0.7473, "step": 2438 }, { "epoch": 0.07475174696579624, "grad_norm": 1.5020025616836956, "learning_rate": 9.947583838159774e-06, "loss": 0.7596, "step": 2439 }, { "epoch": 0.07478239548853745, "grad_norm": 1.3660602421088988, "learning_rate": 9.947512136541906e-06, "loss": 0.7362, "step": 2440 }, { "epoch": 0.07481304401127865, "grad_norm": 1.3073955528412549, "learning_rate": 9.947440386174938e-06, "loss": 0.6983, "step": 2441 }, { "epoch": 0.07484369253401986, "grad_norm": 1.492893601304882, "learning_rate": 9.947368587059574e-06, "loss": 0.7456, "step": 2442 }, { "epoch": 0.07487434105676107, "grad_norm": 1.487822551542841, "learning_rate": 9.94729673919652e-06, "loss": 0.9289, "step": 2443 }, { "epoch": 0.07490498957950227, "grad_norm": 1.356708832833566, "learning_rate": 9.947224842586484e-06, "loss": 0.6705, "step": 2444 }, { "epoch": 0.07493563810224348, "grad_norm": 1.408523485625938, "learning_rate": 9.947152897230179e-06, "loss": 0.7386, "step": 2445 }, { "epoch": 0.07496628662498468, "grad_norm": 1.507249813090564, "learning_rate": 9.94708090312831e-06, "loss": 0.7646, "step": 2446 }, { "epoch": 0.07499693514772587, "grad_norm": 1.6012931522704223, "learning_rate": 9.947008860281586e-06, "loss": 0.6553, "step": 2447 }, { "epoch": 0.07502758367046708, "grad_norm": 1.738207892513078, "learning_rate": 9.946936768690719e-06, "loss": 0.6639, "step": 2448 }, { "epoch": 0.07505823219320829, "grad_norm": 1.6685354491952977, "learning_rate": 9.946864628356418e-06, "loss": 0.6886, "step": 2449 }, { "epoch": 0.07508888071594949, "grad_norm": 1.4636990548240363, "learning_rate": 9.946792439279393e-06, "loss": 0.6814, "step": 2450 }, { "epoch": 0.0751195292386907, "grad_norm": 1.3882063219572693, "learning_rate": 9.946720201460358e-06, "loss": 0.6872, "step": 2451 }, { "epoch": 0.0751501777614319, "grad_norm": 0.5257119777918124, "learning_rate": 9.946647914900023e-06, "loss": 0.458, "step": 2452 }, { "epoch": 0.07518082628417311, "grad_norm": 1.4984592107408428, "learning_rate": 9.946575579599098e-06, "loss": 0.8152, "step": 2453 }, { "epoch": 0.07521147480691431, "grad_norm": 0.4726953959064538, "learning_rate": 9.946503195558302e-06, "loss": 0.4603, "step": 2454 }, { "epoch": 0.0752421233296555, "grad_norm": 1.5180803377686505, "learning_rate": 9.94643076277834e-06, "loss": 0.8448, "step": 2455 }, { "epoch": 0.07527277185239671, "grad_norm": 1.5670014037417066, "learning_rate": 9.946358281259933e-06, "loss": 0.7535, "step": 2456 }, { "epoch": 0.07530342037513792, "grad_norm": 1.3901653117231947, "learning_rate": 9.946285751003793e-06, "loss": 0.7541, "step": 2457 }, { "epoch": 0.07533406889787912, "grad_norm": 1.6382949439482235, "learning_rate": 9.946213172010633e-06, "loss": 0.7977, "step": 2458 }, { "epoch": 0.07536471742062033, "grad_norm": 1.793892517413945, "learning_rate": 9.946140544281168e-06, "loss": 0.7445, "step": 2459 }, { "epoch": 0.07539536594336153, "grad_norm": 0.5348958126535583, "learning_rate": 9.946067867816116e-06, "loss": 0.4989, "step": 2460 }, { "epoch": 0.07542601446610274, "grad_norm": 1.456498831055893, "learning_rate": 9.945995142616192e-06, "loss": 0.759, "step": 2461 }, { "epoch": 0.07545666298884393, "grad_norm": 0.4805217374554526, "learning_rate": 9.945922368682111e-06, "loss": 0.4747, "step": 2462 }, { "epoch": 0.07548731151158514, "grad_norm": 1.4099650512148059, "learning_rate": 9.945849546014591e-06, "loss": 0.6794, "step": 2463 }, { "epoch": 0.07551796003432634, "grad_norm": 1.5397568732057632, "learning_rate": 9.945776674614353e-06, "loss": 0.7343, "step": 2464 }, { "epoch": 0.07554860855706755, "grad_norm": 0.47850654724966846, "learning_rate": 9.94570375448211e-06, "loss": 0.4683, "step": 2465 }, { "epoch": 0.07557925707980875, "grad_norm": 1.5567104865187114, "learning_rate": 9.945630785618583e-06, "loss": 0.7852, "step": 2466 }, { "epoch": 0.07560990560254996, "grad_norm": 1.5794770947877168, "learning_rate": 9.94555776802449e-06, "loss": 0.7838, "step": 2467 }, { "epoch": 0.07564055412529117, "grad_norm": 0.503255149897054, "learning_rate": 9.94548470170055e-06, "loss": 0.4983, "step": 2468 }, { "epoch": 0.07567120264803237, "grad_norm": 1.584348949670095, "learning_rate": 9.945411586647486e-06, "loss": 0.7368, "step": 2469 }, { "epoch": 0.07570185117077356, "grad_norm": 1.5457914290561485, "learning_rate": 9.945338422866015e-06, "loss": 0.8219, "step": 2470 }, { "epoch": 0.07573249969351477, "grad_norm": 1.5854701013634058, "learning_rate": 9.945265210356858e-06, "loss": 0.8171, "step": 2471 }, { "epoch": 0.07576314821625597, "grad_norm": 1.3339014402705351, "learning_rate": 9.94519194912074e-06, "loss": 0.7426, "step": 2472 }, { "epoch": 0.07579379673899718, "grad_norm": 1.4536414245884155, "learning_rate": 9.94511863915838e-06, "loss": 0.7056, "step": 2473 }, { "epoch": 0.07582444526173839, "grad_norm": 1.5865916997828453, "learning_rate": 9.9450452804705e-06, "loss": 0.8072, "step": 2474 }, { "epoch": 0.07585509378447959, "grad_norm": 1.3931459152820604, "learning_rate": 9.944971873057822e-06, "loss": 0.7913, "step": 2475 }, { "epoch": 0.0758857423072208, "grad_norm": 1.7023560629263834, "learning_rate": 9.944898416921073e-06, "loss": 0.7775, "step": 2476 }, { "epoch": 0.075916390829962, "grad_norm": 1.536804847924542, "learning_rate": 9.944824912060975e-06, "loss": 0.7533, "step": 2477 }, { "epoch": 0.0759470393527032, "grad_norm": 1.4324377152447703, "learning_rate": 9.944751358478253e-06, "loss": 0.7385, "step": 2478 }, { "epoch": 0.0759776878754444, "grad_norm": 1.6896454753925618, "learning_rate": 9.944677756173629e-06, "loss": 0.7343, "step": 2479 }, { "epoch": 0.0760083363981856, "grad_norm": 1.5812116730859545, "learning_rate": 9.944604105147828e-06, "loss": 0.7712, "step": 2480 }, { "epoch": 0.07603898492092681, "grad_norm": 1.5401912136672051, "learning_rate": 9.94453040540158e-06, "loss": 0.688, "step": 2481 }, { "epoch": 0.07606963344366802, "grad_norm": 0.5347699612537112, "learning_rate": 9.944456656935609e-06, "loss": 0.4771, "step": 2482 }, { "epoch": 0.07610028196640922, "grad_norm": 0.5380385388684723, "learning_rate": 9.94438285975064e-06, "loss": 0.486, "step": 2483 }, { "epoch": 0.07613093048915043, "grad_norm": 1.8297787632671714, "learning_rate": 9.944309013847403e-06, "loss": 0.7836, "step": 2484 }, { "epoch": 0.07616157901189163, "grad_norm": 3.434944477732288, "learning_rate": 9.944235119226624e-06, "loss": 0.7667, "step": 2485 }, { "epoch": 0.07619222753463283, "grad_norm": 2.1324102852153652, "learning_rate": 9.944161175889031e-06, "loss": 0.7417, "step": 2486 }, { "epoch": 0.07622287605737403, "grad_norm": 1.4614022662511925, "learning_rate": 9.944087183835353e-06, "loss": 0.73, "step": 2487 }, { "epoch": 0.07625352458011524, "grad_norm": 1.6347076215519958, "learning_rate": 9.944013143066318e-06, "loss": 0.8001, "step": 2488 }, { "epoch": 0.07628417310285644, "grad_norm": 0.5349098376640582, "learning_rate": 9.943939053582659e-06, "loss": 0.4925, "step": 2489 }, { "epoch": 0.07631482162559765, "grad_norm": 0.5313605012768564, "learning_rate": 9.943864915385102e-06, "loss": 0.5047, "step": 2490 }, { "epoch": 0.07634547014833885, "grad_norm": 1.5746465915061323, "learning_rate": 9.943790728474378e-06, "loss": 0.7692, "step": 2491 }, { "epoch": 0.07637611867108006, "grad_norm": 0.5270648203889103, "learning_rate": 9.943716492851221e-06, "loss": 0.5074, "step": 2492 }, { "epoch": 0.07640676719382125, "grad_norm": 1.5326430912754208, "learning_rate": 9.94364220851636e-06, "loss": 0.7145, "step": 2493 }, { "epoch": 0.07643741571656246, "grad_norm": 1.3649790075823107, "learning_rate": 9.943567875470526e-06, "loss": 0.7899, "step": 2494 }, { "epoch": 0.07646806423930366, "grad_norm": 1.5375397558123842, "learning_rate": 9.943493493714453e-06, "loss": 0.8124, "step": 2495 }, { "epoch": 0.07649871276204487, "grad_norm": 0.6133291929283022, "learning_rate": 9.943419063248876e-06, "loss": 0.4689, "step": 2496 }, { "epoch": 0.07652936128478607, "grad_norm": 1.6121814306703375, "learning_rate": 9.943344584074525e-06, "loss": 0.8127, "step": 2497 }, { "epoch": 0.07656000980752728, "grad_norm": 1.291622678976977, "learning_rate": 9.943270056192133e-06, "loss": 0.7108, "step": 2498 }, { "epoch": 0.07659065833026849, "grad_norm": 1.3547416194088366, "learning_rate": 9.94319547960244e-06, "loss": 0.6701, "step": 2499 }, { "epoch": 0.07662130685300969, "grad_norm": 1.4960493165864817, "learning_rate": 9.943120854306175e-06, "loss": 0.6211, "step": 2500 }, { "epoch": 0.07665195537575088, "grad_norm": 1.4629864955127547, "learning_rate": 9.943046180304075e-06, "loss": 0.76, "step": 2501 }, { "epoch": 0.07668260389849209, "grad_norm": 0.534791314138873, "learning_rate": 9.942971457596877e-06, "loss": 0.4849, "step": 2502 }, { "epoch": 0.0767132524212333, "grad_norm": 1.442441020729448, "learning_rate": 9.942896686185317e-06, "loss": 0.7066, "step": 2503 }, { "epoch": 0.0767439009439745, "grad_norm": 1.4797509659111812, "learning_rate": 9.942821866070131e-06, "loss": 0.7432, "step": 2504 }, { "epoch": 0.0767745494667157, "grad_norm": 1.5866974893100807, "learning_rate": 9.942746997252056e-06, "loss": 0.7345, "step": 2505 }, { "epoch": 0.07680519798945691, "grad_norm": 0.4884384283015516, "learning_rate": 9.94267207973183e-06, "loss": 0.4811, "step": 2506 }, { "epoch": 0.07683584651219812, "grad_norm": 1.5456613007914586, "learning_rate": 9.942597113510193e-06, "loss": 0.6905, "step": 2507 }, { "epoch": 0.07686649503493932, "grad_norm": 1.8015446717734385, "learning_rate": 9.94252209858788e-06, "loss": 0.7743, "step": 2508 }, { "epoch": 0.07689714355768051, "grad_norm": 1.4813332781676396, "learning_rate": 9.942447034965634e-06, "loss": 0.7475, "step": 2509 }, { "epoch": 0.07692779208042172, "grad_norm": 1.6164124639811255, "learning_rate": 9.942371922644191e-06, "loss": 0.7015, "step": 2510 }, { "epoch": 0.07695844060316293, "grad_norm": 1.4862420499209672, "learning_rate": 9.942296761624293e-06, "loss": 0.8108, "step": 2511 }, { "epoch": 0.07698908912590413, "grad_norm": 1.4060673558957697, "learning_rate": 9.94222155190668e-06, "loss": 0.583, "step": 2512 }, { "epoch": 0.07701973764864534, "grad_norm": 1.536287515904748, "learning_rate": 9.942146293492097e-06, "loss": 0.7318, "step": 2513 }, { "epoch": 0.07705038617138654, "grad_norm": 1.602013961068316, "learning_rate": 9.942070986381279e-06, "loss": 0.6935, "step": 2514 }, { "epoch": 0.07708103469412775, "grad_norm": 1.4612724846465515, "learning_rate": 9.941995630574972e-06, "loss": 0.676, "step": 2515 }, { "epoch": 0.07711168321686895, "grad_norm": 1.3852986860041157, "learning_rate": 9.941920226073917e-06, "loss": 0.7757, "step": 2516 }, { "epoch": 0.07714233173961015, "grad_norm": 1.3388138307741624, "learning_rate": 9.941844772878857e-06, "loss": 0.7259, "step": 2517 }, { "epoch": 0.07717298026235135, "grad_norm": 1.3005738411441325, "learning_rate": 9.941769270990538e-06, "loss": 0.7288, "step": 2518 }, { "epoch": 0.07720362878509256, "grad_norm": 1.2598389748390852, "learning_rate": 9.941693720409701e-06, "loss": 0.7464, "step": 2519 }, { "epoch": 0.07723427730783376, "grad_norm": 1.4299929356071608, "learning_rate": 9.941618121137091e-06, "loss": 0.7996, "step": 2520 }, { "epoch": 0.07726492583057497, "grad_norm": 1.4881053938862523, "learning_rate": 9.941542473173454e-06, "loss": 0.7143, "step": 2521 }, { "epoch": 0.07729557435331617, "grad_norm": 1.582295604022134, "learning_rate": 9.941466776519535e-06, "loss": 0.758, "step": 2522 }, { "epoch": 0.07732622287605738, "grad_norm": 1.5895543774474208, "learning_rate": 9.941391031176077e-06, "loss": 0.8042, "step": 2523 }, { "epoch": 0.07735687139879857, "grad_norm": 1.464408409021827, "learning_rate": 9.941315237143831e-06, "loss": 0.7455, "step": 2524 }, { "epoch": 0.07738751992153978, "grad_norm": 0.5153912973359209, "learning_rate": 9.941239394423544e-06, "loss": 0.4782, "step": 2525 }, { "epoch": 0.07741816844428098, "grad_norm": 1.440337886464414, "learning_rate": 9.941163503015956e-06, "loss": 0.7282, "step": 2526 }, { "epoch": 0.07744881696702219, "grad_norm": 1.7138514792712938, "learning_rate": 9.941087562921823e-06, "loss": 0.7589, "step": 2527 }, { "epoch": 0.0774794654897634, "grad_norm": 1.4399856292066624, "learning_rate": 9.941011574141888e-06, "loss": 0.7984, "step": 2528 }, { "epoch": 0.0775101140125046, "grad_norm": 1.4796351893407196, "learning_rate": 9.940935536676903e-06, "loss": 0.606, "step": 2529 }, { "epoch": 0.0775407625352458, "grad_norm": 1.5099847158520543, "learning_rate": 9.940859450527615e-06, "loss": 0.7229, "step": 2530 }, { "epoch": 0.07757141105798701, "grad_norm": 0.49643502830167435, "learning_rate": 9.940783315694778e-06, "loss": 0.4797, "step": 2531 }, { "epoch": 0.0776020595807282, "grad_norm": 0.4740858152225164, "learning_rate": 9.940707132179134e-06, "loss": 0.4619, "step": 2532 }, { "epoch": 0.07763270810346941, "grad_norm": 1.646934676185341, "learning_rate": 9.940630899981442e-06, "loss": 0.6496, "step": 2533 }, { "epoch": 0.07766335662621061, "grad_norm": 1.6586621741144396, "learning_rate": 9.940554619102448e-06, "loss": 0.7492, "step": 2534 }, { "epoch": 0.07769400514895182, "grad_norm": 1.477266516889949, "learning_rate": 9.940478289542906e-06, "loss": 0.686, "step": 2535 }, { "epoch": 0.07772465367169303, "grad_norm": 1.396667779670438, "learning_rate": 9.940401911303568e-06, "loss": 0.7271, "step": 2536 }, { "epoch": 0.07775530219443423, "grad_norm": 1.610819381610482, "learning_rate": 9.940325484385185e-06, "loss": 0.8296, "step": 2537 }, { "epoch": 0.07778595071717544, "grad_norm": 1.4574867021651765, "learning_rate": 9.94024900878851e-06, "loss": 0.7857, "step": 2538 }, { "epoch": 0.07781659923991664, "grad_norm": 1.49539461678534, "learning_rate": 9.9401724845143e-06, "loss": 0.7376, "step": 2539 }, { "epoch": 0.07784724776265783, "grad_norm": 1.466520604961041, "learning_rate": 9.940095911563303e-06, "loss": 0.7939, "step": 2540 }, { "epoch": 0.07787789628539904, "grad_norm": 1.5618001379937287, "learning_rate": 9.94001928993628e-06, "loss": 0.7811, "step": 2541 }, { "epoch": 0.07790854480814025, "grad_norm": 1.6538712885787754, "learning_rate": 9.93994261963398e-06, "loss": 0.7585, "step": 2542 }, { "epoch": 0.07793919333088145, "grad_norm": 1.4654576190559585, "learning_rate": 9.939865900657164e-06, "loss": 0.7264, "step": 2543 }, { "epoch": 0.07796984185362266, "grad_norm": 1.5366615044823724, "learning_rate": 9.939789133006586e-06, "loss": 0.7517, "step": 2544 }, { "epoch": 0.07800049037636386, "grad_norm": 1.481815990720105, "learning_rate": 9.939712316682998e-06, "loss": 0.7104, "step": 2545 }, { "epoch": 0.07803113889910507, "grad_norm": 1.6304652429938047, "learning_rate": 9.939635451687164e-06, "loss": 0.8139, "step": 2546 }, { "epoch": 0.07806178742184627, "grad_norm": 1.5617117989498734, "learning_rate": 9.939558538019838e-06, "loss": 0.7581, "step": 2547 }, { "epoch": 0.07809243594458747, "grad_norm": 1.5597891642690347, "learning_rate": 9.939481575681777e-06, "loss": 0.718, "step": 2548 }, { "epoch": 0.07812308446732867, "grad_norm": 1.2617129619170109, "learning_rate": 9.93940456467374e-06, "loss": 0.735, "step": 2549 }, { "epoch": 0.07815373299006988, "grad_norm": 1.536810705424935, "learning_rate": 9.939327504996485e-06, "loss": 0.702, "step": 2550 }, { "epoch": 0.07818438151281108, "grad_norm": 1.6585313521451595, "learning_rate": 9.939250396650775e-06, "loss": 0.8341, "step": 2551 }, { "epoch": 0.07821503003555229, "grad_norm": 1.566536926569754, "learning_rate": 9.939173239637365e-06, "loss": 0.7648, "step": 2552 }, { "epoch": 0.0782456785582935, "grad_norm": 1.6563421186181673, "learning_rate": 9.939096033957019e-06, "loss": 0.762, "step": 2553 }, { "epoch": 0.0782763270810347, "grad_norm": 1.3232560838689942, "learning_rate": 9.939018779610494e-06, "loss": 0.7106, "step": 2554 }, { "epoch": 0.07830697560377589, "grad_norm": 1.5192399738244056, "learning_rate": 9.938941476598554e-06, "loss": 0.7025, "step": 2555 }, { "epoch": 0.0783376241265171, "grad_norm": 1.319385768363203, "learning_rate": 9.938864124921961e-06, "loss": 0.6916, "step": 2556 }, { "epoch": 0.0783682726492583, "grad_norm": 1.631867766156155, "learning_rate": 9.938786724581476e-06, "loss": 0.7446, "step": 2557 }, { "epoch": 0.07839892117199951, "grad_norm": 1.5922637252803642, "learning_rate": 9.93870927557786e-06, "loss": 0.735, "step": 2558 }, { "epoch": 0.07842956969474071, "grad_norm": 1.4643535676661754, "learning_rate": 9.93863177791188e-06, "loss": 0.7233, "step": 2559 }, { "epoch": 0.07846021821748192, "grad_norm": 1.6538594944696787, "learning_rate": 9.938554231584296e-06, "loss": 0.8264, "step": 2560 }, { "epoch": 0.07849086674022313, "grad_norm": 1.6026152223274874, "learning_rate": 9.938476636595874e-06, "loss": 0.8138, "step": 2561 }, { "epoch": 0.07852151526296433, "grad_norm": 1.4729918297284745, "learning_rate": 9.938398992947379e-06, "loss": 0.7651, "step": 2562 }, { "epoch": 0.07855216378570552, "grad_norm": 1.4995945818957837, "learning_rate": 9.938321300639574e-06, "loss": 0.781, "step": 2563 }, { "epoch": 0.07858281230844673, "grad_norm": 1.6056610212302354, "learning_rate": 9.938243559673227e-06, "loss": 0.7858, "step": 2564 }, { "epoch": 0.07861346083118793, "grad_norm": 0.6340723007616479, "learning_rate": 9.938165770049101e-06, "loss": 0.4856, "step": 2565 }, { "epoch": 0.07864410935392914, "grad_norm": 1.4975955106911867, "learning_rate": 9.938087931767965e-06, "loss": 0.7148, "step": 2566 }, { "epoch": 0.07867475787667035, "grad_norm": 0.55056911928298, "learning_rate": 9.938010044830585e-06, "loss": 0.4776, "step": 2567 }, { "epoch": 0.07870540639941155, "grad_norm": 1.4770136350971643, "learning_rate": 9.937932109237729e-06, "loss": 0.7802, "step": 2568 }, { "epoch": 0.07873605492215276, "grad_norm": 0.5139015894769564, "learning_rate": 9.937854124990163e-06, "loss": 0.4793, "step": 2569 }, { "epoch": 0.07876670344489396, "grad_norm": 1.589232709753776, "learning_rate": 9.937776092088659e-06, "loss": 0.7309, "step": 2570 }, { "epoch": 0.07879735196763515, "grad_norm": 1.4682315812552196, "learning_rate": 9.937698010533982e-06, "loss": 0.7002, "step": 2571 }, { "epoch": 0.07882800049037636, "grad_norm": 1.7225176248587513, "learning_rate": 9.937619880326905e-06, "loss": 0.8285, "step": 2572 }, { "epoch": 0.07885864901311757, "grad_norm": 1.5792755371235174, "learning_rate": 9.937541701468194e-06, "loss": 0.8417, "step": 2573 }, { "epoch": 0.07888929753585877, "grad_norm": 1.3536084806540822, "learning_rate": 9.937463473958624e-06, "loss": 0.8094, "step": 2574 }, { "epoch": 0.07891994605859998, "grad_norm": 0.640662019730719, "learning_rate": 9.93738519779896e-06, "loss": 0.4796, "step": 2575 }, { "epoch": 0.07895059458134118, "grad_norm": 1.4480980431418602, "learning_rate": 9.937306872989977e-06, "loss": 0.733, "step": 2576 }, { "epoch": 0.07898124310408239, "grad_norm": 1.7228233224973357, "learning_rate": 9.937228499532448e-06, "loss": 0.8308, "step": 2577 }, { "epoch": 0.0790118916268236, "grad_norm": 1.4710281637853764, "learning_rate": 9.937150077427143e-06, "loss": 0.8015, "step": 2578 }, { "epoch": 0.07904254014956479, "grad_norm": 1.5440302837093036, "learning_rate": 9.937071606674834e-06, "loss": 0.7428, "step": 2579 }, { "epoch": 0.07907318867230599, "grad_norm": 1.485954863091431, "learning_rate": 9.936993087276294e-06, "loss": 0.739, "step": 2580 }, { "epoch": 0.0791038371950472, "grad_norm": 1.3304809685002552, "learning_rate": 9.9369145192323e-06, "loss": 0.7404, "step": 2581 }, { "epoch": 0.0791344857177884, "grad_norm": 1.377352061797896, "learning_rate": 9.936835902543624e-06, "loss": 0.6561, "step": 2582 }, { "epoch": 0.07916513424052961, "grad_norm": 0.5908561070147216, "learning_rate": 9.93675723721104e-06, "loss": 0.4615, "step": 2583 }, { "epoch": 0.07919578276327081, "grad_norm": 1.5711952134141285, "learning_rate": 9.936678523235324e-06, "loss": 0.6977, "step": 2584 }, { "epoch": 0.07922643128601202, "grad_norm": 1.8826332223859563, "learning_rate": 9.936599760617251e-06, "loss": 0.8306, "step": 2585 }, { "epoch": 0.07925707980875321, "grad_norm": 1.7111979016578975, "learning_rate": 9.936520949357599e-06, "loss": 0.7663, "step": 2586 }, { "epoch": 0.07928772833149442, "grad_norm": 0.5001165159710876, "learning_rate": 9.936442089457142e-06, "loss": 0.484, "step": 2587 }, { "epoch": 0.07931837685423562, "grad_norm": 1.5754248792378251, "learning_rate": 9.936363180916657e-06, "loss": 0.7257, "step": 2588 }, { "epoch": 0.07934902537697683, "grad_norm": 1.3224001640307752, "learning_rate": 9.936284223736924e-06, "loss": 0.6977, "step": 2589 }, { "epoch": 0.07937967389971803, "grad_norm": 1.477554292932747, "learning_rate": 9.93620521791872e-06, "loss": 0.6685, "step": 2590 }, { "epoch": 0.07941032242245924, "grad_norm": 1.5679166028564555, "learning_rate": 9.936126163462821e-06, "loss": 0.78, "step": 2591 }, { "epoch": 0.07944097094520045, "grad_norm": 1.5981263281005151, "learning_rate": 9.936047060370008e-06, "loss": 0.8294, "step": 2592 }, { "epoch": 0.07947161946794165, "grad_norm": 1.546038174958897, "learning_rate": 9.935967908641063e-06, "loss": 0.68, "step": 2593 }, { "epoch": 0.07950226799068284, "grad_norm": 1.4557626074312684, "learning_rate": 9.935888708276761e-06, "loss": 0.7428, "step": 2594 }, { "epoch": 0.07953291651342405, "grad_norm": 0.5901937205133966, "learning_rate": 9.935809459277885e-06, "loss": 0.486, "step": 2595 }, { "epoch": 0.07956356503616525, "grad_norm": 1.3042103206371576, "learning_rate": 9.935730161645216e-06, "loss": 0.7355, "step": 2596 }, { "epoch": 0.07959421355890646, "grad_norm": 1.4806318302141734, "learning_rate": 9.935650815379536e-06, "loss": 0.8211, "step": 2597 }, { "epoch": 0.07962486208164767, "grad_norm": 1.4751087939145322, "learning_rate": 9.935571420481625e-06, "loss": 0.8014, "step": 2598 }, { "epoch": 0.07965551060438887, "grad_norm": 1.6983227481676708, "learning_rate": 9.935491976952265e-06, "loss": 0.8012, "step": 2599 }, { "epoch": 0.07968615912713008, "grad_norm": 1.3603309957839875, "learning_rate": 9.935412484792239e-06, "loss": 0.7195, "step": 2600 }, { "epoch": 0.07971680764987128, "grad_norm": 1.4222603963632479, "learning_rate": 9.935332944002333e-06, "loss": 0.7417, "step": 2601 }, { "epoch": 0.07974745617261247, "grad_norm": 0.5060059817905929, "learning_rate": 9.935253354583327e-06, "loss": 0.4868, "step": 2602 }, { "epoch": 0.07977810469535368, "grad_norm": 1.3127346936296074, "learning_rate": 9.935173716536008e-06, "loss": 0.6915, "step": 2603 }, { "epoch": 0.07980875321809489, "grad_norm": 1.6334132156574155, "learning_rate": 9.93509402986116e-06, "loss": 0.7999, "step": 2604 }, { "epoch": 0.07983940174083609, "grad_norm": 1.7138634015762073, "learning_rate": 9.935014294559567e-06, "loss": 0.8252, "step": 2605 }, { "epoch": 0.0798700502635773, "grad_norm": 1.4268232672611696, "learning_rate": 9.934934510632017e-06, "loss": 0.7346, "step": 2606 }, { "epoch": 0.0799006987863185, "grad_norm": 1.5110025452380853, "learning_rate": 9.934854678079293e-06, "loss": 0.763, "step": 2607 }, { "epoch": 0.07993134730905971, "grad_norm": 1.4527233942215403, "learning_rate": 9.934774796902182e-06, "loss": 0.7596, "step": 2608 }, { "epoch": 0.07996199583180091, "grad_norm": 1.7413251182346943, "learning_rate": 9.934694867101475e-06, "loss": 0.6718, "step": 2609 }, { "epoch": 0.0799926443545421, "grad_norm": 1.59353757429756, "learning_rate": 9.934614888677955e-06, "loss": 0.7099, "step": 2610 }, { "epoch": 0.08002329287728331, "grad_norm": 1.4876813392568748, "learning_rate": 9.934534861632413e-06, "loss": 0.792, "step": 2611 }, { "epoch": 0.08005394140002452, "grad_norm": 1.3966255688495501, "learning_rate": 9.934454785965636e-06, "loss": 0.792, "step": 2612 }, { "epoch": 0.08008458992276572, "grad_norm": 1.6723851646673618, "learning_rate": 9.934374661678415e-06, "loss": 0.7579, "step": 2613 }, { "epoch": 0.08011523844550693, "grad_norm": 1.3797383109012915, "learning_rate": 9.934294488771537e-06, "loss": 0.7419, "step": 2614 }, { "epoch": 0.08014588696824813, "grad_norm": 0.5398172762408496, "learning_rate": 9.934214267245792e-06, "loss": 0.454, "step": 2615 }, { "epoch": 0.08017653549098934, "grad_norm": 0.5228504390182536, "learning_rate": 9.934133997101972e-06, "loss": 0.4877, "step": 2616 }, { "epoch": 0.08020718401373053, "grad_norm": 1.6067395852737705, "learning_rate": 9.934053678340867e-06, "loss": 0.8691, "step": 2617 }, { "epoch": 0.08023783253647174, "grad_norm": 1.8633439152244315, "learning_rate": 9.933973310963269e-06, "loss": 0.7097, "step": 2618 }, { "epoch": 0.08026848105921294, "grad_norm": 1.358218353082768, "learning_rate": 9.933892894969968e-06, "loss": 0.6376, "step": 2619 }, { "epoch": 0.08029912958195415, "grad_norm": 0.5139529674827656, "learning_rate": 9.93381243036176e-06, "loss": 0.4932, "step": 2620 }, { "epoch": 0.08032977810469535, "grad_norm": 1.583793065705946, "learning_rate": 9.933731917139436e-06, "loss": 0.797, "step": 2621 }, { "epoch": 0.08036042662743656, "grad_norm": 1.5981562309407575, "learning_rate": 9.933651355303788e-06, "loss": 0.7224, "step": 2622 }, { "epoch": 0.08039107515017777, "grad_norm": 1.586782405859986, "learning_rate": 9.933570744855609e-06, "loss": 0.7105, "step": 2623 }, { "epoch": 0.08042172367291897, "grad_norm": 1.473906580685364, "learning_rate": 9.933490085795698e-06, "loss": 0.7532, "step": 2624 }, { "epoch": 0.08045237219566016, "grad_norm": 1.4437404278912727, "learning_rate": 9.933409378124847e-06, "loss": 0.8453, "step": 2625 }, { "epoch": 0.08048302071840137, "grad_norm": 1.4256819591466605, "learning_rate": 9.93332862184385e-06, "loss": 0.7578, "step": 2626 }, { "epoch": 0.08051366924114257, "grad_norm": 1.474383229386391, "learning_rate": 9.933247816953505e-06, "loss": 0.7898, "step": 2627 }, { "epoch": 0.08054431776388378, "grad_norm": 1.4642352579050695, "learning_rate": 9.933166963454604e-06, "loss": 0.8422, "step": 2628 }, { "epoch": 0.08057496628662499, "grad_norm": 1.6110369317577307, "learning_rate": 9.93308606134795e-06, "loss": 0.8167, "step": 2629 }, { "epoch": 0.08060561480936619, "grad_norm": 1.461075300110989, "learning_rate": 9.933005110634334e-06, "loss": 0.6906, "step": 2630 }, { "epoch": 0.0806362633321074, "grad_norm": 1.5340385291612733, "learning_rate": 9.93292411131456e-06, "loss": 0.8686, "step": 2631 }, { "epoch": 0.0806669118548486, "grad_norm": 1.6285918016480674, "learning_rate": 9.932843063389418e-06, "loss": 0.8065, "step": 2632 }, { "epoch": 0.0806975603775898, "grad_norm": 1.64831640971841, "learning_rate": 9.932761966859716e-06, "loss": 0.7473, "step": 2633 }, { "epoch": 0.080728208900331, "grad_norm": 1.6650876244886905, "learning_rate": 9.932680821726246e-06, "loss": 0.6562, "step": 2634 }, { "epoch": 0.0807588574230722, "grad_norm": 1.3897722907321743, "learning_rate": 9.93259962798981e-06, "loss": 0.7556, "step": 2635 }, { "epoch": 0.08078950594581341, "grad_norm": 1.5023178619113295, "learning_rate": 9.932518385651207e-06, "loss": 0.8697, "step": 2636 }, { "epoch": 0.08082015446855462, "grad_norm": 1.7592983807555744, "learning_rate": 9.932437094711238e-06, "loss": 0.705, "step": 2637 }, { "epoch": 0.08085080299129582, "grad_norm": 1.4992789673409288, "learning_rate": 9.932355755170706e-06, "loss": 0.7353, "step": 2638 }, { "epoch": 0.08088145151403703, "grad_norm": 1.6646488111454054, "learning_rate": 9.93227436703041e-06, "loss": 0.67, "step": 2639 }, { "epoch": 0.08091210003677823, "grad_norm": 1.5444030732344476, "learning_rate": 9.932192930291152e-06, "loss": 0.7384, "step": 2640 }, { "epoch": 0.08094274855951943, "grad_norm": 1.746286988522175, "learning_rate": 9.932111444953735e-06, "loss": 0.7331, "step": 2641 }, { "epoch": 0.08097339708226063, "grad_norm": 0.5530780400361753, "learning_rate": 9.932029911018965e-06, "loss": 0.4643, "step": 2642 }, { "epoch": 0.08100404560500184, "grad_norm": 1.3974274441034387, "learning_rate": 9.931948328487639e-06, "loss": 0.6942, "step": 2643 }, { "epoch": 0.08103469412774304, "grad_norm": 1.5868146480389362, "learning_rate": 9.931866697360566e-06, "loss": 0.6602, "step": 2644 }, { "epoch": 0.08106534265048425, "grad_norm": 1.4848192435030951, "learning_rate": 9.931785017638547e-06, "loss": 0.8224, "step": 2645 }, { "epoch": 0.08109599117322545, "grad_norm": 1.5066471293452317, "learning_rate": 9.931703289322389e-06, "loss": 0.7337, "step": 2646 }, { "epoch": 0.08112663969596666, "grad_norm": 1.4797505649165004, "learning_rate": 9.931621512412897e-06, "loss": 0.705, "step": 2647 }, { "epoch": 0.08115728821870785, "grad_norm": 1.4657367906024266, "learning_rate": 9.931539686910877e-06, "loss": 0.6705, "step": 2648 }, { "epoch": 0.08118793674144906, "grad_norm": 1.6407544610576246, "learning_rate": 9.931457812817135e-06, "loss": 0.7248, "step": 2649 }, { "epoch": 0.08121858526419026, "grad_norm": 1.677391027200713, "learning_rate": 9.931375890132475e-06, "loss": 0.748, "step": 2650 }, { "epoch": 0.08124923378693147, "grad_norm": 1.4273223493025091, "learning_rate": 9.93129391885771e-06, "loss": 0.7727, "step": 2651 }, { "epoch": 0.08127988230967267, "grad_norm": 1.4289654526677935, "learning_rate": 9.93121189899364e-06, "loss": 0.7283, "step": 2652 }, { "epoch": 0.08131053083241388, "grad_norm": 0.5694173329685116, "learning_rate": 9.931129830541082e-06, "loss": 0.4999, "step": 2653 }, { "epoch": 0.08134117935515509, "grad_norm": 1.6880438354243257, "learning_rate": 9.931047713500836e-06, "loss": 0.7934, "step": 2654 }, { "epoch": 0.08137182787789629, "grad_norm": 1.3939363286518132, "learning_rate": 9.930965547873719e-06, "loss": 0.7648, "step": 2655 }, { "epoch": 0.08140247640063748, "grad_norm": 1.6359301632815042, "learning_rate": 9.930883333660535e-06, "loss": 0.751, "step": 2656 }, { "epoch": 0.08143312492337869, "grad_norm": 1.5341803208082885, "learning_rate": 9.930801070862095e-06, "loss": 0.779, "step": 2657 }, { "epoch": 0.0814637734461199, "grad_norm": 1.60875473230396, "learning_rate": 9.930718759479213e-06, "loss": 0.6759, "step": 2658 }, { "epoch": 0.0814944219688611, "grad_norm": 1.6673816879405752, "learning_rate": 9.930636399512694e-06, "loss": 0.709, "step": 2659 }, { "epoch": 0.0815250704916023, "grad_norm": 1.3872706312720455, "learning_rate": 9.930553990963355e-06, "loss": 0.7797, "step": 2660 }, { "epoch": 0.08155571901434351, "grad_norm": 1.5398453075322207, "learning_rate": 9.930471533832005e-06, "loss": 0.7928, "step": 2661 }, { "epoch": 0.08158636753708472, "grad_norm": 1.5741812445561005, "learning_rate": 9.930389028119458e-06, "loss": 0.7971, "step": 2662 }, { "epoch": 0.08161701605982592, "grad_norm": 1.5721128093285388, "learning_rate": 9.930306473826528e-06, "loss": 0.7403, "step": 2663 }, { "epoch": 0.08164766458256711, "grad_norm": 1.4318397631662445, "learning_rate": 9.930223870954025e-06, "loss": 0.6634, "step": 2664 }, { "epoch": 0.08167831310530832, "grad_norm": 1.462703564589482, "learning_rate": 9.930141219502765e-06, "loss": 0.7449, "step": 2665 }, { "epoch": 0.08170896162804953, "grad_norm": 1.5273677736638884, "learning_rate": 9.930058519473562e-06, "loss": 0.7394, "step": 2666 }, { "epoch": 0.08173961015079073, "grad_norm": 1.3018147258059534, "learning_rate": 9.929975770867231e-06, "loss": 0.8173, "step": 2667 }, { "epoch": 0.08177025867353194, "grad_norm": 1.5171995415327573, "learning_rate": 9.92989297368459e-06, "loss": 0.7035, "step": 2668 }, { "epoch": 0.08180090719627314, "grad_norm": 1.4584525800859247, "learning_rate": 9.92981012792645e-06, "loss": 0.782, "step": 2669 }, { "epoch": 0.08183155571901435, "grad_norm": 1.5506560889866128, "learning_rate": 9.929727233593628e-06, "loss": 0.7703, "step": 2670 }, { "epoch": 0.08186220424175555, "grad_norm": 1.6852217898909538, "learning_rate": 9.929644290686945e-06, "loss": 0.8206, "step": 2671 }, { "epoch": 0.08189285276449675, "grad_norm": 1.6053046573596164, "learning_rate": 9.929561299207213e-06, "loss": 0.8263, "step": 2672 }, { "epoch": 0.08192350128723795, "grad_norm": 0.6051337091058484, "learning_rate": 9.929478259155253e-06, "loss": 0.4843, "step": 2673 }, { "epoch": 0.08195414980997916, "grad_norm": 1.3433710355921555, "learning_rate": 9.929395170531883e-06, "loss": 0.5848, "step": 2674 }, { "epoch": 0.08198479833272036, "grad_norm": 1.2977128448644561, "learning_rate": 9.929312033337922e-06, "loss": 0.7262, "step": 2675 }, { "epoch": 0.08201544685546157, "grad_norm": 1.5826883933197975, "learning_rate": 9.929228847574186e-06, "loss": 0.7047, "step": 2676 }, { "epoch": 0.08204609537820277, "grad_norm": 1.5442400970803087, "learning_rate": 9.929145613241497e-06, "loss": 0.7206, "step": 2677 }, { "epoch": 0.08207674390094398, "grad_norm": 1.4887346837378816, "learning_rate": 9.929062330340678e-06, "loss": 0.7295, "step": 2678 }, { "epoch": 0.08210739242368517, "grad_norm": 1.4845587347856082, "learning_rate": 9.928978998872543e-06, "loss": 0.8143, "step": 2679 }, { "epoch": 0.08213804094642638, "grad_norm": 1.685388957139995, "learning_rate": 9.92889561883792e-06, "loss": 0.7719, "step": 2680 }, { "epoch": 0.08216868946916758, "grad_norm": 0.54858881164447, "learning_rate": 9.928812190237627e-06, "loss": 0.464, "step": 2681 }, { "epoch": 0.08219933799190879, "grad_norm": 1.4549744729606928, "learning_rate": 9.928728713072485e-06, "loss": 0.7039, "step": 2682 }, { "epoch": 0.08222998651465, "grad_norm": 1.7310891137845712, "learning_rate": 9.928645187343318e-06, "loss": 0.708, "step": 2683 }, { "epoch": 0.0822606350373912, "grad_norm": 1.5361236023342195, "learning_rate": 9.92856161305095e-06, "loss": 0.7648, "step": 2684 }, { "epoch": 0.0822912835601324, "grad_norm": 1.679266065333836, "learning_rate": 9.928477990196202e-06, "loss": 0.6633, "step": 2685 }, { "epoch": 0.08232193208287361, "grad_norm": 1.4912040057286644, "learning_rate": 9.928394318779901e-06, "loss": 0.8007, "step": 2686 }, { "epoch": 0.0823525806056148, "grad_norm": 0.4782056910266649, "learning_rate": 9.928310598802868e-06, "loss": 0.4701, "step": 2687 }, { "epoch": 0.08238322912835601, "grad_norm": 1.400563696265659, "learning_rate": 9.92822683026593e-06, "loss": 0.6694, "step": 2688 }, { "epoch": 0.08241387765109721, "grad_norm": 1.5820503253886171, "learning_rate": 9.928143013169912e-06, "loss": 0.7291, "step": 2689 }, { "epoch": 0.08244452617383842, "grad_norm": 1.5318479006496064, "learning_rate": 9.928059147515643e-06, "loss": 0.7772, "step": 2690 }, { "epoch": 0.08247517469657963, "grad_norm": 1.3560597534560819, "learning_rate": 9.927975233303943e-06, "loss": 0.652, "step": 2691 }, { "epoch": 0.08250582321932083, "grad_norm": 1.4664367856868827, "learning_rate": 9.927891270535643e-06, "loss": 0.6872, "step": 2692 }, { "epoch": 0.08253647174206204, "grad_norm": 1.2938088914242365, "learning_rate": 9.92780725921157e-06, "loss": 0.6759, "step": 2693 }, { "epoch": 0.08256712026480324, "grad_norm": 1.4245679883584264, "learning_rate": 9.927723199332551e-06, "loss": 0.907, "step": 2694 }, { "epoch": 0.08259776878754443, "grad_norm": 1.6487535322345097, "learning_rate": 9.927639090899415e-06, "loss": 0.6562, "step": 2695 }, { "epoch": 0.08262841731028564, "grad_norm": 1.3504089046594356, "learning_rate": 9.927554933912988e-06, "loss": 0.7543, "step": 2696 }, { "epoch": 0.08265906583302685, "grad_norm": 1.6569654150627449, "learning_rate": 9.927470728374105e-06, "loss": 0.7962, "step": 2697 }, { "epoch": 0.08268971435576805, "grad_norm": 1.3097017147748296, "learning_rate": 9.927386474283589e-06, "loss": 0.5738, "step": 2698 }, { "epoch": 0.08272036287850926, "grad_norm": 1.5448218502255389, "learning_rate": 9.927302171642275e-06, "loss": 0.8069, "step": 2699 }, { "epoch": 0.08275101140125046, "grad_norm": 1.3870929089080573, "learning_rate": 9.927217820450992e-06, "loss": 0.6986, "step": 2700 }, { "epoch": 0.08278165992399167, "grad_norm": 1.6046694807671975, "learning_rate": 9.92713342071057e-06, "loss": 0.708, "step": 2701 }, { "epoch": 0.08281230844673287, "grad_norm": 1.8716090997062966, "learning_rate": 9.927048972421843e-06, "loss": 0.6914, "step": 2702 }, { "epoch": 0.08284295696947407, "grad_norm": 1.4531298500623444, "learning_rate": 9.92696447558564e-06, "loss": 0.6987, "step": 2703 }, { "epoch": 0.08287360549221527, "grad_norm": 1.7763304232120385, "learning_rate": 9.926879930202798e-06, "loss": 0.7862, "step": 2704 }, { "epoch": 0.08290425401495648, "grad_norm": 1.396207942505097, "learning_rate": 9.926795336274146e-06, "loss": 0.7156, "step": 2705 }, { "epoch": 0.08293490253769768, "grad_norm": 0.5237996579516131, "learning_rate": 9.926710693800518e-06, "loss": 0.4751, "step": 2706 }, { "epoch": 0.08296555106043889, "grad_norm": 1.4489410500072872, "learning_rate": 9.92662600278275e-06, "loss": 0.7252, "step": 2707 }, { "epoch": 0.0829961995831801, "grad_norm": 1.5505884516261308, "learning_rate": 9.926541263221676e-06, "loss": 0.8066, "step": 2708 }, { "epoch": 0.0830268481059213, "grad_norm": 1.3789110982023418, "learning_rate": 9.926456475118131e-06, "loss": 0.7274, "step": 2709 }, { "epoch": 0.08305749662866249, "grad_norm": 0.5125120219340414, "learning_rate": 9.926371638472949e-06, "loss": 0.4811, "step": 2710 }, { "epoch": 0.0830881451514037, "grad_norm": 1.4018384938041613, "learning_rate": 9.926286753286966e-06, "loss": 0.8105, "step": 2711 }, { "epoch": 0.0831187936741449, "grad_norm": 1.5087629108464125, "learning_rate": 9.92620181956102e-06, "loss": 0.9169, "step": 2712 }, { "epoch": 0.08314944219688611, "grad_norm": 0.5074110837250257, "learning_rate": 9.926116837295948e-06, "loss": 0.4793, "step": 2713 }, { "epoch": 0.08318009071962731, "grad_norm": 1.4692878079549825, "learning_rate": 9.926031806492584e-06, "loss": 0.7456, "step": 2714 }, { "epoch": 0.08321073924236852, "grad_norm": 1.4542824657374998, "learning_rate": 9.92594672715177e-06, "loss": 0.7837, "step": 2715 }, { "epoch": 0.08324138776510973, "grad_norm": 1.3417697081534081, "learning_rate": 9.925861599274342e-06, "loss": 0.7323, "step": 2716 }, { "epoch": 0.08327203628785093, "grad_norm": 1.4613752420561856, "learning_rate": 9.92577642286114e-06, "loss": 0.7005, "step": 2717 }, { "epoch": 0.08330268481059212, "grad_norm": 1.7351234963710136, "learning_rate": 9.925691197913001e-06, "loss": 0.7966, "step": 2718 }, { "epoch": 0.08333333333333333, "grad_norm": 1.6244342583418028, "learning_rate": 9.925605924430768e-06, "loss": 0.8009, "step": 2719 }, { "epoch": 0.08336398185607453, "grad_norm": 1.561522038422227, "learning_rate": 9.925520602415278e-06, "loss": 0.8613, "step": 2720 }, { "epoch": 0.08339463037881574, "grad_norm": 1.5141986622819479, "learning_rate": 9.925435231867374e-06, "loss": 0.7579, "step": 2721 }, { "epoch": 0.08342527890155695, "grad_norm": 0.5503982914630219, "learning_rate": 9.925349812787897e-06, "loss": 0.4914, "step": 2722 }, { "epoch": 0.08345592742429815, "grad_norm": 1.3624225205328084, "learning_rate": 9.925264345177687e-06, "loss": 0.6936, "step": 2723 }, { "epoch": 0.08348657594703936, "grad_norm": 1.4940795317505744, "learning_rate": 9.925178829037588e-06, "loss": 0.7686, "step": 2724 }, { "epoch": 0.08351722446978056, "grad_norm": 1.5878713639262896, "learning_rate": 9.925093264368441e-06, "loss": 0.8779, "step": 2725 }, { "epoch": 0.08354787299252175, "grad_norm": 1.420474645129122, "learning_rate": 9.925007651171091e-06, "loss": 0.7241, "step": 2726 }, { "epoch": 0.08357852151526296, "grad_norm": 1.74883622338818, "learning_rate": 9.924921989446382e-06, "loss": 0.8057, "step": 2727 }, { "epoch": 0.08360917003800417, "grad_norm": 1.8795646260762562, "learning_rate": 9.924836279195153e-06, "loss": 0.7571, "step": 2728 }, { "epoch": 0.08363981856074537, "grad_norm": 1.7844675588751924, "learning_rate": 9.924750520418254e-06, "loss": 0.695, "step": 2729 }, { "epoch": 0.08367046708348658, "grad_norm": 1.6099238043548754, "learning_rate": 9.924664713116528e-06, "loss": 0.7536, "step": 2730 }, { "epoch": 0.08370111560622778, "grad_norm": 1.4045821179179676, "learning_rate": 9.92457885729082e-06, "loss": 0.7771, "step": 2731 }, { "epoch": 0.08373176412896899, "grad_norm": 1.5565486208568846, "learning_rate": 9.924492952941977e-06, "loss": 0.7315, "step": 2732 }, { "epoch": 0.0837624126517102, "grad_norm": 1.5814699608082405, "learning_rate": 9.924407000070844e-06, "loss": 0.7402, "step": 2733 }, { "epoch": 0.08379306117445139, "grad_norm": 1.5230714521553206, "learning_rate": 9.924320998678271e-06, "loss": 0.7621, "step": 2734 }, { "epoch": 0.08382370969719259, "grad_norm": 1.4167554815623844, "learning_rate": 9.924234948765101e-06, "loss": 0.7782, "step": 2735 }, { "epoch": 0.0838543582199338, "grad_norm": 1.5170564337066397, "learning_rate": 9.924148850332185e-06, "loss": 0.7413, "step": 2736 }, { "epoch": 0.083885006742675, "grad_norm": 1.4265372145379334, "learning_rate": 9.92406270338037e-06, "loss": 0.6973, "step": 2737 }, { "epoch": 0.08391565526541621, "grad_norm": 1.4208967055881074, "learning_rate": 9.923976507910506e-06, "loss": 0.7865, "step": 2738 }, { "epoch": 0.08394630378815741, "grad_norm": 1.4523095921251292, "learning_rate": 9.923890263923443e-06, "loss": 0.8675, "step": 2739 }, { "epoch": 0.08397695231089862, "grad_norm": 1.773998703400149, "learning_rate": 9.923803971420027e-06, "loss": 0.8134, "step": 2740 }, { "epoch": 0.08400760083363981, "grad_norm": 1.438572183552176, "learning_rate": 9.923717630401113e-06, "loss": 0.7859, "step": 2741 }, { "epoch": 0.08403824935638102, "grad_norm": 1.2450794748958163, "learning_rate": 9.923631240867546e-06, "loss": 0.7086, "step": 2742 }, { "epoch": 0.08406889787912222, "grad_norm": 1.319542232618199, "learning_rate": 9.923544802820183e-06, "loss": 0.6748, "step": 2743 }, { "epoch": 0.08409954640186343, "grad_norm": 1.5429247183901984, "learning_rate": 9.923458316259872e-06, "loss": 0.7711, "step": 2744 }, { "epoch": 0.08413019492460463, "grad_norm": 1.6720530716150714, "learning_rate": 9.923371781187468e-06, "loss": 0.7467, "step": 2745 }, { "epoch": 0.08416084344734584, "grad_norm": 1.4368470450918531, "learning_rate": 9.923285197603823e-06, "loss": 0.738, "step": 2746 }, { "epoch": 0.08419149197008705, "grad_norm": 1.4070328021268257, "learning_rate": 9.923198565509787e-06, "loss": 0.7681, "step": 2747 }, { "epoch": 0.08422214049282825, "grad_norm": 1.5249801591195262, "learning_rate": 9.923111884906216e-06, "loss": 0.7223, "step": 2748 }, { "epoch": 0.08425278901556944, "grad_norm": 1.5261357934532835, "learning_rate": 9.923025155793965e-06, "loss": 0.7659, "step": 2749 }, { "epoch": 0.08428343753831065, "grad_norm": 1.5021726680080993, "learning_rate": 9.922938378173887e-06, "loss": 0.7243, "step": 2750 }, { "epoch": 0.08431408606105185, "grad_norm": 2.091133389671578, "learning_rate": 9.922851552046837e-06, "loss": 0.8109, "step": 2751 }, { "epoch": 0.08434473458379306, "grad_norm": 1.4738114556703101, "learning_rate": 9.922764677413672e-06, "loss": 0.7157, "step": 2752 }, { "epoch": 0.08437538310653427, "grad_norm": 1.5988095135936298, "learning_rate": 9.922677754275248e-06, "loss": 0.7309, "step": 2753 }, { "epoch": 0.08440603162927547, "grad_norm": 1.6315096449767523, "learning_rate": 9.922590782632419e-06, "loss": 0.8512, "step": 2754 }, { "epoch": 0.08443668015201668, "grad_norm": 1.437372533943573, "learning_rate": 9.922503762486044e-06, "loss": 0.8118, "step": 2755 }, { "epoch": 0.08446732867475788, "grad_norm": 1.5567907455311718, "learning_rate": 9.92241669383698e-06, "loss": 0.7718, "step": 2756 }, { "epoch": 0.08449797719749907, "grad_norm": 1.6954463428866835, "learning_rate": 9.922329576686084e-06, "loss": 0.7454, "step": 2757 }, { "epoch": 0.08452862572024028, "grad_norm": 1.54604196658586, "learning_rate": 9.922242411034216e-06, "loss": 0.7157, "step": 2758 }, { "epoch": 0.08455927424298149, "grad_norm": 1.2443858201481546, "learning_rate": 9.922155196882234e-06, "loss": 0.7139, "step": 2759 }, { "epoch": 0.08458992276572269, "grad_norm": 0.5522490556580808, "learning_rate": 9.922067934230999e-06, "loss": 0.4896, "step": 2760 }, { "epoch": 0.0846205712884639, "grad_norm": 1.4861968435991773, "learning_rate": 9.921980623081366e-06, "loss": 0.6624, "step": 2761 }, { "epoch": 0.0846512198112051, "grad_norm": 1.3555011663989813, "learning_rate": 9.9218932634342e-06, "loss": 0.6768, "step": 2762 }, { "epoch": 0.08468186833394631, "grad_norm": 1.365021040818122, "learning_rate": 9.921805855290362e-06, "loss": 0.6495, "step": 2763 }, { "epoch": 0.08471251685668751, "grad_norm": 1.2623495934289803, "learning_rate": 9.92171839865071e-06, "loss": 0.6663, "step": 2764 }, { "epoch": 0.0847431653794287, "grad_norm": 1.421039761494757, "learning_rate": 9.921630893516108e-06, "loss": 0.7395, "step": 2765 }, { "epoch": 0.08477381390216991, "grad_norm": 1.4398118254010428, "learning_rate": 9.921543339887419e-06, "loss": 0.7247, "step": 2766 }, { "epoch": 0.08480446242491112, "grad_norm": 0.6047951614278652, "learning_rate": 9.921455737765502e-06, "loss": 0.4987, "step": 2767 }, { "epoch": 0.08483511094765232, "grad_norm": 1.5093339404398534, "learning_rate": 9.921368087151222e-06, "loss": 0.6687, "step": 2768 }, { "epoch": 0.08486575947039353, "grad_norm": 1.591237762936904, "learning_rate": 9.921280388045444e-06, "loss": 0.8365, "step": 2769 }, { "epoch": 0.08489640799313473, "grad_norm": 1.5177517430430643, "learning_rate": 9.92119264044903e-06, "loss": 0.7343, "step": 2770 }, { "epoch": 0.08492705651587594, "grad_norm": 1.6128987024334371, "learning_rate": 9.921104844362849e-06, "loss": 0.763, "step": 2771 }, { "epoch": 0.08495770503861713, "grad_norm": 1.5507256794812616, "learning_rate": 9.921016999787761e-06, "loss": 0.7472, "step": 2772 }, { "epoch": 0.08498835356135834, "grad_norm": 1.836859762843271, "learning_rate": 9.920929106724633e-06, "loss": 0.6838, "step": 2773 }, { "epoch": 0.08501900208409954, "grad_norm": 1.6069133799323643, "learning_rate": 9.92084116517433e-06, "loss": 0.8325, "step": 2774 }, { "epoch": 0.08504965060684075, "grad_norm": 0.5299163493792022, "learning_rate": 9.920753175137723e-06, "loss": 0.4919, "step": 2775 }, { "epoch": 0.08508029912958195, "grad_norm": 1.4550107920361568, "learning_rate": 9.920665136615675e-06, "loss": 0.744, "step": 2776 }, { "epoch": 0.08511094765232316, "grad_norm": 1.2818411080120837, "learning_rate": 9.920577049609054e-06, "loss": 0.6806, "step": 2777 }, { "epoch": 0.08514159617506437, "grad_norm": 0.5106296296876986, "learning_rate": 9.920488914118727e-06, "loss": 0.4822, "step": 2778 }, { "epoch": 0.08517224469780557, "grad_norm": 1.618673396898502, "learning_rate": 9.920400730145566e-06, "loss": 0.7538, "step": 2779 }, { "epoch": 0.08520289322054676, "grad_norm": 0.5168537951346197, "learning_rate": 9.920312497690436e-06, "loss": 0.4907, "step": 2780 }, { "epoch": 0.08523354174328797, "grad_norm": 1.6162038215171084, "learning_rate": 9.92022421675421e-06, "loss": 0.7875, "step": 2781 }, { "epoch": 0.08526419026602917, "grad_norm": 1.3912816161726005, "learning_rate": 9.920135887337754e-06, "loss": 0.863, "step": 2782 }, { "epoch": 0.08529483878877038, "grad_norm": 1.338555007865675, "learning_rate": 9.92004750944194e-06, "loss": 0.7131, "step": 2783 }, { "epoch": 0.08532548731151159, "grad_norm": 1.398480879956688, "learning_rate": 9.919959083067641e-06, "loss": 0.6327, "step": 2784 }, { "epoch": 0.08535613583425279, "grad_norm": 0.5235163563941078, "learning_rate": 9.919870608215726e-06, "loss": 0.4748, "step": 2785 }, { "epoch": 0.085386784356994, "grad_norm": 1.399680929979468, "learning_rate": 9.919782084887066e-06, "loss": 0.7549, "step": 2786 }, { "epoch": 0.0854174328797352, "grad_norm": 1.5782489777808806, "learning_rate": 9.919693513082534e-06, "loss": 0.7895, "step": 2787 }, { "epoch": 0.0854480814024764, "grad_norm": 1.5027586501919161, "learning_rate": 9.919604892803003e-06, "loss": 0.7215, "step": 2788 }, { "epoch": 0.0854787299252176, "grad_norm": 1.3977190443065086, "learning_rate": 9.919516224049348e-06, "loss": 0.7026, "step": 2789 }, { "epoch": 0.0855093784479588, "grad_norm": 1.2550380376137455, "learning_rate": 9.91942750682244e-06, "loss": 0.6857, "step": 2790 }, { "epoch": 0.08554002697070001, "grad_norm": 1.5357781966643607, "learning_rate": 9.919338741123155e-06, "loss": 0.637, "step": 2791 }, { "epoch": 0.08557067549344122, "grad_norm": 1.7587593707860396, "learning_rate": 9.919249926952365e-06, "loss": 0.7408, "step": 2792 }, { "epoch": 0.08560132401618242, "grad_norm": 1.4202371342175206, "learning_rate": 9.919161064310948e-06, "loss": 0.7735, "step": 2793 }, { "epoch": 0.08563197253892363, "grad_norm": 1.4411784231838656, "learning_rate": 9.919072153199778e-06, "loss": 0.7457, "step": 2794 }, { "epoch": 0.08566262106166483, "grad_norm": 0.5138765406085863, "learning_rate": 9.91898319361973e-06, "loss": 0.4735, "step": 2795 }, { "epoch": 0.08569326958440603, "grad_norm": 1.3934354725938163, "learning_rate": 9.918894185571684e-06, "loss": 0.7335, "step": 2796 }, { "epoch": 0.08572391810714723, "grad_norm": 1.357742428842283, "learning_rate": 9.918805129056514e-06, "loss": 0.7417, "step": 2797 }, { "epoch": 0.08575456662988844, "grad_norm": 1.5916610275697929, "learning_rate": 9.9187160240751e-06, "loss": 0.778, "step": 2798 }, { "epoch": 0.08578521515262964, "grad_norm": 1.4107787835975099, "learning_rate": 9.918626870628317e-06, "loss": 0.6844, "step": 2799 }, { "epoch": 0.08581586367537085, "grad_norm": 1.4235483759463023, "learning_rate": 9.918537668717045e-06, "loss": 0.7569, "step": 2800 }, { "epoch": 0.08584651219811205, "grad_norm": 1.4562340200282489, "learning_rate": 9.918448418342164e-06, "loss": 0.8134, "step": 2801 }, { "epoch": 0.08587716072085326, "grad_norm": 1.4583107238090285, "learning_rate": 9.918359119504552e-06, "loss": 0.8075, "step": 2802 }, { "epoch": 0.08590780924359445, "grad_norm": 1.5988937066173365, "learning_rate": 9.918269772205089e-06, "loss": 0.6863, "step": 2803 }, { "epoch": 0.08593845776633566, "grad_norm": 1.4847245784894234, "learning_rate": 9.918180376444655e-06, "loss": 0.7254, "step": 2804 }, { "epoch": 0.08596910628907686, "grad_norm": 1.3946544426377276, "learning_rate": 9.918090932224131e-06, "loss": 0.5894, "step": 2805 }, { "epoch": 0.08599975481181807, "grad_norm": 1.641474053705991, "learning_rate": 9.9180014395444e-06, "loss": 0.81, "step": 2806 }, { "epoch": 0.08603040333455927, "grad_norm": 1.4329855324668972, "learning_rate": 9.917911898406343e-06, "loss": 0.7735, "step": 2807 }, { "epoch": 0.08606105185730048, "grad_norm": 1.6097009945436647, "learning_rate": 9.91782230881084e-06, "loss": 0.7393, "step": 2808 }, { "epoch": 0.08609170038004169, "grad_norm": 1.7599452887021365, "learning_rate": 9.917732670758776e-06, "loss": 0.6802, "step": 2809 }, { "epoch": 0.08612234890278289, "grad_norm": 1.488118782857683, "learning_rate": 9.917642984251034e-06, "loss": 0.7242, "step": 2810 }, { "epoch": 0.08615299742552408, "grad_norm": 1.778194597798602, "learning_rate": 9.9175532492885e-06, "loss": 0.8091, "step": 2811 }, { "epoch": 0.08618364594826529, "grad_norm": 0.530791712827411, "learning_rate": 9.917463465872051e-06, "loss": 0.4768, "step": 2812 }, { "epoch": 0.0862142944710065, "grad_norm": 1.4131558769048096, "learning_rate": 9.91737363400258e-06, "loss": 0.7357, "step": 2813 }, { "epoch": 0.0862449429937477, "grad_norm": 1.4044014845652504, "learning_rate": 9.917283753680966e-06, "loss": 0.8025, "step": 2814 }, { "epoch": 0.0862755915164889, "grad_norm": 0.5066168416465782, "learning_rate": 9.917193824908097e-06, "loss": 0.4797, "step": 2815 }, { "epoch": 0.08630624003923011, "grad_norm": 1.4525888596849126, "learning_rate": 9.91710384768486e-06, "loss": 0.7986, "step": 2816 }, { "epoch": 0.08633688856197132, "grad_norm": 1.4800606709840314, "learning_rate": 9.91701382201214e-06, "loss": 0.8269, "step": 2817 }, { "epoch": 0.08636753708471252, "grad_norm": 1.447116125138701, "learning_rate": 9.916923747890825e-06, "loss": 0.6599, "step": 2818 }, { "epoch": 0.08639818560745371, "grad_norm": 1.4896371871552287, "learning_rate": 9.916833625321804e-06, "loss": 0.8208, "step": 2819 }, { "epoch": 0.08642883413019492, "grad_norm": 1.52492165865455, "learning_rate": 9.91674345430596e-06, "loss": 0.8168, "step": 2820 }, { "epoch": 0.08645948265293613, "grad_norm": 1.6051190380630636, "learning_rate": 9.916653234844188e-06, "loss": 0.755, "step": 2821 }, { "epoch": 0.08649013117567733, "grad_norm": 1.2642302694664935, "learning_rate": 9.916562966937371e-06, "loss": 0.7034, "step": 2822 }, { "epoch": 0.08652077969841854, "grad_norm": 1.5381224523310568, "learning_rate": 9.916472650586404e-06, "loss": 0.6919, "step": 2823 }, { "epoch": 0.08655142822115974, "grad_norm": 0.6361896111229076, "learning_rate": 9.916382285792172e-06, "loss": 0.4858, "step": 2824 }, { "epoch": 0.08658207674390095, "grad_norm": 0.5842446151438543, "learning_rate": 9.916291872555568e-06, "loss": 0.5111, "step": 2825 }, { "epoch": 0.08661272526664215, "grad_norm": 1.4441542259344586, "learning_rate": 9.916201410877481e-06, "loss": 0.7073, "step": 2826 }, { "epoch": 0.08664337378938335, "grad_norm": 1.5477972740437107, "learning_rate": 9.916110900758806e-06, "loss": 0.756, "step": 2827 }, { "epoch": 0.08667402231212455, "grad_norm": 1.5856578375072046, "learning_rate": 9.916020342200432e-06, "loss": 0.8028, "step": 2828 }, { "epoch": 0.08670467083486576, "grad_norm": 1.4219931018259324, "learning_rate": 9.915929735203252e-06, "loss": 0.6366, "step": 2829 }, { "epoch": 0.08673531935760696, "grad_norm": 1.5980312103060845, "learning_rate": 9.915839079768156e-06, "loss": 0.7165, "step": 2830 }, { "epoch": 0.08676596788034817, "grad_norm": 1.417811678646109, "learning_rate": 9.915748375896041e-06, "loss": 0.7672, "step": 2831 }, { "epoch": 0.08679661640308937, "grad_norm": 1.4813610253708196, "learning_rate": 9.9156576235878e-06, "loss": 0.7876, "step": 2832 }, { "epoch": 0.08682726492583058, "grad_norm": 1.4163655949944776, "learning_rate": 9.915566822844326e-06, "loss": 0.7419, "step": 2833 }, { "epoch": 0.08685791344857179, "grad_norm": 0.8805003811644867, "learning_rate": 9.915475973666516e-06, "loss": 0.4901, "step": 2834 }, { "epoch": 0.08688856197131298, "grad_norm": 1.3971804860620516, "learning_rate": 9.915385076055262e-06, "loss": 0.7848, "step": 2835 }, { "epoch": 0.08691921049405418, "grad_norm": 1.6546663200278602, "learning_rate": 9.915294130011461e-06, "loss": 0.7259, "step": 2836 }, { "epoch": 0.08694985901679539, "grad_norm": 1.4487559513205714, "learning_rate": 9.915203135536011e-06, "loss": 0.7311, "step": 2837 }, { "epoch": 0.0869805075395366, "grad_norm": 1.4491534699774293, "learning_rate": 9.915112092629806e-06, "loss": 0.7129, "step": 2838 }, { "epoch": 0.0870111560622778, "grad_norm": 1.4278970553967625, "learning_rate": 9.915021001293743e-06, "loss": 0.6384, "step": 2839 }, { "epoch": 0.087041804585019, "grad_norm": 1.743741416950481, "learning_rate": 9.914929861528722e-06, "loss": 0.7686, "step": 2840 }, { "epoch": 0.08707245310776021, "grad_norm": 1.5412346725650266, "learning_rate": 9.914838673335639e-06, "loss": 0.7084, "step": 2841 }, { "epoch": 0.0871031016305014, "grad_norm": 1.5868210661567832, "learning_rate": 9.914747436715394e-06, "loss": 0.7932, "step": 2842 }, { "epoch": 0.08713375015324261, "grad_norm": 1.3856313132748963, "learning_rate": 9.914656151668884e-06, "loss": 0.8074, "step": 2843 }, { "epoch": 0.08716439867598381, "grad_norm": 1.286030530072747, "learning_rate": 9.914564818197008e-06, "loss": 0.7159, "step": 2844 }, { "epoch": 0.08719504719872502, "grad_norm": 1.7007822794727474, "learning_rate": 9.914473436300668e-06, "loss": 0.7694, "step": 2845 }, { "epoch": 0.08722569572146623, "grad_norm": 1.4662796573254422, "learning_rate": 9.914382005980766e-06, "loss": 0.6671, "step": 2846 }, { "epoch": 0.08725634424420743, "grad_norm": 1.59382745706142, "learning_rate": 9.9142905272382e-06, "loss": 0.7134, "step": 2847 }, { "epoch": 0.08728699276694864, "grad_norm": 1.5523570293326758, "learning_rate": 9.914199000073871e-06, "loss": 0.7868, "step": 2848 }, { "epoch": 0.08731764128968984, "grad_norm": 0.6024929159128435, "learning_rate": 9.91410742448868e-06, "loss": 0.4871, "step": 2849 }, { "epoch": 0.08734828981243103, "grad_norm": 1.3943534731695413, "learning_rate": 9.914015800483536e-06, "loss": 0.7208, "step": 2850 }, { "epoch": 0.08737893833517224, "grad_norm": 1.4676871607958677, "learning_rate": 9.913924128059334e-06, "loss": 0.6575, "step": 2851 }, { "epoch": 0.08740958685791345, "grad_norm": 1.5532900905838518, "learning_rate": 9.91383240721698e-06, "loss": 0.7569, "step": 2852 }, { "epoch": 0.08744023538065465, "grad_norm": 1.4843126156567763, "learning_rate": 9.91374063795738e-06, "loss": 0.7874, "step": 2853 }, { "epoch": 0.08747088390339586, "grad_norm": 1.4307366147862928, "learning_rate": 9.913648820281435e-06, "loss": 0.6741, "step": 2854 }, { "epoch": 0.08750153242613706, "grad_norm": 0.5231305756574459, "learning_rate": 9.913556954190051e-06, "loss": 0.4874, "step": 2855 }, { "epoch": 0.08753218094887827, "grad_norm": 1.3962473363345618, "learning_rate": 9.913465039684134e-06, "loss": 0.7846, "step": 2856 }, { "epoch": 0.08756282947161947, "grad_norm": 1.390827366707527, "learning_rate": 9.913373076764587e-06, "loss": 0.6463, "step": 2857 }, { "epoch": 0.08759347799436067, "grad_norm": 1.575068099782183, "learning_rate": 9.913281065432318e-06, "loss": 0.7728, "step": 2858 }, { "epoch": 0.08762412651710187, "grad_norm": 1.4979696152789093, "learning_rate": 9.913189005688235e-06, "loss": 0.8261, "step": 2859 }, { "epoch": 0.08765477503984308, "grad_norm": 1.45132224675601, "learning_rate": 9.913096897533244e-06, "loss": 0.8211, "step": 2860 }, { "epoch": 0.08768542356258428, "grad_norm": 1.5625698448323613, "learning_rate": 9.913004740968251e-06, "loss": 0.7833, "step": 2861 }, { "epoch": 0.08771607208532549, "grad_norm": 1.5460150833499937, "learning_rate": 9.912912535994166e-06, "loss": 0.7547, "step": 2862 }, { "epoch": 0.0877467206080667, "grad_norm": 1.7020489107602612, "learning_rate": 9.912820282611896e-06, "loss": 0.7636, "step": 2863 }, { "epoch": 0.0877773691308079, "grad_norm": 1.4697981080263607, "learning_rate": 9.912727980822352e-06, "loss": 0.7871, "step": 2864 }, { "epoch": 0.0878080176535491, "grad_norm": 1.6176763954175946, "learning_rate": 9.91263563062644e-06, "loss": 0.8028, "step": 2865 }, { "epoch": 0.0878386661762903, "grad_norm": 0.5736940396350838, "learning_rate": 9.912543232025074e-06, "loss": 0.467, "step": 2866 }, { "epoch": 0.0878693146990315, "grad_norm": 1.3085998750251673, "learning_rate": 9.912450785019162e-06, "loss": 0.7299, "step": 2867 }, { "epoch": 0.08789996322177271, "grad_norm": 1.4558127205612772, "learning_rate": 9.912358289609616e-06, "loss": 0.7537, "step": 2868 }, { "epoch": 0.08793061174451391, "grad_norm": 0.5076066351130126, "learning_rate": 9.912265745797347e-06, "loss": 0.4529, "step": 2869 }, { "epoch": 0.08796126026725512, "grad_norm": 1.4128824207471886, "learning_rate": 9.912173153583266e-06, "loss": 0.7735, "step": 2870 }, { "epoch": 0.08799190878999633, "grad_norm": 1.4726537047634547, "learning_rate": 9.912080512968286e-06, "loss": 0.765, "step": 2871 }, { "epoch": 0.08802255731273753, "grad_norm": 1.3210850868435324, "learning_rate": 9.91198782395332e-06, "loss": 0.786, "step": 2872 }, { "epoch": 0.08805320583547872, "grad_norm": 0.49606390955713847, "learning_rate": 9.911895086539281e-06, "loss": 0.4805, "step": 2873 }, { "epoch": 0.08808385435821993, "grad_norm": 1.9033231542348814, "learning_rate": 9.911802300727084e-06, "loss": 0.7655, "step": 2874 }, { "epoch": 0.08811450288096113, "grad_norm": 1.480527674441038, "learning_rate": 9.911709466517641e-06, "loss": 0.7561, "step": 2875 }, { "epoch": 0.08814515140370234, "grad_norm": 1.4253573805942705, "learning_rate": 9.91161658391187e-06, "loss": 0.6951, "step": 2876 }, { "epoch": 0.08817579992644355, "grad_norm": 1.5956817831247139, "learning_rate": 9.911523652910681e-06, "loss": 0.7703, "step": 2877 }, { "epoch": 0.08820644844918475, "grad_norm": 1.5071997347801847, "learning_rate": 9.911430673514994e-06, "loss": 0.7857, "step": 2878 }, { "epoch": 0.08823709697192596, "grad_norm": 0.48211699586790513, "learning_rate": 9.911337645725725e-06, "loss": 0.4852, "step": 2879 }, { "epoch": 0.08826774549466716, "grad_norm": 1.4545576748222309, "learning_rate": 9.91124456954379e-06, "loss": 0.6817, "step": 2880 }, { "epoch": 0.08829839401740835, "grad_norm": 1.4382167991332955, "learning_rate": 9.911151444970104e-06, "loss": 0.8098, "step": 2881 }, { "epoch": 0.08832904254014956, "grad_norm": 1.4829611938930212, "learning_rate": 9.911058272005587e-06, "loss": 0.6462, "step": 2882 }, { "epoch": 0.08835969106289077, "grad_norm": 1.4869033262683393, "learning_rate": 9.910965050651155e-06, "loss": 0.8079, "step": 2883 }, { "epoch": 0.08839033958563197, "grad_norm": 1.3559065691428926, "learning_rate": 9.910871780907729e-06, "loss": 0.7616, "step": 2884 }, { "epoch": 0.08842098810837318, "grad_norm": 1.4357735223921257, "learning_rate": 9.910778462776227e-06, "loss": 0.7851, "step": 2885 }, { "epoch": 0.08845163663111438, "grad_norm": 1.376269315075562, "learning_rate": 9.910685096257568e-06, "loss": 0.6693, "step": 2886 }, { "epoch": 0.08848228515385559, "grad_norm": 1.3228811504480371, "learning_rate": 9.910591681352673e-06, "loss": 0.7041, "step": 2887 }, { "epoch": 0.0885129336765968, "grad_norm": 0.5268660274310838, "learning_rate": 9.910498218062461e-06, "loss": 0.4821, "step": 2888 }, { "epoch": 0.08854358219933799, "grad_norm": 1.768513290316746, "learning_rate": 9.910404706387853e-06, "loss": 0.711, "step": 2889 }, { "epoch": 0.08857423072207919, "grad_norm": 1.4638456271378535, "learning_rate": 9.910311146329772e-06, "loss": 0.7473, "step": 2890 }, { "epoch": 0.0886048792448204, "grad_norm": 1.5138085670760963, "learning_rate": 9.910217537889139e-06, "loss": 0.7245, "step": 2891 }, { "epoch": 0.0886355277675616, "grad_norm": 1.332628899282107, "learning_rate": 9.910123881066875e-06, "loss": 0.6114, "step": 2892 }, { "epoch": 0.08866617629030281, "grad_norm": 0.5018347683443196, "learning_rate": 9.910030175863905e-06, "loss": 0.495, "step": 2893 }, { "epoch": 0.08869682481304401, "grad_norm": 1.6684257081355818, "learning_rate": 9.909936422281152e-06, "loss": 0.7791, "step": 2894 }, { "epoch": 0.08872747333578522, "grad_norm": 1.4062069207197023, "learning_rate": 9.909842620319539e-06, "loss": 0.7698, "step": 2895 }, { "epoch": 0.08875812185852643, "grad_norm": 0.48707729888222284, "learning_rate": 9.90974876997999e-06, "loss": 0.4865, "step": 2896 }, { "epoch": 0.08878877038126762, "grad_norm": 1.4331649630937127, "learning_rate": 9.90965487126343e-06, "loss": 0.7461, "step": 2897 }, { "epoch": 0.08881941890400882, "grad_norm": 1.3488321891109183, "learning_rate": 9.909560924170784e-06, "loss": 0.7466, "step": 2898 }, { "epoch": 0.08885006742675003, "grad_norm": 1.5999436588709322, "learning_rate": 9.90946692870298e-06, "loss": 0.7255, "step": 2899 }, { "epoch": 0.08888071594949123, "grad_norm": 1.5705334078089224, "learning_rate": 9.90937288486094e-06, "loss": 0.7688, "step": 2900 }, { "epoch": 0.08891136447223244, "grad_norm": 0.48720849835970936, "learning_rate": 9.909278792645594e-06, "loss": 0.478, "step": 2901 }, { "epoch": 0.08894201299497365, "grad_norm": 0.5501611142056031, "learning_rate": 9.909184652057866e-06, "loss": 0.5094, "step": 2902 }, { "epoch": 0.08897266151771485, "grad_norm": 1.5719018324113412, "learning_rate": 9.909090463098688e-06, "loss": 0.6962, "step": 2903 }, { "epoch": 0.08900331004045604, "grad_norm": 1.4002652338741892, "learning_rate": 9.908996225768985e-06, "loss": 0.6594, "step": 2904 }, { "epoch": 0.08903395856319725, "grad_norm": 0.5097019582678117, "learning_rate": 9.908901940069686e-06, "loss": 0.479, "step": 2905 }, { "epoch": 0.08906460708593845, "grad_norm": 1.728619537164475, "learning_rate": 9.908807606001721e-06, "loss": 0.8753, "step": 2906 }, { "epoch": 0.08909525560867966, "grad_norm": 1.52089484704496, "learning_rate": 9.908713223566018e-06, "loss": 0.8372, "step": 2907 }, { "epoch": 0.08912590413142087, "grad_norm": 0.5075747739302641, "learning_rate": 9.908618792763507e-06, "loss": 0.475, "step": 2908 }, { "epoch": 0.08915655265416207, "grad_norm": 1.3711813731732916, "learning_rate": 9.90852431359512e-06, "loss": 0.6877, "step": 2909 }, { "epoch": 0.08918720117690328, "grad_norm": 1.6081634088909933, "learning_rate": 9.908429786061787e-06, "loss": 0.7356, "step": 2910 }, { "epoch": 0.08921784969964448, "grad_norm": 1.403741384190338, "learning_rate": 9.908335210164438e-06, "loss": 0.696, "step": 2911 }, { "epoch": 0.08924849822238567, "grad_norm": 1.438518131328809, "learning_rate": 9.908240585904008e-06, "loss": 0.7167, "step": 2912 }, { "epoch": 0.08927914674512688, "grad_norm": 1.2642541475205045, "learning_rate": 9.908145913281426e-06, "loss": 0.6713, "step": 2913 }, { "epoch": 0.08930979526786809, "grad_norm": 1.429820023861784, "learning_rate": 9.908051192297628e-06, "loss": 0.7791, "step": 2914 }, { "epoch": 0.08934044379060929, "grad_norm": 1.4143397133517066, "learning_rate": 9.907956422953546e-06, "loss": 0.7035, "step": 2915 }, { "epoch": 0.0893710923133505, "grad_norm": 1.4732546432857732, "learning_rate": 9.907861605250114e-06, "loss": 0.7406, "step": 2916 }, { "epoch": 0.0894017408360917, "grad_norm": 1.3713936174983357, "learning_rate": 9.907766739188264e-06, "loss": 0.7844, "step": 2917 }, { "epoch": 0.08943238935883291, "grad_norm": 1.441091433357446, "learning_rate": 9.907671824768933e-06, "loss": 0.7947, "step": 2918 }, { "epoch": 0.08946303788157411, "grad_norm": 0.6695903191405784, "learning_rate": 9.907576861993056e-06, "loss": 0.4871, "step": 2919 }, { "epoch": 0.0894936864043153, "grad_norm": 1.5422327111800145, "learning_rate": 9.90748185086157e-06, "loss": 0.7103, "step": 2920 }, { "epoch": 0.08952433492705651, "grad_norm": 1.4568900321242424, "learning_rate": 9.907386791375408e-06, "loss": 0.8104, "step": 2921 }, { "epoch": 0.08955498344979772, "grad_norm": 1.5911523261793346, "learning_rate": 9.90729168353551e-06, "loss": 0.8088, "step": 2922 }, { "epoch": 0.08958563197253892, "grad_norm": 1.5675627545582602, "learning_rate": 9.907196527342809e-06, "loss": 0.7133, "step": 2923 }, { "epoch": 0.08961628049528013, "grad_norm": 1.3866452093231942, "learning_rate": 9.907101322798247e-06, "loss": 0.7685, "step": 2924 }, { "epoch": 0.08964692901802133, "grad_norm": 1.4620473803736584, "learning_rate": 9.90700606990276e-06, "loss": 0.7285, "step": 2925 }, { "epoch": 0.08967757754076254, "grad_norm": 2.29674032238358, "learning_rate": 9.906910768657286e-06, "loss": 0.7598, "step": 2926 }, { "epoch": 0.08970822606350375, "grad_norm": 1.5380642965160303, "learning_rate": 9.906815419062763e-06, "loss": 0.7935, "step": 2927 }, { "epoch": 0.08973887458624494, "grad_norm": 0.5290394299350963, "learning_rate": 9.906720021120136e-06, "loss": 0.4779, "step": 2928 }, { "epoch": 0.08976952310898614, "grad_norm": 1.5466418710069425, "learning_rate": 9.90662457483034e-06, "loss": 0.775, "step": 2929 }, { "epoch": 0.08980017163172735, "grad_norm": 1.2790661151728941, "learning_rate": 9.906529080194315e-06, "loss": 0.7209, "step": 2930 }, { "epoch": 0.08983082015446855, "grad_norm": 1.150675944994273, "learning_rate": 9.906433537213006e-06, "loss": 0.7095, "step": 2931 }, { "epoch": 0.08986146867720976, "grad_norm": 1.3888288200306362, "learning_rate": 9.90633794588735e-06, "loss": 0.7244, "step": 2932 }, { "epoch": 0.08989211719995097, "grad_norm": 1.4043506960631356, "learning_rate": 9.90624230621829e-06, "loss": 0.7278, "step": 2933 }, { "epoch": 0.08992276572269217, "grad_norm": 0.45802387025417285, "learning_rate": 9.906146618206772e-06, "loss": 0.4637, "step": 2934 }, { "epoch": 0.08995341424543336, "grad_norm": 0.5000711820727096, "learning_rate": 9.906050881853735e-06, "loss": 0.4691, "step": 2935 }, { "epoch": 0.08998406276817457, "grad_norm": 1.4757244197107768, "learning_rate": 9.905955097160122e-06, "loss": 0.6952, "step": 2936 }, { "epoch": 0.09001471129091577, "grad_norm": 1.3889398084601015, "learning_rate": 9.90585926412688e-06, "loss": 0.8172, "step": 2937 }, { "epoch": 0.09004535981365698, "grad_norm": 0.4919987462684009, "learning_rate": 9.90576338275495e-06, "loss": 0.4725, "step": 2938 }, { "epoch": 0.09007600833639819, "grad_norm": 1.4157283842016877, "learning_rate": 9.90566745304528e-06, "loss": 0.7427, "step": 2939 }, { "epoch": 0.09010665685913939, "grad_norm": 1.435514079913553, "learning_rate": 9.905571474998812e-06, "loss": 0.748, "step": 2940 }, { "epoch": 0.0901373053818806, "grad_norm": 1.3522068005504357, "learning_rate": 9.905475448616493e-06, "loss": 0.7147, "step": 2941 }, { "epoch": 0.0901679539046218, "grad_norm": 1.4417654169217835, "learning_rate": 9.90537937389927e-06, "loss": 0.7616, "step": 2942 }, { "epoch": 0.090198602427363, "grad_norm": 1.4359833847491608, "learning_rate": 9.905283250848089e-06, "loss": 0.8192, "step": 2943 }, { "epoch": 0.0902292509501042, "grad_norm": 1.306474366397103, "learning_rate": 9.905187079463895e-06, "loss": 0.7167, "step": 2944 }, { "epoch": 0.0902598994728454, "grad_norm": 0.5086491865220076, "learning_rate": 9.90509085974764e-06, "loss": 0.4811, "step": 2945 }, { "epoch": 0.09029054799558661, "grad_norm": 1.4956129941891876, "learning_rate": 9.90499459170027e-06, "loss": 0.7236, "step": 2946 }, { "epoch": 0.09032119651832782, "grad_norm": 0.5025781450834621, "learning_rate": 9.904898275322734e-06, "loss": 0.4816, "step": 2947 }, { "epoch": 0.09035184504106902, "grad_norm": 1.6212406972872213, "learning_rate": 9.904801910615978e-06, "loss": 0.6596, "step": 2948 }, { "epoch": 0.09038249356381023, "grad_norm": 2.050345889664281, "learning_rate": 9.904705497580954e-06, "loss": 0.7408, "step": 2949 }, { "epoch": 0.09041314208655143, "grad_norm": 1.5715555817703935, "learning_rate": 9.904609036218613e-06, "loss": 0.8153, "step": 2950 }, { "epoch": 0.09044379060929263, "grad_norm": 1.4595486393118624, "learning_rate": 9.904512526529904e-06, "loss": 0.855, "step": 2951 }, { "epoch": 0.09047443913203383, "grad_norm": 1.4243526021076791, "learning_rate": 9.904415968515777e-06, "loss": 0.6073, "step": 2952 }, { "epoch": 0.09050508765477504, "grad_norm": 1.4001956598926262, "learning_rate": 9.904319362177186e-06, "loss": 0.6959, "step": 2953 }, { "epoch": 0.09053573617751624, "grad_norm": 1.647176043298318, "learning_rate": 9.90422270751508e-06, "loss": 0.6929, "step": 2954 }, { "epoch": 0.09056638470025745, "grad_norm": 1.503292225627756, "learning_rate": 9.904126004530415e-06, "loss": 0.7385, "step": 2955 }, { "epoch": 0.09059703322299865, "grad_norm": 1.5642854706452942, "learning_rate": 9.904029253224142e-06, "loss": 0.7378, "step": 2956 }, { "epoch": 0.09062768174573986, "grad_norm": 1.5853483187349995, "learning_rate": 9.903932453597212e-06, "loss": 0.7437, "step": 2957 }, { "epoch": 0.09065833026848107, "grad_norm": 1.5447148997876075, "learning_rate": 9.90383560565058e-06, "loss": 0.718, "step": 2958 }, { "epoch": 0.09068897879122226, "grad_norm": 1.4645479378050608, "learning_rate": 9.903738709385203e-06, "loss": 0.6739, "step": 2959 }, { "epoch": 0.09071962731396346, "grad_norm": 1.5205844978876812, "learning_rate": 9.903641764802033e-06, "loss": 0.7125, "step": 2960 }, { "epoch": 0.09075027583670467, "grad_norm": 0.6057345212089691, "learning_rate": 9.903544771902027e-06, "loss": 0.4632, "step": 2961 }, { "epoch": 0.09078092435944587, "grad_norm": 1.4681105063576714, "learning_rate": 9.903447730686139e-06, "loss": 0.761, "step": 2962 }, { "epoch": 0.09081157288218708, "grad_norm": 1.4744888747778864, "learning_rate": 9.903350641155325e-06, "loss": 0.7812, "step": 2963 }, { "epoch": 0.09084222140492829, "grad_norm": 0.522258419585213, "learning_rate": 9.903253503310544e-06, "loss": 0.4955, "step": 2964 }, { "epoch": 0.09087286992766949, "grad_norm": 1.5344104449196148, "learning_rate": 9.90315631715275e-06, "loss": 0.8481, "step": 2965 }, { "epoch": 0.09090351845041068, "grad_norm": 0.47779940520081327, "learning_rate": 9.903059082682906e-06, "loss": 0.4747, "step": 2966 }, { "epoch": 0.09093416697315189, "grad_norm": 1.3528526570393928, "learning_rate": 9.902961799901964e-06, "loss": 0.7005, "step": 2967 }, { "epoch": 0.0909648154958931, "grad_norm": 1.7581444337780583, "learning_rate": 9.902864468810884e-06, "loss": 0.7659, "step": 2968 }, { "epoch": 0.0909954640186343, "grad_norm": 1.327932876804136, "learning_rate": 9.902767089410627e-06, "loss": 0.7752, "step": 2969 }, { "epoch": 0.0910261125413755, "grad_norm": 0.5292535743913493, "learning_rate": 9.902669661702151e-06, "loss": 0.4706, "step": 2970 }, { "epoch": 0.09105676106411671, "grad_norm": 1.4939366671600405, "learning_rate": 9.902572185686416e-06, "loss": 0.8791, "step": 2971 }, { "epoch": 0.09108740958685792, "grad_norm": 1.4587854986029918, "learning_rate": 9.902474661364383e-06, "loss": 0.6677, "step": 2972 }, { "epoch": 0.09111805810959912, "grad_norm": 1.4300238752200403, "learning_rate": 9.902377088737014e-06, "loss": 0.7349, "step": 2973 }, { "epoch": 0.09114870663234032, "grad_norm": 0.5014948072659956, "learning_rate": 9.90227946780527e-06, "loss": 0.4811, "step": 2974 }, { "epoch": 0.09117935515508152, "grad_norm": 1.5905543315766337, "learning_rate": 9.90218179857011e-06, "loss": 0.7176, "step": 2975 }, { "epoch": 0.09121000367782273, "grad_norm": 1.4261732424264821, "learning_rate": 9.902084081032499e-06, "loss": 0.7741, "step": 2976 }, { "epoch": 0.09124065220056393, "grad_norm": 1.4070475784730785, "learning_rate": 9.901986315193399e-06, "loss": 0.7165, "step": 2977 }, { "epoch": 0.09127130072330514, "grad_norm": 1.5930631123321475, "learning_rate": 9.901888501053773e-06, "loss": 0.7543, "step": 2978 }, { "epoch": 0.09130194924604634, "grad_norm": 0.5077423372208916, "learning_rate": 9.901790638614588e-06, "loss": 0.4836, "step": 2979 }, { "epoch": 0.09133259776878755, "grad_norm": 0.4994210531049394, "learning_rate": 9.901692727876804e-06, "loss": 0.4666, "step": 2980 }, { "epoch": 0.09136324629152875, "grad_norm": 1.7421182029025848, "learning_rate": 9.901594768841386e-06, "loss": 0.6045, "step": 2981 }, { "epoch": 0.09139389481426995, "grad_norm": 1.3176239067521656, "learning_rate": 9.901496761509304e-06, "loss": 0.7668, "step": 2982 }, { "epoch": 0.09142454333701115, "grad_norm": 0.509023947750069, "learning_rate": 9.901398705881518e-06, "loss": 0.4982, "step": 2983 }, { "epoch": 0.09145519185975236, "grad_norm": 1.5006708908924562, "learning_rate": 9.901300601958997e-06, "loss": 0.7619, "step": 2984 }, { "epoch": 0.09148584038249356, "grad_norm": 1.4028575257877687, "learning_rate": 9.901202449742706e-06, "loss": 0.7469, "step": 2985 }, { "epoch": 0.09151648890523477, "grad_norm": 1.4952528061452741, "learning_rate": 9.901104249233614e-06, "loss": 0.7789, "step": 2986 }, { "epoch": 0.09154713742797597, "grad_norm": 1.424605543772305, "learning_rate": 9.901006000432688e-06, "loss": 0.7254, "step": 2987 }, { "epoch": 0.09157778595071718, "grad_norm": 1.3285583423909078, "learning_rate": 9.900907703340897e-06, "loss": 0.6905, "step": 2988 }, { "epoch": 0.09160843447345839, "grad_norm": 1.522734662923261, "learning_rate": 9.900809357959206e-06, "loss": 0.6807, "step": 2989 }, { "epoch": 0.09163908299619958, "grad_norm": 0.5486804394989904, "learning_rate": 9.900710964288588e-06, "loss": 0.4868, "step": 2990 }, { "epoch": 0.09166973151894078, "grad_norm": 1.3187200439972695, "learning_rate": 9.900612522330012e-06, "loss": 0.6918, "step": 2991 }, { "epoch": 0.09170038004168199, "grad_norm": 1.376553794653776, "learning_rate": 9.900514032084445e-06, "loss": 0.7286, "step": 2992 }, { "epoch": 0.0917310285644232, "grad_norm": 1.241912508435406, "learning_rate": 9.90041549355286e-06, "loss": 0.7869, "step": 2993 }, { "epoch": 0.0917616770871644, "grad_norm": 1.5428644988886862, "learning_rate": 9.900316906736227e-06, "loss": 0.7466, "step": 2994 }, { "epoch": 0.0917923256099056, "grad_norm": 1.47019819570816, "learning_rate": 9.900218271635517e-06, "loss": 0.7425, "step": 2995 }, { "epoch": 0.09182297413264681, "grad_norm": 1.4423992999457347, "learning_rate": 9.900119588251706e-06, "loss": 0.7212, "step": 2996 }, { "epoch": 0.091853622655388, "grad_norm": 1.4135176013038582, "learning_rate": 9.90002085658576e-06, "loss": 0.7551, "step": 2997 }, { "epoch": 0.09188427117812921, "grad_norm": 1.437751207599783, "learning_rate": 9.899922076638655e-06, "loss": 0.7273, "step": 2998 }, { "epoch": 0.09191491970087041, "grad_norm": 1.4472773981659308, "learning_rate": 9.899823248411364e-06, "loss": 0.7212, "step": 2999 }, { "epoch": 0.09194556822361162, "grad_norm": 1.3216195910357935, "learning_rate": 9.899724371904862e-06, "loss": 0.6954, "step": 3000 }, { "epoch": 0.09197621674635283, "grad_norm": 1.4597997535474942, "learning_rate": 9.899625447120122e-06, "loss": 0.7469, "step": 3001 }, { "epoch": 0.09200686526909403, "grad_norm": 1.5201802070525696, "learning_rate": 9.899526474058118e-06, "loss": 0.7783, "step": 3002 }, { "epoch": 0.09203751379183524, "grad_norm": 0.571222372815047, "learning_rate": 9.899427452719826e-06, "loss": 0.4803, "step": 3003 }, { "epoch": 0.09206816231457644, "grad_norm": 1.5780194076392613, "learning_rate": 9.899328383106224e-06, "loss": 0.7557, "step": 3004 }, { "epoch": 0.09209881083731764, "grad_norm": 1.3650610364528017, "learning_rate": 9.899229265218284e-06, "loss": 0.7815, "step": 3005 }, { "epoch": 0.09212945936005884, "grad_norm": 1.529661923615162, "learning_rate": 9.899130099056983e-06, "loss": 0.7375, "step": 3006 }, { "epoch": 0.09216010788280005, "grad_norm": 1.4606502081874109, "learning_rate": 9.899030884623302e-06, "loss": 0.8219, "step": 3007 }, { "epoch": 0.09219075640554125, "grad_norm": 1.3755429078972123, "learning_rate": 9.898931621918215e-06, "loss": 0.7052, "step": 3008 }, { "epoch": 0.09222140492828246, "grad_norm": 1.3993980919638636, "learning_rate": 9.898832310942702e-06, "loss": 0.6987, "step": 3009 }, { "epoch": 0.09225205345102366, "grad_norm": 1.6679317656684054, "learning_rate": 9.89873295169774e-06, "loss": 0.7629, "step": 3010 }, { "epoch": 0.09228270197376487, "grad_norm": 1.2579791006247554, "learning_rate": 9.89863354418431e-06, "loss": 0.6235, "step": 3011 }, { "epoch": 0.09231335049650607, "grad_norm": 1.4742584638254217, "learning_rate": 9.89853408840339e-06, "loss": 0.7037, "step": 3012 }, { "epoch": 0.09234399901924727, "grad_norm": 1.457360206915252, "learning_rate": 9.89843458435596e-06, "loss": 0.8169, "step": 3013 }, { "epoch": 0.09237464754198847, "grad_norm": 1.4640021222607469, "learning_rate": 9.898335032043001e-06, "loss": 0.8173, "step": 3014 }, { "epoch": 0.09240529606472968, "grad_norm": 1.3564073940926953, "learning_rate": 9.898235431465492e-06, "loss": 0.7198, "step": 3015 }, { "epoch": 0.09243594458747088, "grad_norm": 1.2527725837560446, "learning_rate": 9.898135782624418e-06, "loss": 0.6636, "step": 3016 }, { "epoch": 0.09246659311021209, "grad_norm": 0.7501039309390538, "learning_rate": 9.898036085520759e-06, "loss": 0.4947, "step": 3017 }, { "epoch": 0.0924972416329533, "grad_norm": 1.4927323009390319, "learning_rate": 9.897936340155496e-06, "loss": 0.7797, "step": 3018 }, { "epoch": 0.0925278901556945, "grad_norm": 0.5506353202070534, "learning_rate": 9.897836546529614e-06, "loss": 0.4772, "step": 3019 }, { "epoch": 0.0925585386784357, "grad_norm": 0.4873818901617623, "learning_rate": 9.897736704644093e-06, "loss": 0.4525, "step": 3020 }, { "epoch": 0.0925891872011769, "grad_norm": 1.435702085136471, "learning_rate": 9.897636814499923e-06, "loss": 0.7263, "step": 3021 }, { "epoch": 0.0926198357239181, "grad_norm": 1.361115614176244, "learning_rate": 9.897536876098081e-06, "loss": 0.6929, "step": 3022 }, { "epoch": 0.09265048424665931, "grad_norm": 0.7213835015708785, "learning_rate": 9.897436889439558e-06, "loss": 0.509, "step": 3023 }, { "epoch": 0.09268113276940051, "grad_norm": 0.6491373017368528, "learning_rate": 9.897336854525334e-06, "loss": 0.4844, "step": 3024 }, { "epoch": 0.09271178129214172, "grad_norm": 1.714937349444836, "learning_rate": 9.897236771356397e-06, "loss": 0.8316, "step": 3025 }, { "epoch": 0.09274242981488293, "grad_norm": 1.4119797307289665, "learning_rate": 9.897136639933734e-06, "loss": 0.6737, "step": 3026 }, { "epoch": 0.09277307833762413, "grad_norm": 1.5847816514157345, "learning_rate": 9.89703646025833e-06, "loss": 0.9257, "step": 3027 }, { "epoch": 0.09280372686036532, "grad_norm": 1.4217065937840623, "learning_rate": 9.896936232331173e-06, "loss": 0.6358, "step": 3028 }, { "epoch": 0.09283437538310653, "grad_norm": 1.4576330109758253, "learning_rate": 9.896835956153251e-06, "loss": 0.6781, "step": 3029 }, { "epoch": 0.09286502390584774, "grad_norm": 1.4219358018215407, "learning_rate": 9.896735631725551e-06, "loss": 0.6891, "step": 3030 }, { "epoch": 0.09289567242858894, "grad_norm": 1.4282777849249257, "learning_rate": 9.896635259049062e-06, "loss": 0.6796, "step": 3031 }, { "epoch": 0.09292632095133015, "grad_norm": 1.3464364369562705, "learning_rate": 9.896534838124773e-06, "loss": 0.7169, "step": 3032 }, { "epoch": 0.09295696947407135, "grad_norm": 1.4798014169081448, "learning_rate": 9.896434368953673e-06, "loss": 0.7056, "step": 3033 }, { "epoch": 0.09298761799681256, "grad_norm": 1.6050927422630863, "learning_rate": 9.896333851536753e-06, "loss": 0.8212, "step": 3034 }, { "epoch": 0.09301826651955376, "grad_norm": 1.5228279959306388, "learning_rate": 9.896233285875003e-06, "loss": 0.7646, "step": 3035 }, { "epoch": 0.09304891504229496, "grad_norm": 1.395662272326214, "learning_rate": 9.896132671969412e-06, "loss": 0.7074, "step": 3036 }, { "epoch": 0.09307956356503616, "grad_norm": 1.3706215990263497, "learning_rate": 9.896032009820975e-06, "loss": 0.7146, "step": 3037 }, { "epoch": 0.09311021208777737, "grad_norm": 1.3920376727031019, "learning_rate": 9.895931299430681e-06, "loss": 0.7013, "step": 3038 }, { "epoch": 0.09314086061051857, "grad_norm": 1.3829926309071827, "learning_rate": 9.895830540799523e-06, "loss": 0.7081, "step": 3039 }, { "epoch": 0.09317150913325978, "grad_norm": 1.2154542759823366, "learning_rate": 9.895729733928494e-06, "loss": 0.6861, "step": 3040 }, { "epoch": 0.09320215765600098, "grad_norm": 1.7232717812073899, "learning_rate": 9.895628878818588e-06, "loss": 0.6821, "step": 3041 }, { "epoch": 0.09323280617874219, "grad_norm": 1.4926387454705754, "learning_rate": 9.895527975470799e-06, "loss": 0.7559, "step": 3042 }, { "epoch": 0.0932634547014834, "grad_norm": 1.546825446386272, "learning_rate": 9.895427023886118e-06, "loss": 0.7553, "step": 3043 }, { "epoch": 0.09329410322422459, "grad_norm": 1.8412341432507753, "learning_rate": 9.895326024065542e-06, "loss": 0.7082, "step": 3044 }, { "epoch": 0.09332475174696579, "grad_norm": 1.3849664320037114, "learning_rate": 9.895224976010067e-06, "loss": 0.7174, "step": 3045 }, { "epoch": 0.093355400269707, "grad_norm": 1.4184410385258528, "learning_rate": 9.895123879720688e-06, "loss": 0.7177, "step": 3046 }, { "epoch": 0.0933860487924482, "grad_norm": 1.5961941844963514, "learning_rate": 9.8950227351984e-06, "loss": 0.7179, "step": 3047 }, { "epoch": 0.09341669731518941, "grad_norm": 1.3886620562002747, "learning_rate": 9.894921542444202e-06, "loss": 0.6595, "step": 3048 }, { "epoch": 0.09344734583793061, "grad_norm": 1.6042166302728564, "learning_rate": 9.894820301459089e-06, "loss": 0.7631, "step": 3049 }, { "epoch": 0.09347799436067182, "grad_norm": 1.517012063884802, "learning_rate": 9.89471901224406e-06, "loss": 0.829, "step": 3050 }, { "epoch": 0.09350864288341303, "grad_norm": 1.476580837531508, "learning_rate": 9.89461767480011e-06, "loss": 0.7266, "step": 3051 }, { "epoch": 0.09353929140615422, "grad_norm": 1.498283559425004, "learning_rate": 9.894516289128242e-06, "loss": 0.6923, "step": 3052 }, { "epoch": 0.09356993992889542, "grad_norm": 1.3131986810906682, "learning_rate": 9.894414855229453e-06, "loss": 0.7334, "step": 3053 }, { "epoch": 0.09360058845163663, "grad_norm": 1.2455440615027007, "learning_rate": 9.89431337310474e-06, "loss": 0.7049, "step": 3054 }, { "epoch": 0.09363123697437783, "grad_norm": 1.5018567921060917, "learning_rate": 9.894211842755107e-06, "loss": 0.7486, "step": 3055 }, { "epoch": 0.09366188549711904, "grad_norm": 1.4508781137228088, "learning_rate": 9.894110264181551e-06, "loss": 0.5203, "step": 3056 }, { "epoch": 0.09369253401986025, "grad_norm": 1.435551631426444, "learning_rate": 9.894008637385075e-06, "loss": 0.6816, "step": 3057 }, { "epoch": 0.09372318254260145, "grad_norm": 0.8142212313155475, "learning_rate": 9.89390696236668e-06, "loss": 0.4666, "step": 3058 }, { "epoch": 0.09375383106534264, "grad_norm": 1.5937628857068689, "learning_rate": 9.893805239127366e-06, "loss": 0.6592, "step": 3059 }, { "epoch": 0.09378447958808385, "grad_norm": 1.6244418924978077, "learning_rate": 9.893703467668139e-06, "loss": 0.7687, "step": 3060 }, { "epoch": 0.09381512811082506, "grad_norm": 1.6102050040991678, "learning_rate": 9.893601647989997e-06, "loss": 0.7688, "step": 3061 }, { "epoch": 0.09384577663356626, "grad_norm": 1.0868648527998432, "learning_rate": 9.893499780093948e-06, "loss": 0.4838, "step": 3062 }, { "epoch": 0.09387642515630747, "grad_norm": 1.4286069478735495, "learning_rate": 9.893397863980993e-06, "loss": 0.7158, "step": 3063 }, { "epoch": 0.09390707367904867, "grad_norm": 1.6820349218343822, "learning_rate": 9.893295899652137e-06, "loss": 0.9073, "step": 3064 }, { "epoch": 0.09393772220178988, "grad_norm": 1.0162497126436216, "learning_rate": 9.893193887108385e-06, "loss": 0.4952, "step": 3065 }, { "epoch": 0.09396837072453108, "grad_norm": 1.4518082583081358, "learning_rate": 9.893091826350741e-06, "loss": 0.8054, "step": 3066 }, { "epoch": 0.09399901924727228, "grad_norm": 1.577311435739306, "learning_rate": 9.892989717380211e-06, "loss": 0.8463, "step": 3067 }, { "epoch": 0.09402966777001348, "grad_norm": 1.6341151160274339, "learning_rate": 9.892887560197802e-06, "loss": 0.7376, "step": 3068 }, { "epoch": 0.09406031629275469, "grad_norm": 0.5770721956435402, "learning_rate": 9.892785354804519e-06, "loss": 0.4742, "step": 3069 }, { "epoch": 0.09409096481549589, "grad_norm": 1.7027502042231062, "learning_rate": 9.89268310120137e-06, "loss": 0.7783, "step": 3070 }, { "epoch": 0.0941216133382371, "grad_norm": 1.5567798208005146, "learning_rate": 9.892580799389364e-06, "loss": 0.8485, "step": 3071 }, { "epoch": 0.0941522618609783, "grad_norm": 1.6846347837421531, "learning_rate": 9.892478449369507e-06, "loss": 0.8475, "step": 3072 }, { "epoch": 0.09418291038371951, "grad_norm": 0.6294859532598411, "learning_rate": 9.892376051142807e-06, "loss": 0.4957, "step": 3073 }, { "epoch": 0.09421355890646071, "grad_norm": 1.5867041583457042, "learning_rate": 9.892273604710275e-06, "loss": 0.8143, "step": 3074 }, { "epoch": 0.0942442074292019, "grad_norm": 1.2212457616390324, "learning_rate": 9.89217111007292e-06, "loss": 0.6546, "step": 3075 }, { "epoch": 0.09427485595194311, "grad_norm": 1.3911905151817487, "learning_rate": 9.89206856723175e-06, "loss": 0.7796, "step": 3076 }, { "epoch": 0.09430550447468432, "grad_norm": 1.3480948134437525, "learning_rate": 9.891965976187778e-06, "loss": 0.8161, "step": 3077 }, { "epoch": 0.09433615299742552, "grad_norm": 1.3434053657696656, "learning_rate": 9.891863336942012e-06, "loss": 0.6797, "step": 3078 }, { "epoch": 0.09436680152016673, "grad_norm": 1.3244829191030605, "learning_rate": 9.891760649495465e-06, "loss": 0.7323, "step": 3079 }, { "epoch": 0.09439745004290793, "grad_norm": 1.3397730254837263, "learning_rate": 9.89165791384915e-06, "loss": 0.7691, "step": 3080 }, { "epoch": 0.09442809856564914, "grad_norm": 1.635668104701898, "learning_rate": 9.891555130004078e-06, "loss": 0.8029, "step": 3081 }, { "epoch": 0.09445874708839035, "grad_norm": 1.5700987762086136, "learning_rate": 9.891452297961261e-06, "loss": 0.7576, "step": 3082 }, { "epoch": 0.09448939561113154, "grad_norm": 1.288965404448508, "learning_rate": 9.891349417721713e-06, "loss": 0.7119, "step": 3083 }, { "epoch": 0.09452004413387274, "grad_norm": 0.5944103385527888, "learning_rate": 9.891246489286448e-06, "loss": 0.4889, "step": 3084 }, { "epoch": 0.09455069265661395, "grad_norm": 1.4962520853787498, "learning_rate": 9.89114351265648e-06, "loss": 0.8348, "step": 3085 }, { "epoch": 0.09458134117935516, "grad_norm": 1.680123434387346, "learning_rate": 9.891040487832824e-06, "loss": 0.7533, "step": 3086 }, { "epoch": 0.09461198970209636, "grad_norm": 1.3016444096428788, "learning_rate": 9.890937414816493e-06, "loss": 0.78, "step": 3087 }, { "epoch": 0.09464263822483757, "grad_norm": 1.3288225505463567, "learning_rate": 9.890834293608506e-06, "loss": 0.7123, "step": 3088 }, { "epoch": 0.09467328674757877, "grad_norm": 1.5108579009276584, "learning_rate": 9.890731124209875e-06, "loss": 0.7298, "step": 3089 }, { "epoch": 0.09470393527031996, "grad_norm": 1.5402799398997893, "learning_rate": 9.890627906621622e-06, "loss": 0.7721, "step": 3090 }, { "epoch": 0.09473458379306117, "grad_norm": 1.4419984095462524, "learning_rate": 9.890524640844759e-06, "loss": 0.7878, "step": 3091 }, { "epoch": 0.09476523231580238, "grad_norm": 1.7424002382272579, "learning_rate": 9.890421326880306e-06, "loss": 0.6604, "step": 3092 }, { "epoch": 0.09479588083854358, "grad_norm": 1.3182552660356877, "learning_rate": 9.89031796472928e-06, "loss": 0.6524, "step": 3093 }, { "epoch": 0.09482652936128479, "grad_norm": 1.4314592534594544, "learning_rate": 9.8902145543927e-06, "loss": 0.7609, "step": 3094 }, { "epoch": 0.09485717788402599, "grad_norm": 0.5286686854397195, "learning_rate": 9.890111095871584e-06, "loss": 0.4734, "step": 3095 }, { "epoch": 0.0948878264067672, "grad_norm": 1.542106309732653, "learning_rate": 9.890007589166954e-06, "loss": 0.6566, "step": 3096 }, { "epoch": 0.0949184749295084, "grad_norm": 1.4426607588234757, "learning_rate": 9.889904034279827e-06, "loss": 0.7498, "step": 3097 }, { "epoch": 0.0949491234522496, "grad_norm": 1.4686323137830717, "learning_rate": 9.889800431211224e-06, "loss": 0.773, "step": 3098 }, { "epoch": 0.0949797719749908, "grad_norm": 1.2845240022905422, "learning_rate": 9.889696779962167e-06, "loss": 0.5698, "step": 3099 }, { "epoch": 0.095010420497732, "grad_norm": 1.5652042292441746, "learning_rate": 9.889593080533675e-06, "loss": 0.7819, "step": 3100 }, { "epoch": 0.09504106902047321, "grad_norm": 0.4851579645680659, "learning_rate": 9.889489332926773e-06, "loss": 0.4938, "step": 3101 }, { "epoch": 0.09507171754321442, "grad_norm": 1.5343911776036752, "learning_rate": 9.889385537142482e-06, "loss": 0.7274, "step": 3102 }, { "epoch": 0.09510236606595562, "grad_norm": 0.4902994954455639, "learning_rate": 9.889281693181823e-06, "loss": 0.4643, "step": 3103 }, { "epoch": 0.09513301458869683, "grad_norm": 1.3666070181039907, "learning_rate": 9.889177801045821e-06, "loss": 0.8012, "step": 3104 }, { "epoch": 0.09516366311143803, "grad_norm": 1.4129323017075428, "learning_rate": 9.889073860735499e-06, "loss": 0.6995, "step": 3105 }, { "epoch": 0.09519431163417923, "grad_norm": 1.2569326825435942, "learning_rate": 9.888969872251881e-06, "loss": 0.6971, "step": 3106 }, { "epoch": 0.09522496015692043, "grad_norm": 0.47523041027946256, "learning_rate": 9.888865835595994e-06, "loss": 0.4667, "step": 3107 }, { "epoch": 0.09525560867966164, "grad_norm": 1.4779054655038504, "learning_rate": 9.888761750768858e-06, "loss": 0.7279, "step": 3108 }, { "epoch": 0.09528625720240284, "grad_norm": 1.5163232371404733, "learning_rate": 9.888657617771503e-06, "loss": 0.8101, "step": 3109 }, { "epoch": 0.09531690572514405, "grad_norm": 0.4652549067914421, "learning_rate": 9.888553436604954e-06, "loss": 0.4805, "step": 3110 }, { "epoch": 0.09534755424788526, "grad_norm": 0.49421324318551063, "learning_rate": 9.888449207270237e-06, "loss": 0.4804, "step": 3111 }, { "epoch": 0.09537820277062646, "grad_norm": 1.3652868716624569, "learning_rate": 9.888344929768378e-06, "loss": 0.6587, "step": 3112 }, { "epoch": 0.09540885129336767, "grad_norm": 1.5923206465628306, "learning_rate": 9.888240604100407e-06, "loss": 0.7647, "step": 3113 }, { "epoch": 0.09543949981610886, "grad_norm": 1.4468442436331208, "learning_rate": 9.888136230267351e-06, "loss": 0.7929, "step": 3114 }, { "epoch": 0.09547014833885006, "grad_norm": 1.4858296591881854, "learning_rate": 9.888031808270237e-06, "loss": 0.611, "step": 3115 }, { "epoch": 0.09550079686159127, "grad_norm": 1.2953608266549332, "learning_rate": 9.887927338110095e-06, "loss": 0.6862, "step": 3116 }, { "epoch": 0.09553144538433248, "grad_norm": 0.4774876677210367, "learning_rate": 9.887822819787955e-06, "loss": 0.4831, "step": 3117 }, { "epoch": 0.09556209390707368, "grad_norm": 1.5601449180977587, "learning_rate": 9.887718253304847e-06, "loss": 0.6532, "step": 3118 }, { "epoch": 0.09559274242981489, "grad_norm": 1.551394268160097, "learning_rate": 9.8876136386618e-06, "loss": 0.7331, "step": 3119 }, { "epoch": 0.09562339095255609, "grad_norm": 1.3387671948266087, "learning_rate": 9.887508975859843e-06, "loss": 0.6907, "step": 3120 }, { "epoch": 0.09565403947529728, "grad_norm": 0.5048239396343741, "learning_rate": 9.887404264900012e-06, "loss": 0.4602, "step": 3121 }, { "epoch": 0.09568468799803849, "grad_norm": 1.4190365498788633, "learning_rate": 9.887299505783334e-06, "loss": 0.6872, "step": 3122 }, { "epoch": 0.0957153365207797, "grad_norm": 1.608952322999461, "learning_rate": 9.887194698510846e-06, "loss": 0.7022, "step": 3123 }, { "epoch": 0.0957459850435209, "grad_norm": 1.3561948874529044, "learning_rate": 9.887089843083577e-06, "loss": 0.7371, "step": 3124 }, { "epoch": 0.0957766335662621, "grad_norm": 1.5049910037289795, "learning_rate": 9.886984939502562e-06, "loss": 0.7274, "step": 3125 }, { "epoch": 0.09580728208900331, "grad_norm": 1.3675124207672358, "learning_rate": 9.886879987768833e-06, "loss": 0.7019, "step": 3126 }, { "epoch": 0.09583793061174452, "grad_norm": 1.392293980167267, "learning_rate": 9.886774987883426e-06, "loss": 0.7631, "step": 3127 }, { "epoch": 0.09586857913448572, "grad_norm": 1.4766166161457353, "learning_rate": 9.886669939847373e-06, "loss": 0.7441, "step": 3128 }, { "epoch": 0.09589922765722692, "grad_norm": 1.474642847975144, "learning_rate": 9.886564843661713e-06, "loss": 0.7437, "step": 3129 }, { "epoch": 0.09592987617996812, "grad_norm": 0.5975577507762647, "learning_rate": 9.886459699327478e-06, "loss": 0.4591, "step": 3130 }, { "epoch": 0.09596052470270933, "grad_norm": 1.57939431063611, "learning_rate": 9.886354506845706e-06, "loss": 0.7222, "step": 3131 }, { "epoch": 0.09599117322545053, "grad_norm": 1.4992556281449585, "learning_rate": 9.886249266217432e-06, "loss": 0.728, "step": 3132 }, { "epoch": 0.09602182174819174, "grad_norm": 1.426815616526066, "learning_rate": 9.886143977443694e-06, "loss": 0.7691, "step": 3133 }, { "epoch": 0.09605247027093294, "grad_norm": 1.5345832106624089, "learning_rate": 9.886038640525531e-06, "loss": 0.749, "step": 3134 }, { "epoch": 0.09608311879367415, "grad_norm": 1.3802618889548564, "learning_rate": 9.885933255463978e-06, "loss": 0.7409, "step": 3135 }, { "epoch": 0.09611376731641535, "grad_norm": 1.736416867713356, "learning_rate": 9.885827822260073e-06, "loss": 0.7661, "step": 3136 }, { "epoch": 0.09614441583915655, "grad_norm": 1.52783605268907, "learning_rate": 9.885722340914857e-06, "loss": 0.7455, "step": 3137 }, { "epoch": 0.09617506436189775, "grad_norm": 0.5102979792429496, "learning_rate": 9.88561681142937e-06, "loss": 0.4898, "step": 3138 }, { "epoch": 0.09620571288463896, "grad_norm": 1.8975194508066617, "learning_rate": 9.88551123380465e-06, "loss": 0.7621, "step": 3139 }, { "epoch": 0.09623636140738016, "grad_norm": 1.5395151289969078, "learning_rate": 9.885405608041738e-06, "loss": 0.8772, "step": 3140 }, { "epoch": 0.09626700993012137, "grad_norm": 1.235334462994027, "learning_rate": 9.885299934141674e-06, "loss": 0.8064, "step": 3141 }, { "epoch": 0.09629765845286258, "grad_norm": 0.5079371909496647, "learning_rate": 9.885194212105498e-06, "loss": 0.4711, "step": 3142 }, { "epoch": 0.09632830697560378, "grad_norm": 1.4194352315821872, "learning_rate": 9.885088441934257e-06, "loss": 0.7283, "step": 3143 }, { "epoch": 0.09635895549834499, "grad_norm": 1.3878832580123484, "learning_rate": 9.884982623628987e-06, "loss": 0.7042, "step": 3144 }, { "epoch": 0.09638960402108618, "grad_norm": 2.2649372425026284, "learning_rate": 9.884876757190736e-06, "loss": 0.7553, "step": 3145 }, { "epoch": 0.09642025254382738, "grad_norm": 2.1114239093231277, "learning_rate": 9.884770842620541e-06, "loss": 0.7855, "step": 3146 }, { "epoch": 0.09645090106656859, "grad_norm": 0.547725107784587, "learning_rate": 9.884664879919452e-06, "loss": 0.4817, "step": 3147 }, { "epoch": 0.0964815495893098, "grad_norm": 1.4762163906022538, "learning_rate": 9.88455886908851e-06, "loss": 0.7609, "step": 3148 }, { "epoch": 0.096512198112051, "grad_norm": 1.4141440560122285, "learning_rate": 9.884452810128757e-06, "loss": 0.565, "step": 3149 }, { "epoch": 0.0965428466347922, "grad_norm": 1.4441172062245033, "learning_rate": 9.884346703041243e-06, "loss": 0.6706, "step": 3150 }, { "epoch": 0.09657349515753341, "grad_norm": 1.4211836715866566, "learning_rate": 9.88424054782701e-06, "loss": 0.7741, "step": 3151 }, { "epoch": 0.0966041436802746, "grad_norm": 1.3832603982513016, "learning_rate": 9.884134344487106e-06, "loss": 0.7831, "step": 3152 }, { "epoch": 0.09663479220301581, "grad_norm": 1.514114464660884, "learning_rate": 9.884028093022577e-06, "loss": 0.7158, "step": 3153 }, { "epoch": 0.09666544072575702, "grad_norm": 1.387591832839582, "learning_rate": 9.88392179343447e-06, "loss": 0.6416, "step": 3154 }, { "epoch": 0.09669608924849822, "grad_norm": 1.4886680971651225, "learning_rate": 9.88381544572383e-06, "loss": 0.768, "step": 3155 }, { "epoch": 0.09672673777123943, "grad_norm": 0.5457583946038628, "learning_rate": 9.883709049891709e-06, "loss": 0.4774, "step": 3156 }, { "epoch": 0.09675738629398063, "grad_norm": 1.3980215176431638, "learning_rate": 9.883602605939151e-06, "loss": 0.6838, "step": 3157 }, { "epoch": 0.09678803481672184, "grad_norm": 1.6464722277619828, "learning_rate": 9.883496113867209e-06, "loss": 0.7922, "step": 3158 }, { "epoch": 0.09681868333946304, "grad_norm": 1.339210483081753, "learning_rate": 9.883389573676929e-06, "loss": 0.7955, "step": 3159 }, { "epoch": 0.09684933186220424, "grad_norm": 1.2771056379622943, "learning_rate": 9.883282985369362e-06, "loss": 0.6656, "step": 3160 }, { "epoch": 0.09687998038494544, "grad_norm": 1.3454367119563706, "learning_rate": 9.88317634894556e-06, "loss": 0.7151, "step": 3161 }, { "epoch": 0.09691062890768665, "grad_norm": 1.4465773306907652, "learning_rate": 9.883069664406571e-06, "loss": 0.6882, "step": 3162 }, { "epoch": 0.09694127743042785, "grad_norm": 0.48296113533990337, "learning_rate": 9.882962931753446e-06, "loss": 0.4582, "step": 3163 }, { "epoch": 0.09697192595316906, "grad_norm": 1.4165871690595346, "learning_rate": 9.88285615098724e-06, "loss": 0.7203, "step": 3164 }, { "epoch": 0.09700257447591026, "grad_norm": 1.3481850310041128, "learning_rate": 9.882749322109002e-06, "loss": 0.7801, "step": 3165 }, { "epoch": 0.09703322299865147, "grad_norm": 1.507848828952337, "learning_rate": 9.882642445119784e-06, "loss": 0.6454, "step": 3166 }, { "epoch": 0.09706387152139268, "grad_norm": 1.579420363983463, "learning_rate": 9.882535520020641e-06, "loss": 0.8884, "step": 3167 }, { "epoch": 0.09709452004413387, "grad_norm": 1.3379303768560988, "learning_rate": 9.88242854681263e-06, "loss": 0.7606, "step": 3168 }, { "epoch": 0.09712516856687507, "grad_norm": 1.5186565880554135, "learning_rate": 9.882321525496799e-06, "loss": 0.695, "step": 3169 }, { "epoch": 0.09715581708961628, "grad_norm": 1.3711245898303126, "learning_rate": 9.882214456074204e-06, "loss": 0.7701, "step": 3170 }, { "epoch": 0.09718646561235748, "grad_norm": 0.4807233228166517, "learning_rate": 9.882107338545902e-06, "loss": 0.4901, "step": 3171 }, { "epoch": 0.09721711413509869, "grad_norm": 1.3798830563387698, "learning_rate": 9.882000172912946e-06, "loss": 0.7699, "step": 3172 }, { "epoch": 0.0972477626578399, "grad_norm": 0.47869915974959887, "learning_rate": 9.881892959176394e-06, "loss": 0.4683, "step": 3173 }, { "epoch": 0.0972784111805811, "grad_norm": 1.2772539723407617, "learning_rate": 9.8817856973373e-06, "loss": 0.7504, "step": 3174 }, { "epoch": 0.0973090597033223, "grad_norm": 1.3594431769857125, "learning_rate": 9.881678387396724e-06, "loss": 0.6944, "step": 3175 }, { "epoch": 0.0973397082260635, "grad_norm": 1.5760914256393415, "learning_rate": 9.881571029355724e-06, "loss": 0.8065, "step": 3176 }, { "epoch": 0.0973703567488047, "grad_norm": 1.503333982589089, "learning_rate": 9.881463623215352e-06, "loss": 0.7185, "step": 3177 }, { "epoch": 0.09740100527154591, "grad_norm": 1.4734102457284792, "learning_rate": 9.88135616897667e-06, "loss": 0.7471, "step": 3178 }, { "epoch": 0.09743165379428712, "grad_norm": 1.6392213556069368, "learning_rate": 9.88124866664074e-06, "loss": 0.8113, "step": 3179 }, { "epoch": 0.09746230231702832, "grad_norm": 1.5302537102995848, "learning_rate": 9.881141116208614e-06, "loss": 0.7503, "step": 3180 }, { "epoch": 0.09749295083976953, "grad_norm": 1.3378765675769946, "learning_rate": 9.88103351768136e-06, "loss": 0.6826, "step": 3181 }, { "epoch": 0.09752359936251073, "grad_norm": 1.3841355466659784, "learning_rate": 9.88092587106003e-06, "loss": 0.7949, "step": 3182 }, { "epoch": 0.09755424788525192, "grad_norm": 1.4724558343081764, "learning_rate": 9.88081817634569e-06, "loss": 0.6888, "step": 3183 }, { "epoch": 0.09758489640799313, "grad_norm": 1.524194182864592, "learning_rate": 9.8807104335394e-06, "loss": 0.6598, "step": 3184 }, { "epoch": 0.09761554493073434, "grad_norm": 1.4709790670077985, "learning_rate": 9.88060264264222e-06, "loss": 0.7764, "step": 3185 }, { "epoch": 0.09764619345347554, "grad_norm": 1.4861230801938081, "learning_rate": 9.880494803655216e-06, "loss": 0.6846, "step": 3186 }, { "epoch": 0.09767684197621675, "grad_norm": 1.375787726106334, "learning_rate": 9.880386916579446e-06, "loss": 0.704, "step": 3187 }, { "epoch": 0.09770749049895795, "grad_norm": 1.3157828081038532, "learning_rate": 9.880278981415975e-06, "loss": 0.7338, "step": 3188 }, { "epoch": 0.09773813902169916, "grad_norm": 1.6688679055164144, "learning_rate": 9.880170998165868e-06, "loss": 0.7845, "step": 3189 }, { "epoch": 0.09776878754444036, "grad_norm": 1.4652878022315603, "learning_rate": 9.880062966830186e-06, "loss": 0.8244, "step": 3190 }, { "epoch": 0.09779943606718156, "grad_norm": 1.5037218264380832, "learning_rate": 9.879954887409996e-06, "loss": 0.7595, "step": 3191 }, { "epoch": 0.09783008458992276, "grad_norm": 1.5080487715756703, "learning_rate": 9.879846759906361e-06, "loss": 0.6581, "step": 3192 }, { "epoch": 0.09786073311266397, "grad_norm": 1.157882646151878, "learning_rate": 9.87973858432035e-06, "loss": 0.6813, "step": 3193 }, { "epoch": 0.09789138163540517, "grad_norm": 1.4997486983993895, "learning_rate": 9.879630360653022e-06, "loss": 0.7675, "step": 3194 }, { "epoch": 0.09792203015814638, "grad_norm": 0.569419165519403, "learning_rate": 9.879522088905448e-06, "loss": 0.4725, "step": 3195 }, { "epoch": 0.09795267868088758, "grad_norm": 1.5941259787952695, "learning_rate": 9.879413769078697e-06, "loss": 0.7789, "step": 3196 }, { "epoch": 0.09798332720362879, "grad_norm": 1.3866466139887683, "learning_rate": 9.879305401173832e-06, "loss": 0.71, "step": 3197 }, { "epoch": 0.09801397572637, "grad_norm": 0.5162022555510358, "learning_rate": 9.879196985191923e-06, "loss": 0.5039, "step": 3198 }, { "epoch": 0.09804462424911119, "grad_norm": 1.6530675034707, "learning_rate": 9.87908852113404e-06, "loss": 0.8111, "step": 3199 }, { "epoch": 0.09807527277185239, "grad_norm": 1.423327848250641, "learning_rate": 9.878980009001245e-06, "loss": 0.7112, "step": 3200 }, { "epoch": 0.0981059212945936, "grad_norm": 1.4007691068813086, "learning_rate": 9.878871448794615e-06, "loss": 0.7428, "step": 3201 }, { "epoch": 0.0981365698173348, "grad_norm": 0.5260382007957256, "learning_rate": 9.878762840515215e-06, "loss": 0.4614, "step": 3202 }, { "epoch": 0.09816721834007601, "grad_norm": 1.6475124057306334, "learning_rate": 9.878654184164116e-06, "loss": 0.7554, "step": 3203 }, { "epoch": 0.09819786686281722, "grad_norm": 0.5229458748324796, "learning_rate": 9.87854547974239e-06, "loss": 0.4853, "step": 3204 }, { "epoch": 0.09822851538555842, "grad_norm": 1.3772247467281413, "learning_rate": 9.878436727251106e-06, "loss": 0.692, "step": 3205 }, { "epoch": 0.09825916390829963, "grad_norm": 0.46190137206247284, "learning_rate": 9.878327926691338e-06, "loss": 0.4732, "step": 3206 }, { "epoch": 0.09828981243104082, "grad_norm": 1.4562674675363112, "learning_rate": 9.878219078064156e-06, "loss": 0.727, "step": 3207 }, { "epoch": 0.09832046095378202, "grad_norm": 1.5299770326078308, "learning_rate": 9.878110181370634e-06, "loss": 0.6812, "step": 3208 }, { "epoch": 0.09835110947652323, "grad_norm": 1.3667289286113653, "learning_rate": 9.878001236611842e-06, "loss": 0.7093, "step": 3209 }, { "epoch": 0.09838175799926444, "grad_norm": 1.3930039128590992, "learning_rate": 9.877892243788858e-06, "loss": 0.7582, "step": 3210 }, { "epoch": 0.09841240652200564, "grad_norm": 1.4642420798982407, "learning_rate": 9.877783202902754e-06, "loss": 0.7529, "step": 3211 }, { "epoch": 0.09844305504474685, "grad_norm": 1.4723328193595464, "learning_rate": 9.877674113954603e-06, "loss": 0.6882, "step": 3212 }, { "epoch": 0.09847370356748805, "grad_norm": 0.6283411071694323, "learning_rate": 9.877564976945482e-06, "loss": 0.4777, "step": 3213 }, { "epoch": 0.09850435209022924, "grad_norm": 1.466663581813167, "learning_rate": 9.877455791876464e-06, "loss": 0.7574, "step": 3214 }, { "epoch": 0.09853500061297045, "grad_norm": 1.5706721525153462, "learning_rate": 9.877346558748626e-06, "loss": 0.6371, "step": 3215 }, { "epoch": 0.09856564913571166, "grad_norm": 1.4328121156764646, "learning_rate": 9.877237277563046e-06, "loss": 0.7143, "step": 3216 }, { "epoch": 0.09859629765845286, "grad_norm": 1.4695103575224981, "learning_rate": 9.877127948320798e-06, "loss": 0.7092, "step": 3217 }, { "epoch": 0.09862694618119407, "grad_norm": 1.4986859418762803, "learning_rate": 9.87701857102296e-06, "loss": 0.7765, "step": 3218 }, { "epoch": 0.09865759470393527, "grad_norm": 0.5287046470175123, "learning_rate": 9.876909145670612e-06, "loss": 0.4695, "step": 3219 }, { "epoch": 0.09868824322667648, "grad_norm": 1.3921673835364288, "learning_rate": 9.876799672264828e-06, "loss": 0.6829, "step": 3220 }, { "epoch": 0.09871889174941768, "grad_norm": 1.635206951324778, "learning_rate": 9.876690150806692e-06, "loss": 0.9174, "step": 3221 }, { "epoch": 0.09874954027215888, "grad_norm": 1.5240840736304073, "learning_rate": 9.876580581297277e-06, "loss": 0.745, "step": 3222 }, { "epoch": 0.09878018879490008, "grad_norm": 1.4835350979223898, "learning_rate": 9.876470963737667e-06, "loss": 0.762, "step": 3223 }, { "epoch": 0.09881083731764129, "grad_norm": 1.3270636495835935, "learning_rate": 9.876361298128942e-06, "loss": 0.7012, "step": 3224 }, { "epoch": 0.09884148584038249, "grad_norm": 0.48878973171006135, "learning_rate": 9.876251584472181e-06, "loss": 0.4676, "step": 3225 }, { "epoch": 0.0988721343631237, "grad_norm": 0.5001701866704871, "learning_rate": 9.876141822768464e-06, "loss": 0.4784, "step": 3226 }, { "epoch": 0.0989027828858649, "grad_norm": 1.2766467945939906, "learning_rate": 9.876032013018875e-06, "loss": 0.6448, "step": 3227 }, { "epoch": 0.09893343140860611, "grad_norm": 1.4369656004159588, "learning_rate": 9.875922155224495e-06, "loss": 0.8485, "step": 3228 }, { "epoch": 0.09896407993134732, "grad_norm": 1.50172845527081, "learning_rate": 9.875812249386407e-06, "loss": 0.7622, "step": 3229 }, { "epoch": 0.09899472845408851, "grad_norm": 1.60572354001562, "learning_rate": 9.875702295505694e-06, "loss": 0.7895, "step": 3230 }, { "epoch": 0.09902537697682971, "grad_norm": 1.4679281141560303, "learning_rate": 9.875592293583438e-06, "loss": 0.7265, "step": 3231 }, { "epoch": 0.09905602549957092, "grad_norm": 1.7480901516119145, "learning_rate": 9.875482243620722e-06, "loss": 0.7501, "step": 3232 }, { "epoch": 0.09908667402231212, "grad_norm": 1.6505418520980053, "learning_rate": 9.875372145618633e-06, "loss": 0.6819, "step": 3233 }, { "epoch": 0.09911732254505333, "grad_norm": 1.3490580663466294, "learning_rate": 9.875261999578257e-06, "loss": 0.7274, "step": 3234 }, { "epoch": 0.09914797106779454, "grad_norm": 0.5076664795993541, "learning_rate": 9.875151805500675e-06, "loss": 0.4692, "step": 3235 }, { "epoch": 0.09917861959053574, "grad_norm": 0.516749719551752, "learning_rate": 9.875041563386975e-06, "loss": 0.47, "step": 3236 }, { "epoch": 0.09920926811327695, "grad_norm": 1.509214320595027, "learning_rate": 9.874931273238244e-06, "loss": 0.8048, "step": 3237 }, { "epoch": 0.09923991663601814, "grad_norm": 1.3139401802693398, "learning_rate": 9.874820935055566e-06, "loss": 0.7497, "step": 3238 }, { "epoch": 0.09927056515875934, "grad_norm": 1.5431755899532458, "learning_rate": 9.874710548840032e-06, "loss": 0.682, "step": 3239 }, { "epoch": 0.09930121368150055, "grad_norm": 1.5246876931895403, "learning_rate": 9.874600114592728e-06, "loss": 0.814, "step": 3240 }, { "epoch": 0.09933186220424176, "grad_norm": 1.2237930478322507, "learning_rate": 9.87448963231474e-06, "loss": 0.8159, "step": 3241 }, { "epoch": 0.09936251072698296, "grad_norm": 1.4748090267332294, "learning_rate": 9.874379102007159e-06, "loss": 0.7069, "step": 3242 }, { "epoch": 0.09939315924972417, "grad_norm": 1.5179623290020225, "learning_rate": 9.874268523671074e-06, "loss": 0.717, "step": 3243 }, { "epoch": 0.09942380777246537, "grad_norm": 1.3179590961041752, "learning_rate": 9.874157897307575e-06, "loss": 0.7153, "step": 3244 }, { "epoch": 0.09945445629520656, "grad_norm": 0.5947888700072769, "learning_rate": 9.87404722291775e-06, "loss": 0.4916, "step": 3245 }, { "epoch": 0.09948510481794777, "grad_norm": 1.5420382200936493, "learning_rate": 9.87393650050269e-06, "loss": 0.7353, "step": 3246 }, { "epoch": 0.09951575334068898, "grad_norm": 1.6364786719393396, "learning_rate": 9.873825730063488e-06, "loss": 0.7728, "step": 3247 }, { "epoch": 0.09954640186343018, "grad_norm": 0.4990511667157047, "learning_rate": 9.873714911601234e-06, "loss": 0.4687, "step": 3248 }, { "epoch": 0.09957705038617139, "grad_norm": 1.2568475511562787, "learning_rate": 9.873604045117018e-06, "loss": 0.6665, "step": 3249 }, { "epoch": 0.09960769890891259, "grad_norm": 1.4859143439974427, "learning_rate": 9.873493130611937e-06, "loss": 0.7301, "step": 3250 }, { "epoch": 0.0996383474316538, "grad_norm": 1.3019492700924353, "learning_rate": 9.87338216808708e-06, "loss": 0.8058, "step": 3251 }, { "epoch": 0.099668995954395, "grad_norm": 1.7449032606563852, "learning_rate": 9.87327115754354e-06, "loss": 0.8542, "step": 3252 }, { "epoch": 0.0996996444771362, "grad_norm": 1.397800555118511, "learning_rate": 9.873160098982415e-06, "loss": 0.8214, "step": 3253 }, { "epoch": 0.0997302929998774, "grad_norm": 1.2971614336239201, "learning_rate": 9.873048992404795e-06, "loss": 0.7536, "step": 3254 }, { "epoch": 0.0997609415226186, "grad_norm": 1.4171123894648494, "learning_rate": 9.872937837811778e-06, "loss": 0.6146, "step": 3255 }, { "epoch": 0.09979159004535981, "grad_norm": 1.7180922311585496, "learning_rate": 9.872826635204457e-06, "loss": 0.7673, "step": 3256 }, { "epoch": 0.09982223856810102, "grad_norm": 1.6064698474266983, "learning_rate": 9.872715384583928e-06, "loss": 0.7226, "step": 3257 }, { "epoch": 0.09985288709084222, "grad_norm": 1.6095899822766602, "learning_rate": 9.872604085951288e-06, "loss": 0.8801, "step": 3258 }, { "epoch": 0.09988353561358343, "grad_norm": 1.4231665210409976, "learning_rate": 9.872492739307633e-06, "loss": 0.7595, "step": 3259 }, { "epoch": 0.09991418413632464, "grad_norm": 1.5474651018626948, "learning_rate": 9.87238134465406e-06, "loss": 0.6797, "step": 3260 }, { "epoch": 0.09994483265906583, "grad_norm": 1.3171915957986358, "learning_rate": 9.872269901991668e-06, "loss": 0.7447, "step": 3261 }, { "epoch": 0.09997548118180703, "grad_norm": 1.4890158038411527, "learning_rate": 9.872158411321552e-06, "loss": 0.8329, "step": 3262 }, { "epoch": 0.10000612970454824, "grad_norm": 1.3276352661325266, "learning_rate": 9.872046872644815e-06, "loss": 0.7456, "step": 3263 }, { "epoch": 0.10003677822728944, "grad_norm": 1.5195308469644062, "learning_rate": 9.871935285962553e-06, "loss": 0.6851, "step": 3264 }, { "epoch": 0.10006742675003065, "grad_norm": 1.5602948199012199, "learning_rate": 9.871823651275865e-06, "loss": 0.7377, "step": 3265 }, { "epoch": 0.10009807527277186, "grad_norm": 1.588639243430406, "learning_rate": 9.871711968585854e-06, "loss": 0.7188, "step": 3266 }, { "epoch": 0.10012872379551306, "grad_norm": 1.5042404023386247, "learning_rate": 9.871600237893617e-06, "loss": 0.8498, "step": 3267 }, { "epoch": 0.10015937231825427, "grad_norm": 1.3946907443726844, "learning_rate": 9.871488459200256e-06, "loss": 0.663, "step": 3268 }, { "epoch": 0.10019002084099546, "grad_norm": 1.4718380494157965, "learning_rate": 9.871376632506872e-06, "loss": 0.737, "step": 3269 }, { "epoch": 0.10022066936373666, "grad_norm": 1.3763838037855716, "learning_rate": 9.87126475781457e-06, "loss": 0.7078, "step": 3270 }, { "epoch": 0.10025131788647787, "grad_norm": 0.8949290424898252, "learning_rate": 9.871152835124448e-06, "loss": 0.4914, "step": 3271 }, { "epoch": 0.10028196640921908, "grad_norm": 1.5078804045788115, "learning_rate": 9.871040864437613e-06, "loss": 0.7984, "step": 3272 }, { "epoch": 0.10031261493196028, "grad_norm": 1.673828853991134, "learning_rate": 9.870928845755165e-06, "loss": 0.8667, "step": 3273 }, { "epoch": 0.10034326345470149, "grad_norm": 1.431888556230926, "learning_rate": 9.870816779078209e-06, "loss": 0.6848, "step": 3274 }, { "epoch": 0.10037391197744269, "grad_norm": 1.4594954413949497, "learning_rate": 9.870704664407849e-06, "loss": 0.7639, "step": 3275 }, { "epoch": 0.10040456050018388, "grad_norm": 1.5454660878930988, "learning_rate": 9.870592501745189e-06, "loss": 0.8339, "step": 3276 }, { "epoch": 0.10043520902292509, "grad_norm": 1.384324399239495, "learning_rate": 9.870480291091336e-06, "loss": 0.7316, "step": 3277 }, { "epoch": 0.1004658575456663, "grad_norm": 0.7974853604211377, "learning_rate": 9.870368032447393e-06, "loss": 0.4835, "step": 3278 }, { "epoch": 0.1004965060684075, "grad_norm": 1.3280992087326782, "learning_rate": 9.870255725814468e-06, "loss": 0.7248, "step": 3279 }, { "epoch": 0.1005271545911487, "grad_norm": 1.698478951037314, "learning_rate": 9.870143371193668e-06, "loss": 0.7546, "step": 3280 }, { "epoch": 0.10055780311388991, "grad_norm": 1.5010861411981529, "learning_rate": 9.8700309685861e-06, "loss": 0.7672, "step": 3281 }, { "epoch": 0.10058845163663112, "grad_norm": 1.5833752615163879, "learning_rate": 9.86991851799287e-06, "loss": 0.6914, "step": 3282 }, { "epoch": 0.10061910015937232, "grad_norm": 0.5721774997900052, "learning_rate": 9.869806019415086e-06, "loss": 0.4587, "step": 3283 }, { "epoch": 0.10064974868211352, "grad_norm": 1.3477726492813027, "learning_rate": 9.869693472853858e-06, "loss": 0.6874, "step": 3284 }, { "epoch": 0.10068039720485472, "grad_norm": 1.5263292326022928, "learning_rate": 9.869580878310294e-06, "loss": 0.7034, "step": 3285 }, { "epoch": 0.10071104572759593, "grad_norm": 1.4785537927704704, "learning_rate": 9.869468235785504e-06, "loss": 0.7697, "step": 3286 }, { "epoch": 0.10074169425033713, "grad_norm": 1.4644555954864051, "learning_rate": 9.869355545280596e-06, "loss": 0.7306, "step": 3287 }, { "epoch": 0.10077234277307834, "grad_norm": 1.2539264635324954, "learning_rate": 9.869242806796684e-06, "loss": 0.624, "step": 3288 }, { "epoch": 0.10080299129581954, "grad_norm": 1.5711941246582042, "learning_rate": 9.869130020334876e-06, "loss": 0.7437, "step": 3289 }, { "epoch": 0.10083363981856075, "grad_norm": 1.5286513432940767, "learning_rate": 9.869017185896284e-06, "loss": 0.7343, "step": 3290 }, { "epoch": 0.10086428834130196, "grad_norm": 1.2105903378649483, "learning_rate": 9.86890430348202e-06, "loss": 0.8771, "step": 3291 }, { "epoch": 0.10089493686404315, "grad_norm": 1.3948235735538967, "learning_rate": 9.868791373093197e-06, "loss": 0.7431, "step": 3292 }, { "epoch": 0.10092558538678435, "grad_norm": 1.6030162316866186, "learning_rate": 9.868678394730925e-06, "loss": 0.7414, "step": 3293 }, { "epoch": 0.10095623390952556, "grad_norm": 1.5771358822159471, "learning_rate": 9.868565368396321e-06, "loss": 0.6396, "step": 3294 }, { "epoch": 0.10098688243226676, "grad_norm": 1.4266266815419033, "learning_rate": 9.868452294090496e-06, "loss": 0.6833, "step": 3295 }, { "epoch": 0.10101753095500797, "grad_norm": 1.424140552598686, "learning_rate": 9.868339171814565e-06, "loss": 0.7933, "step": 3296 }, { "epoch": 0.10104817947774918, "grad_norm": 1.3136287936873023, "learning_rate": 9.868226001569643e-06, "loss": 0.7419, "step": 3297 }, { "epoch": 0.10107882800049038, "grad_norm": 1.3755671724152758, "learning_rate": 9.868112783356843e-06, "loss": 0.6746, "step": 3298 }, { "epoch": 0.10110947652323159, "grad_norm": 1.5594232872816096, "learning_rate": 9.867999517177284e-06, "loss": 0.8149, "step": 3299 }, { "epoch": 0.10114012504597278, "grad_norm": 1.6594851587453852, "learning_rate": 9.867886203032079e-06, "loss": 0.7494, "step": 3300 }, { "epoch": 0.10117077356871398, "grad_norm": 1.5365554993260564, "learning_rate": 9.867772840922346e-06, "loss": 0.7617, "step": 3301 }, { "epoch": 0.10120142209145519, "grad_norm": 1.4863950583826648, "learning_rate": 9.8676594308492e-06, "loss": 0.783, "step": 3302 }, { "epoch": 0.1012320706141964, "grad_norm": 1.64974626286479, "learning_rate": 9.867545972813763e-06, "loss": 0.7569, "step": 3303 }, { "epoch": 0.1012627191369376, "grad_norm": 1.5443601759888534, "learning_rate": 9.867432466817151e-06, "loss": 0.7768, "step": 3304 }, { "epoch": 0.1012933676596788, "grad_norm": 1.3895084672580702, "learning_rate": 9.867318912860479e-06, "loss": 0.6165, "step": 3305 }, { "epoch": 0.10132401618242001, "grad_norm": 1.641944039464696, "learning_rate": 9.867205310944868e-06, "loss": 0.7626, "step": 3306 }, { "epoch": 0.1013546647051612, "grad_norm": 1.7710112537826317, "learning_rate": 9.867091661071439e-06, "loss": 0.8238, "step": 3307 }, { "epoch": 0.10138531322790241, "grad_norm": 1.2535935236570446, "learning_rate": 9.866977963241312e-06, "loss": 0.6921, "step": 3308 }, { "epoch": 0.10141596175064362, "grad_norm": 1.3592112687423858, "learning_rate": 9.866864217455603e-06, "loss": 0.7049, "step": 3309 }, { "epoch": 0.10144661027338482, "grad_norm": 1.4743066013204984, "learning_rate": 9.866750423715437e-06, "loss": 0.7496, "step": 3310 }, { "epoch": 0.10147725879612603, "grad_norm": 1.440291013058442, "learning_rate": 9.866636582021934e-06, "loss": 0.7905, "step": 3311 }, { "epoch": 0.10150790731886723, "grad_norm": 1.4099933031807284, "learning_rate": 9.866522692376215e-06, "loss": 0.7499, "step": 3312 }, { "epoch": 0.10153855584160844, "grad_norm": 0.993281059910914, "learning_rate": 9.866408754779402e-06, "loss": 0.4861, "step": 3313 }, { "epoch": 0.10156920436434964, "grad_norm": 0.7918955220080107, "learning_rate": 9.866294769232621e-06, "loss": 0.4683, "step": 3314 }, { "epoch": 0.10159985288709084, "grad_norm": 1.564153502273847, "learning_rate": 9.86618073573699e-06, "loss": 0.7771, "step": 3315 }, { "epoch": 0.10163050140983204, "grad_norm": 0.4790401143693278, "learning_rate": 9.866066654293635e-06, "loss": 0.4865, "step": 3316 }, { "epoch": 0.10166114993257325, "grad_norm": 1.254811819510157, "learning_rate": 9.865952524903682e-06, "loss": 0.6296, "step": 3317 }, { "epoch": 0.10169179845531445, "grad_norm": 0.8747002422778364, "learning_rate": 9.865838347568252e-06, "loss": 0.4978, "step": 3318 }, { "epoch": 0.10172244697805566, "grad_norm": 1.3684327318509737, "learning_rate": 9.865724122288474e-06, "loss": 0.8104, "step": 3319 }, { "epoch": 0.10175309550079686, "grad_norm": 1.3595432971432748, "learning_rate": 9.865609849065471e-06, "loss": 0.7602, "step": 3320 }, { "epoch": 0.10178374402353807, "grad_norm": 1.660579137705899, "learning_rate": 9.865495527900369e-06, "loss": 0.8506, "step": 3321 }, { "epoch": 0.10181439254627928, "grad_norm": 1.33460383099578, "learning_rate": 9.865381158794293e-06, "loss": 0.7332, "step": 3322 }, { "epoch": 0.10184504106902047, "grad_norm": 1.4780060556964167, "learning_rate": 9.865266741748372e-06, "loss": 0.7525, "step": 3323 }, { "epoch": 0.10187568959176167, "grad_norm": 1.436436699399146, "learning_rate": 9.865152276763735e-06, "loss": 0.7301, "step": 3324 }, { "epoch": 0.10190633811450288, "grad_norm": 1.4899298680948518, "learning_rate": 9.865037763841505e-06, "loss": 0.6097, "step": 3325 }, { "epoch": 0.10193698663724408, "grad_norm": 1.0538846335263223, "learning_rate": 9.864923202982815e-06, "loss": 0.4863, "step": 3326 }, { "epoch": 0.10196763515998529, "grad_norm": 1.317229402450383, "learning_rate": 9.864808594188792e-06, "loss": 0.7517, "step": 3327 }, { "epoch": 0.1019982836827265, "grad_norm": 1.5371865556747062, "learning_rate": 9.864693937460565e-06, "loss": 0.7186, "step": 3328 }, { "epoch": 0.1020289322054677, "grad_norm": 1.4667395972674167, "learning_rate": 9.864579232799263e-06, "loss": 0.819, "step": 3329 }, { "epoch": 0.1020595807282089, "grad_norm": 1.3062808013082559, "learning_rate": 9.864464480206017e-06, "loss": 0.7383, "step": 3330 }, { "epoch": 0.1020902292509501, "grad_norm": 1.5362272836870665, "learning_rate": 9.86434967968196e-06, "loss": 0.7781, "step": 3331 }, { "epoch": 0.1021208777736913, "grad_norm": 1.3212831837476247, "learning_rate": 9.864234831228218e-06, "loss": 0.7543, "step": 3332 }, { "epoch": 0.10215152629643251, "grad_norm": 1.432698598518077, "learning_rate": 9.864119934845928e-06, "loss": 0.7268, "step": 3333 }, { "epoch": 0.10218217481917372, "grad_norm": 0.6339663883859813, "learning_rate": 9.86400499053622e-06, "loss": 0.4975, "step": 3334 }, { "epoch": 0.10221282334191492, "grad_norm": 0.5670471648148371, "learning_rate": 9.863889998300225e-06, "loss": 0.4668, "step": 3335 }, { "epoch": 0.10224347186465613, "grad_norm": 1.5688676635691634, "learning_rate": 9.863774958139078e-06, "loss": 0.7379, "step": 3336 }, { "epoch": 0.10227412038739733, "grad_norm": 1.3310826514991667, "learning_rate": 9.863659870053912e-06, "loss": 0.8158, "step": 3337 }, { "epoch": 0.10230476891013854, "grad_norm": 1.36506504383259, "learning_rate": 9.86354473404586e-06, "loss": 0.7653, "step": 3338 }, { "epoch": 0.10233541743287973, "grad_norm": 1.6552726448745405, "learning_rate": 9.863429550116056e-06, "loss": 0.7618, "step": 3339 }, { "epoch": 0.10236606595562094, "grad_norm": 1.4826456763864595, "learning_rate": 9.86331431826564e-06, "loss": 0.7782, "step": 3340 }, { "epoch": 0.10239671447836214, "grad_norm": 1.472521587704657, "learning_rate": 9.863199038495741e-06, "loss": 0.7295, "step": 3341 }, { "epoch": 0.10242736300110335, "grad_norm": 1.316575604327715, "learning_rate": 9.8630837108075e-06, "loss": 0.6619, "step": 3342 }, { "epoch": 0.10245801152384455, "grad_norm": 0.6147588937253246, "learning_rate": 9.862968335202048e-06, "loss": 0.4973, "step": 3343 }, { "epoch": 0.10248866004658576, "grad_norm": 1.400694455488082, "learning_rate": 9.862852911680527e-06, "loss": 0.7703, "step": 3344 }, { "epoch": 0.10251930856932696, "grad_norm": 1.5070683262214368, "learning_rate": 9.86273744024407e-06, "loss": 0.7418, "step": 3345 }, { "epoch": 0.10254995709206816, "grad_norm": 1.6330354457038025, "learning_rate": 9.862621920893817e-06, "loss": 0.674, "step": 3346 }, { "epoch": 0.10258060561480936, "grad_norm": 1.3301692424019707, "learning_rate": 9.862506353630908e-06, "loss": 0.6809, "step": 3347 }, { "epoch": 0.10261125413755057, "grad_norm": 1.4273577200893919, "learning_rate": 9.86239073845648e-06, "loss": 0.7281, "step": 3348 }, { "epoch": 0.10264190266029177, "grad_norm": 1.2032706827782689, "learning_rate": 9.86227507537167e-06, "loss": 0.741, "step": 3349 }, { "epoch": 0.10267255118303298, "grad_norm": 0.5304928729089503, "learning_rate": 9.86215936437762e-06, "loss": 0.4754, "step": 3350 }, { "epoch": 0.10270319970577418, "grad_norm": 1.5181171768351738, "learning_rate": 9.86204360547547e-06, "loss": 0.7035, "step": 3351 }, { "epoch": 0.10273384822851539, "grad_norm": 1.4037091549905771, "learning_rate": 9.861927798666361e-06, "loss": 0.7801, "step": 3352 }, { "epoch": 0.1027644967512566, "grad_norm": 1.3357242972535375, "learning_rate": 9.861811943951432e-06, "loss": 0.7409, "step": 3353 }, { "epoch": 0.10279514527399779, "grad_norm": 1.4133293183372249, "learning_rate": 9.861696041331828e-06, "loss": 0.7094, "step": 3354 }, { "epoch": 0.10282579379673899, "grad_norm": 1.47734186076795, "learning_rate": 9.861580090808687e-06, "loss": 0.7556, "step": 3355 }, { "epoch": 0.1028564423194802, "grad_norm": 1.5255711026060745, "learning_rate": 9.861464092383155e-06, "loss": 0.803, "step": 3356 }, { "epoch": 0.1028870908422214, "grad_norm": 0.49673591582026666, "learning_rate": 9.86134804605637e-06, "loss": 0.4862, "step": 3357 }, { "epoch": 0.10291773936496261, "grad_norm": 1.4921539449181518, "learning_rate": 9.861231951829484e-06, "loss": 0.7963, "step": 3358 }, { "epoch": 0.10294838788770382, "grad_norm": 1.265888404919032, "learning_rate": 9.861115809703633e-06, "loss": 0.7158, "step": 3359 }, { "epoch": 0.10297903641044502, "grad_norm": 1.370118310645838, "learning_rate": 9.860999619679965e-06, "loss": 0.6694, "step": 3360 }, { "epoch": 0.10300968493318623, "grad_norm": 1.5169015795740612, "learning_rate": 9.860883381759622e-06, "loss": 0.8541, "step": 3361 }, { "epoch": 0.10304033345592742, "grad_norm": 1.324951222642727, "learning_rate": 9.860767095943754e-06, "loss": 0.6813, "step": 3362 }, { "epoch": 0.10307098197866862, "grad_norm": 1.3324322216011353, "learning_rate": 9.860650762233504e-06, "loss": 0.7098, "step": 3363 }, { "epoch": 0.10310163050140983, "grad_norm": 1.429494135749233, "learning_rate": 9.860534380630016e-06, "loss": 0.8189, "step": 3364 }, { "epoch": 0.10313227902415104, "grad_norm": 1.5600888378407338, "learning_rate": 9.860417951134441e-06, "loss": 0.8897, "step": 3365 }, { "epoch": 0.10316292754689224, "grad_norm": 0.511609741689785, "learning_rate": 9.860301473747923e-06, "loss": 0.4436, "step": 3366 }, { "epoch": 0.10319357606963345, "grad_norm": 1.3744552461531678, "learning_rate": 9.860184948471613e-06, "loss": 0.7771, "step": 3367 }, { "epoch": 0.10322422459237465, "grad_norm": 1.263067574207378, "learning_rate": 9.860068375306655e-06, "loss": 0.6536, "step": 3368 }, { "epoch": 0.10325487311511586, "grad_norm": 1.3490354238927336, "learning_rate": 9.859951754254203e-06, "loss": 0.7212, "step": 3369 }, { "epoch": 0.10328552163785705, "grad_norm": 1.3726918310519485, "learning_rate": 9.859835085315399e-06, "loss": 0.7845, "step": 3370 }, { "epoch": 0.10331617016059826, "grad_norm": 1.4664544409010396, "learning_rate": 9.859718368491398e-06, "loss": 0.8219, "step": 3371 }, { "epoch": 0.10334681868333946, "grad_norm": 1.2968681604669676, "learning_rate": 9.85960160378335e-06, "loss": 0.7137, "step": 3372 }, { "epoch": 0.10337746720608067, "grad_norm": 1.4282556684339525, "learning_rate": 9.859484791192402e-06, "loss": 0.788, "step": 3373 }, { "epoch": 0.10340811572882187, "grad_norm": 1.3655155992151184, "learning_rate": 9.859367930719708e-06, "loss": 0.6923, "step": 3374 }, { "epoch": 0.10343876425156308, "grad_norm": 1.381471900661171, "learning_rate": 9.859251022366418e-06, "loss": 0.7909, "step": 3375 }, { "epoch": 0.10346941277430428, "grad_norm": 0.5246076379561411, "learning_rate": 9.859134066133685e-06, "loss": 0.4847, "step": 3376 }, { "epoch": 0.10350006129704548, "grad_norm": 1.375433127616139, "learning_rate": 9.85901706202266e-06, "loss": 0.7397, "step": 3377 }, { "epoch": 0.10353070981978668, "grad_norm": 1.3982113590033394, "learning_rate": 9.858900010034498e-06, "loss": 0.7395, "step": 3378 }, { "epoch": 0.10356135834252789, "grad_norm": 1.5710002402707632, "learning_rate": 9.858782910170348e-06, "loss": 0.7608, "step": 3379 }, { "epoch": 0.10359200686526909, "grad_norm": 1.3895610542930543, "learning_rate": 9.85866576243137e-06, "loss": 0.6516, "step": 3380 }, { "epoch": 0.1036226553880103, "grad_norm": 1.3678094844798783, "learning_rate": 9.858548566818712e-06, "loss": 0.7399, "step": 3381 }, { "epoch": 0.1036533039107515, "grad_norm": 1.1987982189372175, "learning_rate": 9.858431323333535e-06, "loss": 0.7074, "step": 3382 }, { "epoch": 0.10368395243349271, "grad_norm": 1.561708634908585, "learning_rate": 9.85831403197699e-06, "loss": 0.7511, "step": 3383 }, { "epoch": 0.10371460095623392, "grad_norm": 1.4046366812041493, "learning_rate": 9.858196692750233e-06, "loss": 0.6745, "step": 3384 }, { "epoch": 0.10374524947897511, "grad_norm": 1.5707877471431204, "learning_rate": 9.858079305654421e-06, "loss": 0.7895, "step": 3385 }, { "epoch": 0.10377589800171631, "grad_norm": 1.0871881632533933, "learning_rate": 9.857961870690712e-06, "loss": 0.6395, "step": 3386 }, { "epoch": 0.10380654652445752, "grad_norm": 1.4935264313753864, "learning_rate": 9.85784438786026e-06, "loss": 0.7807, "step": 3387 }, { "epoch": 0.10383719504719872, "grad_norm": 1.3019850062850389, "learning_rate": 9.857726857164227e-06, "loss": 0.661, "step": 3388 }, { "epoch": 0.10386784356993993, "grad_norm": 1.3636616849321745, "learning_rate": 9.857609278603766e-06, "loss": 0.7214, "step": 3389 }, { "epoch": 0.10389849209268114, "grad_norm": 1.377141233420161, "learning_rate": 9.857491652180038e-06, "loss": 0.7153, "step": 3390 }, { "epoch": 0.10392914061542234, "grad_norm": 1.4091449011903379, "learning_rate": 9.857373977894202e-06, "loss": 0.6971, "step": 3391 }, { "epoch": 0.10395978913816355, "grad_norm": 1.460370236224493, "learning_rate": 9.857256255747418e-06, "loss": 0.8279, "step": 3392 }, { "epoch": 0.10399043766090474, "grad_norm": 1.2318619321026845, "learning_rate": 9.857138485740845e-06, "loss": 0.6798, "step": 3393 }, { "epoch": 0.10402108618364594, "grad_norm": 1.509970045212481, "learning_rate": 9.857020667875645e-06, "loss": 0.6979, "step": 3394 }, { "epoch": 0.10405173470638715, "grad_norm": 1.26642490512228, "learning_rate": 9.856902802152977e-06, "loss": 0.7798, "step": 3395 }, { "epoch": 0.10408238322912836, "grad_norm": 1.3754281737645704, "learning_rate": 9.856784888574e-06, "loss": 0.6972, "step": 3396 }, { "epoch": 0.10411303175186956, "grad_norm": 1.337479207706975, "learning_rate": 9.856666927139882e-06, "loss": 0.757, "step": 3397 }, { "epoch": 0.10414368027461077, "grad_norm": 1.3958541898514818, "learning_rate": 9.856548917851782e-06, "loss": 0.6744, "step": 3398 }, { "epoch": 0.10417432879735197, "grad_norm": 1.315009222029677, "learning_rate": 9.856430860710862e-06, "loss": 0.7043, "step": 3399 }, { "epoch": 0.10420497732009318, "grad_norm": 1.2563519627050441, "learning_rate": 9.856312755718286e-06, "loss": 0.7508, "step": 3400 }, { "epoch": 0.10423562584283437, "grad_norm": 1.3839584806407685, "learning_rate": 9.85619460287522e-06, "loss": 0.6954, "step": 3401 }, { "epoch": 0.10426627436557558, "grad_norm": 1.6153110400972723, "learning_rate": 9.856076402182824e-06, "loss": 0.696, "step": 3402 }, { "epoch": 0.10429692288831678, "grad_norm": 1.5235147757171104, "learning_rate": 9.855958153642265e-06, "loss": 0.763, "step": 3403 }, { "epoch": 0.10432757141105799, "grad_norm": 1.244849076450271, "learning_rate": 9.855839857254709e-06, "loss": 0.5945, "step": 3404 }, { "epoch": 0.10435821993379919, "grad_norm": 1.3570014072006364, "learning_rate": 9.855721513021319e-06, "loss": 0.7079, "step": 3405 }, { "epoch": 0.1043888684565404, "grad_norm": 1.5528340554652198, "learning_rate": 9.855603120943263e-06, "loss": 0.7624, "step": 3406 }, { "epoch": 0.1044195169792816, "grad_norm": 1.318580350518916, "learning_rate": 9.855484681021708e-06, "loss": 0.8135, "step": 3407 }, { "epoch": 0.1044501655020228, "grad_norm": 1.4070025101256989, "learning_rate": 9.855366193257818e-06, "loss": 0.7726, "step": 3408 }, { "epoch": 0.104480814024764, "grad_norm": 6.000636340684537, "learning_rate": 9.855247657652764e-06, "loss": 0.8592, "step": 3409 }, { "epoch": 0.10451146254750521, "grad_norm": 1.364372738332703, "learning_rate": 9.855129074207714e-06, "loss": 0.6776, "step": 3410 }, { "epoch": 0.10454211107024641, "grad_norm": 0.5156655723687663, "learning_rate": 9.855010442923832e-06, "loss": 0.4737, "step": 3411 }, { "epoch": 0.10457275959298762, "grad_norm": 1.652035100663193, "learning_rate": 9.854891763802292e-06, "loss": 0.8624, "step": 3412 }, { "epoch": 0.10460340811572882, "grad_norm": 1.4714273884295632, "learning_rate": 9.85477303684426e-06, "loss": 0.6709, "step": 3413 }, { "epoch": 0.10463405663847003, "grad_norm": 0.5236086453198244, "learning_rate": 9.85465426205091e-06, "loss": 0.4792, "step": 3414 }, { "epoch": 0.10466470516121124, "grad_norm": 1.4944415998281901, "learning_rate": 9.854535439423404e-06, "loss": 0.8374, "step": 3415 }, { "epoch": 0.10469535368395243, "grad_norm": 1.4536744867026736, "learning_rate": 9.854416568962924e-06, "loss": 0.7605, "step": 3416 }, { "epoch": 0.10472600220669363, "grad_norm": 0.4961564429500646, "learning_rate": 9.854297650670632e-06, "loss": 0.4833, "step": 3417 }, { "epoch": 0.10475665072943484, "grad_norm": 1.4801233364900506, "learning_rate": 9.854178684547704e-06, "loss": 0.7151, "step": 3418 }, { "epoch": 0.10478729925217604, "grad_norm": 1.2652730003052872, "learning_rate": 9.85405967059531e-06, "loss": 0.8024, "step": 3419 }, { "epoch": 0.10481794777491725, "grad_norm": 1.4413501909290187, "learning_rate": 9.853940608814628e-06, "loss": 0.7922, "step": 3420 }, { "epoch": 0.10484859629765846, "grad_norm": 1.4167211168437697, "learning_rate": 9.853821499206824e-06, "loss": 0.737, "step": 3421 }, { "epoch": 0.10487924482039966, "grad_norm": 1.4246032586774366, "learning_rate": 9.853702341773075e-06, "loss": 0.7786, "step": 3422 }, { "epoch": 0.10490989334314087, "grad_norm": 1.3824703800169815, "learning_rate": 9.853583136514557e-06, "loss": 0.7347, "step": 3423 }, { "epoch": 0.10494054186588206, "grad_norm": 1.5065788487121403, "learning_rate": 9.85346388343244e-06, "loss": 0.7014, "step": 3424 }, { "epoch": 0.10497119038862326, "grad_norm": 0.5611402919724015, "learning_rate": 9.853344582527903e-06, "loss": 0.4649, "step": 3425 }, { "epoch": 0.10500183891136447, "grad_norm": 1.5178464989034932, "learning_rate": 9.85322523380212e-06, "loss": 0.7798, "step": 3426 }, { "epoch": 0.10503248743410568, "grad_norm": 1.3513929083622735, "learning_rate": 9.853105837256267e-06, "loss": 0.8449, "step": 3427 }, { "epoch": 0.10506313595684688, "grad_norm": 1.4284746098649659, "learning_rate": 9.85298639289152e-06, "loss": 0.7901, "step": 3428 }, { "epoch": 0.10509378447958809, "grad_norm": 1.4295397941513368, "learning_rate": 9.852866900709058e-06, "loss": 0.6998, "step": 3429 }, { "epoch": 0.10512443300232929, "grad_norm": 1.5830370344003326, "learning_rate": 9.852747360710055e-06, "loss": 0.7554, "step": 3430 }, { "epoch": 0.1051550815250705, "grad_norm": 1.3064171295517484, "learning_rate": 9.85262777289569e-06, "loss": 0.7489, "step": 3431 }, { "epoch": 0.10518573004781169, "grad_norm": 1.8325474723133117, "learning_rate": 9.852508137267143e-06, "loss": 0.7232, "step": 3432 }, { "epoch": 0.1052163785705529, "grad_norm": 1.3881573239722254, "learning_rate": 9.852388453825592e-06, "loss": 0.7546, "step": 3433 }, { "epoch": 0.1052470270932941, "grad_norm": 1.4258600714552725, "learning_rate": 9.852268722572216e-06, "loss": 0.7152, "step": 3434 }, { "epoch": 0.10527767561603531, "grad_norm": 0.5344145635583836, "learning_rate": 9.852148943508195e-06, "loss": 0.4764, "step": 3435 }, { "epoch": 0.10530832413877651, "grad_norm": 1.5259973632492196, "learning_rate": 9.852029116634708e-06, "loss": 0.7376, "step": 3436 }, { "epoch": 0.10533897266151772, "grad_norm": 1.4773801674524227, "learning_rate": 9.851909241952938e-06, "loss": 0.7245, "step": 3437 }, { "epoch": 0.10536962118425892, "grad_norm": 0.5077850271316033, "learning_rate": 9.851789319464064e-06, "loss": 0.4791, "step": 3438 }, { "epoch": 0.10540026970700012, "grad_norm": 1.5154769872399292, "learning_rate": 9.851669349169269e-06, "loss": 0.7916, "step": 3439 }, { "epoch": 0.10543091822974132, "grad_norm": 1.5203455018131915, "learning_rate": 9.851549331069734e-06, "loss": 0.7816, "step": 3440 }, { "epoch": 0.10546156675248253, "grad_norm": 1.324729634415685, "learning_rate": 9.85142926516664e-06, "loss": 0.6916, "step": 3441 }, { "epoch": 0.10549221527522373, "grad_norm": 0.4911562854921567, "learning_rate": 9.851309151461176e-06, "loss": 0.4787, "step": 3442 }, { "epoch": 0.10552286379796494, "grad_norm": 1.4713591545166873, "learning_rate": 9.85118898995452e-06, "loss": 0.766, "step": 3443 }, { "epoch": 0.10555351232070614, "grad_norm": 1.4518002886548211, "learning_rate": 9.851068780647857e-06, "loss": 0.7821, "step": 3444 }, { "epoch": 0.10558416084344735, "grad_norm": 1.305902358029406, "learning_rate": 9.850948523542373e-06, "loss": 0.6932, "step": 3445 }, { "epoch": 0.10561480936618856, "grad_norm": 1.3392698904333575, "learning_rate": 9.850828218639252e-06, "loss": 0.6809, "step": 3446 }, { "epoch": 0.10564545788892975, "grad_norm": 0.5200783110668722, "learning_rate": 9.85070786593968e-06, "loss": 0.45, "step": 3447 }, { "epoch": 0.10567610641167095, "grad_norm": 1.6263728828568125, "learning_rate": 9.850587465444841e-06, "loss": 0.7657, "step": 3448 }, { "epoch": 0.10570675493441216, "grad_norm": 1.5072293707745466, "learning_rate": 9.850467017155922e-06, "loss": 0.5834, "step": 3449 }, { "epoch": 0.10573740345715336, "grad_norm": 1.2137810208331605, "learning_rate": 9.850346521074112e-06, "loss": 0.6702, "step": 3450 }, { "epoch": 0.10576805197989457, "grad_norm": 1.4384238813118158, "learning_rate": 9.850225977200596e-06, "loss": 0.8233, "step": 3451 }, { "epoch": 0.10579870050263578, "grad_norm": 1.5398806259095021, "learning_rate": 9.850105385536564e-06, "loss": 0.7587, "step": 3452 }, { "epoch": 0.10582934902537698, "grad_norm": 1.4885158457907095, "learning_rate": 9.849984746083202e-06, "loss": 0.7361, "step": 3453 }, { "epoch": 0.10585999754811819, "grad_norm": 1.5107229173527996, "learning_rate": 9.849864058841699e-06, "loss": 0.7834, "step": 3454 }, { "epoch": 0.10589064607085938, "grad_norm": 1.3847019853455633, "learning_rate": 9.849743323813243e-06, "loss": 0.7255, "step": 3455 }, { "epoch": 0.10592129459360058, "grad_norm": 1.3326908955506473, "learning_rate": 9.849622540999027e-06, "loss": 0.7323, "step": 3456 }, { "epoch": 0.10595194311634179, "grad_norm": 0.5378336626860446, "learning_rate": 9.849501710400238e-06, "loss": 0.4766, "step": 3457 }, { "epoch": 0.105982591639083, "grad_norm": 2.1222294906872636, "learning_rate": 9.84938083201807e-06, "loss": 0.8034, "step": 3458 }, { "epoch": 0.1060132401618242, "grad_norm": 1.5328443626845416, "learning_rate": 9.84925990585371e-06, "loss": 0.813, "step": 3459 }, { "epoch": 0.10604388868456541, "grad_norm": 1.605932961475948, "learning_rate": 9.849138931908352e-06, "loss": 0.7266, "step": 3460 }, { "epoch": 0.10607453720730661, "grad_norm": 1.3590495570379966, "learning_rate": 9.849017910183187e-06, "loss": 0.7486, "step": 3461 }, { "epoch": 0.10610518573004782, "grad_norm": 1.500180147596377, "learning_rate": 9.848896840679408e-06, "loss": 0.6543, "step": 3462 }, { "epoch": 0.10613583425278901, "grad_norm": 1.3610896079468138, "learning_rate": 9.848775723398207e-06, "loss": 0.8478, "step": 3463 }, { "epoch": 0.10616648277553022, "grad_norm": 1.4578063082665196, "learning_rate": 9.84865455834078e-06, "loss": 0.7232, "step": 3464 }, { "epoch": 0.10619713129827142, "grad_norm": 1.332614200338215, "learning_rate": 9.848533345508318e-06, "loss": 0.7179, "step": 3465 }, { "epoch": 0.10622777982101263, "grad_norm": 1.5697115926535472, "learning_rate": 9.848412084902017e-06, "loss": 0.9224, "step": 3466 }, { "epoch": 0.10625842834375383, "grad_norm": 0.585028386844262, "learning_rate": 9.848290776523071e-06, "loss": 0.4569, "step": 3467 }, { "epoch": 0.10628907686649504, "grad_norm": 1.3140560306476607, "learning_rate": 9.848169420372675e-06, "loss": 0.7486, "step": 3468 }, { "epoch": 0.10631972538923624, "grad_norm": 1.4682805442106688, "learning_rate": 9.848048016452025e-06, "loss": 0.7336, "step": 3469 }, { "epoch": 0.10635037391197744, "grad_norm": 1.4240337035583739, "learning_rate": 9.847926564762318e-06, "loss": 0.6063, "step": 3470 }, { "epoch": 0.10638102243471864, "grad_norm": 1.3072054604908114, "learning_rate": 9.84780506530475e-06, "loss": 0.6695, "step": 3471 }, { "epoch": 0.10641167095745985, "grad_norm": 1.484484015112702, "learning_rate": 9.84768351808052e-06, "loss": 0.8065, "step": 3472 }, { "epoch": 0.10644231948020105, "grad_norm": 1.4724770362426178, "learning_rate": 9.847561923090823e-06, "loss": 0.8077, "step": 3473 }, { "epoch": 0.10647296800294226, "grad_norm": 1.5755402295712613, "learning_rate": 9.847440280336856e-06, "loss": 0.8315, "step": 3474 }, { "epoch": 0.10650361652568346, "grad_norm": 1.5593878037817386, "learning_rate": 9.847318589819821e-06, "loss": 0.7373, "step": 3475 }, { "epoch": 0.10653426504842467, "grad_norm": 1.5114716484510125, "learning_rate": 9.847196851540916e-06, "loss": 0.7165, "step": 3476 }, { "epoch": 0.10656491357116588, "grad_norm": 1.3503399789960298, "learning_rate": 9.84707506550134e-06, "loss": 0.7123, "step": 3477 }, { "epoch": 0.10659556209390707, "grad_norm": 1.5661041580384016, "learning_rate": 9.846953231702294e-06, "loss": 0.7955, "step": 3478 }, { "epoch": 0.10662621061664827, "grad_norm": 1.337348996039882, "learning_rate": 9.846831350144977e-06, "loss": 0.7997, "step": 3479 }, { "epoch": 0.10665685913938948, "grad_norm": 1.773218363765627, "learning_rate": 9.84670942083059e-06, "loss": 0.8046, "step": 3480 }, { "epoch": 0.10668750766213068, "grad_norm": 1.544752294637598, "learning_rate": 9.846587443760337e-06, "loss": 0.8524, "step": 3481 }, { "epoch": 0.10671815618487189, "grad_norm": 1.4736871756127734, "learning_rate": 9.846465418935415e-06, "loss": 0.6881, "step": 3482 }, { "epoch": 0.1067488047076131, "grad_norm": 1.3605981143144221, "learning_rate": 9.84634334635703e-06, "loss": 0.7719, "step": 3483 }, { "epoch": 0.1067794532303543, "grad_norm": 1.3756062002667169, "learning_rate": 9.846221226026386e-06, "loss": 0.8105, "step": 3484 }, { "epoch": 0.10681010175309551, "grad_norm": 1.3647359223818845, "learning_rate": 9.846099057944683e-06, "loss": 0.7207, "step": 3485 }, { "epoch": 0.1068407502758367, "grad_norm": 1.4468249852520163, "learning_rate": 9.845976842113125e-06, "loss": 0.6774, "step": 3486 }, { "epoch": 0.1068713987985779, "grad_norm": 1.3897848679092706, "learning_rate": 9.845854578532918e-06, "loss": 0.6846, "step": 3487 }, { "epoch": 0.10690204732131911, "grad_norm": 1.4275972244274635, "learning_rate": 9.845732267205266e-06, "loss": 0.7492, "step": 3488 }, { "epoch": 0.10693269584406032, "grad_norm": 1.5514026630467685, "learning_rate": 9.845609908131374e-06, "loss": 0.726, "step": 3489 }, { "epoch": 0.10696334436680152, "grad_norm": 1.3344894131417202, "learning_rate": 9.845487501312449e-06, "loss": 0.7522, "step": 3490 }, { "epoch": 0.10699399288954273, "grad_norm": 1.9031615870669334, "learning_rate": 9.845365046749695e-06, "loss": 0.8122, "step": 3491 }, { "epoch": 0.10702464141228393, "grad_norm": 1.472020178644004, "learning_rate": 9.84524254444432e-06, "loss": 0.7205, "step": 3492 }, { "epoch": 0.10705528993502514, "grad_norm": 1.5424366000655845, "learning_rate": 9.845119994397529e-06, "loss": 0.7978, "step": 3493 }, { "epoch": 0.10708593845776633, "grad_norm": 1.5517305817354676, "learning_rate": 9.844997396610535e-06, "loss": 0.8417, "step": 3494 }, { "epoch": 0.10711658698050754, "grad_norm": 1.61148990747497, "learning_rate": 9.844874751084536e-06, "loss": 0.6696, "step": 3495 }, { "epoch": 0.10714723550324874, "grad_norm": 1.6132760281668603, "learning_rate": 9.84475205782075e-06, "loss": 0.7596, "step": 3496 }, { "epoch": 0.10717788402598995, "grad_norm": 1.396274645012318, "learning_rate": 9.844629316820382e-06, "loss": 0.7993, "step": 3497 }, { "epoch": 0.10720853254873115, "grad_norm": 1.4960093031108197, "learning_rate": 9.844506528084643e-06, "loss": 0.8758, "step": 3498 }, { "epoch": 0.10723918107147236, "grad_norm": 1.411138176895043, "learning_rate": 9.84438369161474e-06, "loss": 0.6729, "step": 3499 }, { "epoch": 0.10726982959421356, "grad_norm": 1.5500876127032763, "learning_rate": 9.844260807411886e-06, "loss": 0.78, "step": 3500 }, { "epoch": 0.10730047811695476, "grad_norm": 1.449358966094979, "learning_rate": 9.844137875477288e-06, "loss": 0.7535, "step": 3501 }, { "epoch": 0.10733112663969596, "grad_norm": 1.5874455991954515, "learning_rate": 9.844014895812163e-06, "loss": 0.7138, "step": 3502 }, { "epoch": 0.10736177516243717, "grad_norm": 1.5522827475694279, "learning_rate": 9.843891868417718e-06, "loss": 0.694, "step": 3503 }, { "epoch": 0.10739242368517837, "grad_norm": 1.6104571659303688, "learning_rate": 9.84376879329517e-06, "loss": 0.7158, "step": 3504 }, { "epoch": 0.10742307220791958, "grad_norm": 0.6943559254199687, "learning_rate": 9.843645670445726e-06, "loss": 0.4766, "step": 3505 }, { "epoch": 0.10745372073066078, "grad_norm": 1.3427001165245658, "learning_rate": 9.843522499870602e-06, "loss": 0.8095, "step": 3506 }, { "epoch": 0.10748436925340199, "grad_norm": 1.5383734518113679, "learning_rate": 9.843399281571013e-06, "loss": 0.7552, "step": 3507 }, { "epoch": 0.1075150177761432, "grad_norm": 1.4494518587065073, "learning_rate": 9.843276015548171e-06, "loss": 0.6548, "step": 3508 }, { "epoch": 0.10754566629888439, "grad_norm": 1.4052913415577637, "learning_rate": 9.843152701803292e-06, "loss": 0.6762, "step": 3509 }, { "epoch": 0.1075763148216256, "grad_norm": 1.439563658413052, "learning_rate": 9.843029340337589e-06, "loss": 0.6748, "step": 3510 }, { "epoch": 0.1076069633443668, "grad_norm": 1.437072636212407, "learning_rate": 9.84290593115228e-06, "loss": 0.7776, "step": 3511 }, { "epoch": 0.107637611867108, "grad_norm": 1.2677071521919197, "learning_rate": 9.842782474248578e-06, "loss": 0.7168, "step": 3512 }, { "epoch": 0.10766826038984921, "grad_norm": 1.4904680277056401, "learning_rate": 9.842658969627702e-06, "loss": 0.6986, "step": 3513 }, { "epoch": 0.10769890891259042, "grad_norm": 1.7558168090741613, "learning_rate": 9.842535417290868e-06, "loss": 0.7435, "step": 3514 }, { "epoch": 0.10772955743533162, "grad_norm": 1.498659890608375, "learning_rate": 9.842411817239293e-06, "loss": 0.7132, "step": 3515 }, { "epoch": 0.10776020595807283, "grad_norm": 1.6622725559828644, "learning_rate": 9.842288169474197e-06, "loss": 0.8068, "step": 3516 }, { "epoch": 0.10779085448081402, "grad_norm": 1.5713235273292891, "learning_rate": 9.842164473996797e-06, "loss": 0.8251, "step": 3517 }, { "epoch": 0.10782150300355522, "grad_norm": 1.5793044380673182, "learning_rate": 9.842040730808308e-06, "loss": 0.7153, "step": 3518 }, { "epoch": 0.10785215152629643, "grad_norm": 1.6362454091208645, "learning_rate": 9.841916939909956e-06, "loss": 0.6956, "step": 3519 }, { "epoch": 0.10788280004903764, "grad_norm": 1.4173237927924613, "learning_rate": 9.841793101302957e-06, "loss": 0.7426, "step": 3520 }, { "epoch": 0.10791344857177884, "grad_norm": 1.3997199474174105, "learning_rate": 9.841669214988532e-06, "loss": 0.7067, "step": 3521 }, { "epoch": 0.10794409709452005, "grad_norm": 1.305453930001574, "learning_rate": 9.8415452809679e-06, "loss": 0.7562, "step": 3522 }, { "epoch": 0.10797474561726125, "grad_norm": 1.447798707214034, "learning_rate": 9.841421299242284e-06, "loss": 0.6888, "step": 3523 }, { "epoch": 0.10800539414000246, "grad_norm": 1.3546723433484602, "learning_rate": 9.841297269812906e-06, "loss": 0.7961, "step": 3524 }, { "epoch": 0.10803604266274365, "grad_norm": 1.2527913647611253, "learning_rate": 9.841173192680987e-06, "loss": 0.6934, "step": 3525 }, { "epoch": 0.10806669118548486, "grad_norm": 1.3185338217439033, "learning_rate": 9.84104906784775e-06, "loss": 0.7039, "step": 3526 }, { "epoch": 0.10809733970822606, "grad_norm": 1.4078691689307254, "learning_rate": 9.840924895314418e-06, "loss": 0.7293, "step": 3527 }, { "epoch": 0.10812798823096727, "grad_norm": 1.507129979949809, "learning_rate": 9.840800675082214e-06, "loss": 0.8856, "step": 3528 }, { "epoch": 0.10815863675370847, "grad_norm": 1.3499737914650836, "learning_rate": 9.840676407152363e-06, "loss": 0.7279, "step": 3529 }, { "epoch": 0.10818928527644968, "grad_norm": 1.579287188311644, "learning_rate": 9.840552091526088e-06, "loss": 0.7529, "step": 3530 }, { "epoch": 0.10821993379919088, "grad_norm": 1.753472199072513, "learning_rate": 9.840427728204615e-06, "loss": 0.8146, "step": 3531 }, { "epoch": 0.10825058232193208, "grad_norm": 1.2136451559559764, "learning_rate": 9.84030331718917e-06, "loss": 0.6291, "step": 3532 }, { "epoch": 0.10828123084467328, "grad_norm": 1.6052115338125577, "learning_rate": 9.840178858480976e-06, "loss": 0.7191, "step": 3533 }, { "epoch": 0.10831187936741449, "grad_norm": 1.6078186429127788, "learning_rate": 9.840054352081262e-06, "loss": 0.8484, "step": 3534 }, { "epoch": 0.1083425278901557, "grad_norm": 1.6820061516594098, "learning_rate": 9.839929797991256e-06, "loss": 0.7529, "step": 3535 }, { "epoch": 0.1083731764128969, "grad_norm": 1.443607647490959, "learning_rate": 9.839805196212183e-06, "loss": 0.7435, "step": 3536 }, { "epoch": 0.1084038249356381, "grad_norm": 1.431898009122367, "learning_rate": 9.839680546745268e-06, "loss": 0.8623, "step": 3537 }, { "epoch": 0.10843447345837931, "grad_norm": 1.2155029153817145, "learning_rate": 9.839555849591744e-06, "loss": 0.7113, "step": 3538 }, { "epoch": 0.10846512198112052, "grad_norm": 1.3448493287791337, "learning_rate": 9.83943110475284e-06, "loss": 0.743, "step": 3539 }, { "epoch": 0.10849577050386171, "grad_norm": 1.3774393292544553, "learning_rate": 9.839306312229779e-06, "loss": 0.767, "step": 3540 }, { "epoch": 0.10852641902660291, "grad_norm": 1.5085994944516967, "learning_rate": 9.839181472023798e-06, "loss": 0.6687, "step": 3541 }, { "epoch": 0.10855706754934412, "grad_norm": 1.5626666248776586, "learning_rate": 9.839056584136123e-06, "loss": 0.6749, "step": 3542 }, { "epoch": 0.10858771607208532, "grad_norm": 1.3442810466351718, "learning_rate": 9.838931648567986e-06, "loss": 0.7534, "step": 3543 }, { "epoch": 0.10861836459482653, "grad_norm": 1.4184308583620477, "learning_rate": 9.838806665320615e-06, "loss": 0.814, "step": 3544 }, { "epoch": 0.10864901311756774, "grad_norm": 1.4208836076265288, "learning_rate": 9.838681634395245e-06, "loss": 0.6754, "step": 3545 }, { "epoch": 0.10867966164030894, "grad_norm": 1.478956058221595, "learning_rate": 9.838556555793108e-06, "loss": 0.6215, "step": 3546 }, { "epoch": 0.10871031016305015, "grad_norm": 1.5041399469713252, "learning_rate": 9.838431429515434e-06, "loss": 0.7015, "step": 3547 }, { "epoch": 0.10874095868579134, "grad_norm": 1.5571078734947046, "learning_rate": 9.838306255563459e-06, "loss": 0.8105, "step": 3548 }, { "epoch": 0.10877160720853254, "grad_norm": 1.4599633489034896, "learning_rate": 9.838181033938413e-06, "loss": 0.6883, "step": 3549 }, { "epoch": 0.10880225573127375, "grad_norm": 1.4189350357331323, "learning_rate": 9.838055764641533e-06, "loss": 0.749, "step": 3550 }, { "epoch": 0.10883290425401496, "grad_norm": 0.9460229880843364, "learning_rate": 9.83793044767405e-06, "loss": 0.477, "step": 3551 }, { "epoch": 0.10886355277675616, "grad_norm": 1.4548472818792393, "learning_rate": 9.837805083037199e-06, "loss": 0.7656, "step": 3552 }, { "epoch": 0.10889420129949737, "grad_norm": 1.4884600362201899, "learning_rate": 9.83767967073222e-06, "loss": 0.755, "step": 3553 }, { "epoch": 0.10892484982223857, "grad_norm": 0.479941627575936, "learning_rate": 9.837554210760344e-06, "loss": 0.4824, "step": 3554 }, { "epoch": 0.10895549834497978, "grad_norm": 1.508771570106034, "learning_rate": 9.837428703122807e-06, "loss": 0.7252, "step": 3555 }, { "epoch": 0.10898614686772097, "grad_norm": 1.3143769969925725, "learning_rate": 9.837303147820849e-06, "loss": 0.7355, "step": 3556 }, { "epoch": 0.10901679539046218, "grad_norm": 1.4670216820537623, "learning_rate": 9.837177544855705e-06, "loss": 0.6778, "step": 3557 }, { "epoch": 0.10904744391320338, "grad_norm": 1.4019360104900676, "learning_rate": 9.837051894228614e-06, "loss": 0.8079, "step": 3558 }, { "epoch": 0.10907809243594459, "grad_norm": 0.7885445574735598, "learning_rate": 9.836926195940811e-06, "loss": 0.4951, "step": 3559 }, { "epoch": 0.1091087409586858, "grad_norm": 1.3420224466831032, "learning_rate": 9.836800449993538e-06, "loss": 0.7153, "step": 3560 }, { "epoch": 0.109139389481427, "grad_norm": 1.5347594154166981, "learning_rate": 9.836674656388032e-06, "loss": 0.7686, "step": 3561 }, { "epoch": 0.1091700380041682, "grad_norm": 0.6521749930010029, "learning_rate": 9.836548815125536e-06, "loss": 0.5003, "step": 3562 }, { "epoch": 0.1092006865269094, "grad_norm": 1.409605085530475, "learning_rate": 9.836422926207283e-06, "loss": 0.5863, "step": 3563 }, { "epoch": 0.1092313350496506, "grad_norm": 1.6453053532726092, "learning_rate": 9.83629698963452e-06, "loss": 0.694, "step": 3564 }, { "epoch": 0.10926198357239181, "grad_norm": 1.5401414859328137, "learning_rate": 9.836171005408483e-06, "loss": 0.6337, "step": 3565 }, { "epoch": 0.10929263209513301, "grad_norm": 1.3548060454702615, "learning_rate": 9.836044973530417e-06, "loss": 0.7226, "step": 3566 }, { "epoch": 0.10932328061787422, "grad_norm": 1.5100103164556367, "learning_rate": 9.835918894001564e-06, "loss": 0.7525, "step": 3567 }, { "epoch": 0.10935392914061542, "grad_norm": 0.5312519187105282, "learning_rate": 9.835792766823162e-06, "loss": 0.484, "step": 3568 }, { "epoch": 0.10938457766335663, "grad_norm": 1.5279028758461726, "learning_rate": 9.835666591996458e-06, "loss": 0.7378, "step": 3569 }, { "epoch": 0.10941522618609784, "grad_norm": 1.3085672619278526, "learning_rate": 9.835540369522694e-06, "loss": 0.7182, "step": 3570 }, { "epoch": 0.10944587470883903, "grad_norm": 1.6035116807755434, "learning_rate": 9.835414099403113e-06, "loss": 0.7307, "step": 3571 }, { "epoch": 0.10947652323158023, "grad_norm": 1.6765190787349349, "learning_rate": 9.83528778163896e-06, "loss": 0.7538, "step": 3572 }, { "epoch": 0.10950717175432144, "grad_norm": 1.5364650274843208, "learning_rate": 9.83516141623148e-06, "loss": 0.7708, "step": 3573 }, { "epoch": 0.10953782027706264, "grad_norm": 0.547885494047939, "learning_rate": 9.835035003181917e-06, "loss": 0.4982, "step": 3574 }, { "epoch": 0.10956846879980385, "grad_norm": 1.4623814865976263, "learning_rate": 9.834908542491517e-06, "loss": 0.6951, "step": 3575 }, { "epoch": 0.10959911732254506, "grad_norm": 1.3752351018125113, "learning_rate": 9.834782034161525e-06, "loss": 0.7694, "step": 3576 }, { "epoch": 0.10962976584528626, "grad_norm": 1.439737360251926, "learning_rate": 9.834655478193188e-06, "loss": 0.709, "step": 3577 }, { "epoch": 0.10966041436802747, "grad_norm": 1.3536649470074822, "learning_rate": 9.834528874587756e-06, "loss": 0.6998, "step": 3578 }, { "epoch": 0.10969106289076866, "grad_norm": 2.0521136298770006, "learning_rate": 9.834402223346475e-06, "loss": 0.7444, "step": 3579 }, { "epoch": 0.10972171141350986, "grad_norm": 1.5694129898466733, "learning_rate": 9.834275524470588e-06, "loss": 0.9014, "step": 3580 }, { "epoch": 0.10975235993625107, "grad_norm": 0.4693870093657769, "learning_rate": 9.83414877796135e-06, "loss": 0.4692, "step": 3581 }, { "epoch": 0.10978300845899228, "grad_norm": 0.5092537274377446, "learning_rate": 9.834021983820007e-06, "loss": 0.4998, "step": 3582 }, { "epoch": 0.10981365698173348, "grad_norm": 1.443848607320248, "learning_rate": 9.833895142047809e-06, "loss": 0.728, "step": 3583 }, { "epoch": 0.10984430550447469, "grad_norm": 1.5861485510610343, "learning_rate": 9.833768252646003e-06, "loss": 0.8137, "step": 3584 }, { "epoch": 0.1098749540272159, "grad_norm": 1.7046658325548851, "learning_rate": 9.833641315615844e-06, "loss": 0.7616, "step": 3585 }, { "epoch": 0.1099056025499571, "grad_norm": 1.442230255022279, "learning_rate": 9.83351433095858e-06, "loss": 0.7101, "step": 3586 }, { "epoch": 0.10993625107269829, "grad_norm": 1.4099911063490769, "learning_rate": 9.833387298675461e-06, "loss": 0.6868, "step": 3587 }, { "epoch": 0.1099668995954395, "grad_norm": 1.4310499161690304, "learning_rate": 9.833260218767741e-06, "loss": 0.6835, "step": 3588 }, { "epoch": 0.1099975481181807, "grad_norm": 1.5019321033651967, "learning_rate": 9.833133091236673e-06, "loss": 0.7245, "step": 3589 }, { "epoch": 0.11002819664092191, "grad_norm": 1.3439076319885825, "learning_rate": 9.833005916083506e-06, "loss": 0.7197, "step": 3590 }, { "epoch": 0.11005884516366311, "grad_norm": 1.3790954972903582, "learning_rate": 9.832878693309495e-06, "loss": 0.7808, "step": 3591 }, { "epoch": 0.11008949368640432, "grad_norm": 1.4286313732276559, "learning_rate": 9.832751422915896e-06, "loss": 0.7007, "step": 3592 }, { "epoch": 0.11012014220914552, "grad_norm": 1.5536381308376352, "learning_rate": 9.83262410490396e-06, "loss": 0.814, "step": 3593 }, { "epoch": 0.11015079073188672, "grad_norm": 1.3843475409728756, "learning_rate": 9.832496739274942e-06, "loss": 0.6662, "step": 3594 }, { "epoch": 0.11018143925462792, "grad_norm": 1.418124182963991, "learning_rate": 9.832369326030096e-06, "loss": 0.6941, "step": 3595 }, { "epoch": 0.11021208777736913, "grad_norm": 1.3636790079085914, "learning_rate": 9.83224186517068e-06, "loss": 0.675, "step": 3596 }, { "epoch": 0.11024273630011033, "grad_norm": 1.5843650673976695, "learning_rate": 9.832114356697948e-06, "loss": 0.6718, "step": 3597 }, { "epoch": 0.11027338482285154, "grad_norm": 0.5360894220831193, "learning_rate": 9.831986800613157e-06, "loss": 0.4773, "step": 3598 }, { "epoch": 0.11030403334559274, "grad_norm": 1.3328548096444357, "learning_rate": 9.831859196917563e-06, "loss": 0.7275, "step": 3599 }, { "epoch": 0.11033468186833395, "grad_norm": 1.275951196303165, "learning_rate": 9.831731545612423e-06, "loss": 0.6741, "step": 3600 }, { "epoch": 0.11036533039107516, "grad_norm": 1.4617906044283033, "learning_rate": 9.831603846698998e-06, "loss": 0.799, "step": 3601 }, { "epoch": 0.11039597891381635, "grad_norm": 1.419898804175721, "learning_rate": 9.831476100178543e-06, "loss": 0.7228, "step": 3602 }, { "epoch": 0.11042662743655755, "grad_norm": 1.5891356682775297, "learning_rate": 9.83134830605232e-06, "loss": 0.7241, "step": 3603 }, { "epoch": 0.11045727595929876, "grad_norm": 1.5871200502240868, "learning_rate": 9.831220464321584e-06, "loss": 0.6991, "step": 3604 }, { "epoch": 0.11048792448203996, "grad_norm": 1.2148549026684228, "learning_rate": 9.831092574987596e-06, "loss": 0.6489, "step": 3605 }, { "epoch": 0.11051857300478117, "grad_norm": 1.655031841504178, "learning_rate": 9.830964638051618e-06, "loss": 0.7196, "step": 3606 }, { "epoch": 0.11054922152752238, "grad_norm": 1.3850917265688076, "learning_rate": 9.830836653514909e-06, "loss": 0.6942, "step": 3607 }, { "epoch": 0.11057987005026358, "grad_norm": 1.3038303720653337, "learning_rate": 9.830708621378731e-06, "loss": 0.7369, "step": 3608 }, { "epoch": 0.11061051857300479, "grad_norm": 1.3386747337380465, "learning_rate": 9.830580541644343e-06, "loss": 0.7893, "step": 3609 }, { "epoch": 0.11064116709574598, "grad_norm": 0.5399570057390197, "learning_rate": 9.830452414313012e-06, "loss": 0.5088, "step": 3610 }, { "epoch": 0.11067181561848718, "grad_norm": 1.334388505114644, "learning_rate": 9.830324239385996e-06, "loss": 0.737, "step": 3611 }, { "epoch": 0.11070246414122839, "grad_norm": 1.4224773277197365, "learning_rate": 9.830196016864558e-06, "loss": 0.8107, "step": 3612 }, { "epoch": 0.1107331126639696, "grad_norm": 1.477033496964899, "learning_rate": 9.830067746749964e-06, "loss": 0.7655, "step": 3613 }, { "epoch": 0.1107637611867108, "grad_norm": 1.5985666594287158, "learning_rate": 9.829939429043478e-06, "loss": 0.7549, "step": 3614 }, { "epoch": 0.11079440970945201, "grad_norm": 1.4069354015061408, "learning_rate": 9.82981106374636e-06, "loss": 0.7148, "step": 3615 }, { "epoch": 0.11082505823219321, "grad_norm": 1.448550072799484, "learning_rate": 9.82968265085988e-06, "loss": 0.746, "step": 3616 }, { "epoch": 0.11085570675493442, "grad_norm": 1.5238015581475155, "learning_rate": 9.8295541903853e-06, "loss": 0.7488, "step": 3617 }, { "epoch": 0.11088635527767561, "grad_norm": 1.546045646564679, "learning_rate": 9.829425682323889e-06, "loss": 0.7823, "step": 3618 }, { "epoch": 0.11091700380041682, "grad_norm": 1.8416556776558364, "learning_rate": 9.829297126676909e-06, "loss": 0.6888, "step": 3619 }, { "epoch": 0.11094765232315802, "grad_norm": 1.316315726097507, "learning_rate": 9.82916852344563e-06, "loss": 0.6758, "step": 3620 }, { "epoch": 0.11097830084589923, "grad_norm": 1.3970012853893183, "learning_rate": 9.829039872631317e-06, "loss": 0.7419, "step": 3621 }, { "epoch": 0.11100894936864043, "grad_norm": 0.498517884461205, "learning_rate": 9.828911174235238e-06, "loss": 0.4623, "step": 3622 }, { "epoch": 0.11103959789138164, "grad_norm": 1.3187152923815093, "learning_rate": 9.828782428258663e-06, "loss": 0.7811, "step": 3623 }, { "epoch": 0.11107024641412284, "grad_norm": 1.599181443690254, "learning_rate": 9.828653634702858e-06, "loss": 0.7406, "step": 3624 }, { "epoch": 0.11110089493686404, "grad_norm": 1.3499984267298148, "learning_rate": 9.828524793569095e-06, "loss": 0.7348, "step": 3625 }, { "epoch": 0.11113154345960524, "grad_norm": 1.4568495177356948, "learning_rate": 9.82839590485864e-06, "loss": 0.719, "step": 3626 }, { "epoch": 0.11116219198234645, "grad_norm": 1.351983856034592, "learning_rate": 9.828266968572765e-06, "loss": 0.7608, "step": 3627 }, { "epoch": 0.11119284050508765, "grad_norm": 1.6093424509956062, "learning_rate": 9.828137984712741e-06, "loss": 0.7219, "step": 3628 }, { "epoch": 0.11122348902782886, "grad_norm": 1.2699584711967322, "learning_rate": 9.828008953279839e-06, "loss": 0.6438, "step": 3629 }, { "epoch": 0.11125413755057006, "grad_norm": 1.3768244858188217, "learning_rate": 9.827879874275328e-06, "loss": 0.8046, "step": 3630 }, { "epoch": 0.11128478607331127, "grad_norm": 1.4843259902555737, "learning_rate": 9.827750747700481e-06, "loss": 0.7732, "step": 3631 }, { "epoch": 0.11131543459605248, "grad_norm": 0.506145214139227, "learning_rate": 9.827621573556573e-06, "loss": 0.5136, "step": 3632 }, { "epoch": 0.11134608311879367, "grad_norm": 1.374775530497064, "learning_rate": 9.827492351844872e-06, "loss": 0.7626, "step": 3633 }, { "epoch": 0.11137673164153487, "grad_norm": 0.4910657567787789, "learning_rate": 9.827363082566655e-06, "loss": 0.4745, "step": 3634 }, { "epoch": 0.11140738016427608, "grad_norm": 1.1418059027688798, "learning_rate": 9.827233765723193e-06, "loss": 0.7144, "step": 3635 }, { "epoch": 0.11143802868701728, "grad_norm": 1.318307312151209, "learning_rate": 9.827104401315764e-06, "loss": 0.6801, "step": 3636 }, { "epoch": 0.11146867720975849, "grad_norm": 1.4287535530265838, "learning_rate": 9.82697498934564e-06, "loss": 0.6736, "step": 3637 }, { "epoch": 0.1114993257324997, "grad_norm": 1.3473972615395398, "learning_rate": 9.826845529814093e-06, "loss": 0.7448, "step": 3638 }, { "epoch": 0.1115299742552409, "grad_norm": 1.4220838284975836, "learning_rate": 9.826716022722405e-06, "loss": 0.7164, "step": 3639 }, { "epoch": 0.11156062277798211, "grad_norm": 1.4780253579998235, "learning_rate": 9.826586468071848e-06, "loss": 0.7721, "step": 3640 }, { "epoch": 0.1115912713007233, "grad_norm": 1.3343981795010111, "learning_rate": 9.826456865863699e-06, "loss": 0.8021, "step": 3641 }, { "epoch": 0.1116219198234645, "grad_norm": 1.3570230728152328, "learning_rate": 9.826327216099237e-06, "loss": 0.689, "step": 3642 }, { "epoch": 0.11165256834620571, "grad_norm": 1.4860670962316609, "learning_rate": 9.826197518779738e-06, "loss": 0.8676, "step": 3643 }, { "epoch": 0.11168321686894692, "grad_norm": 1.5964696052472391, "learning_rate": 9.826067773906479e-06, "loss": 0.8436, "step": 3644 }, { "epoch": 0.11171386539168812, "grad_norm": 1.3304135303953373, "learning_rate": 9.82593798148074e-06, "loss": 0.7495, "step": 3645 }, { "epoch": 0.11174451391442933, "grad_norm": 1.3616856394911399, "learning_rate": 9.825808141503798e-06, "loss": 0.7226, "step": 3646 }, { "epoch": 0.11177516243717053, "grad_norm": 1.4925040752686982, "learning_rate": 9.825678253976935e-06, "loss": 0.7288, "step": 3647 }, { "epoch": 0.11180581095991174, "grad_norm": 1.257551030370916, "learning_rate": 9.82554831890143e-06, "loss": 0.6923, "step": 3648 }, { "epoch": 0.11183645948265293, "grad_norm": 1.4652703832687364, "learning_rate": 9.825418336278563e-06, "loss": 0.6672, "step": 3649 }, { "epoch": 0.11186710800539414, "grad_norm": 1.80290515590746, "learning_rate": 9.825288306109612e-06, "loss": 0.6904, "step": 3650 }, { "epoch": 0.11189775652813534, "grad_norm": 1.5077031471038234, "learning_rate": 9.825158228395863e-06, "loss": 0.8178, "step": 3651 }, { "epoch": 0.11192840505087655, "grad_norm": 1.5696073956357972, "learning_rate": 9.825028103138596e-06, "loss": 0.7744, "step": 3652 }, { "epoch": 0.11195905357361775, "grad_norm": 1.5304447574934457, "learning_rate": 9.82489793033909e-06, "loss": 0.6967, "step": 3653 }, { "epoch": 0.11198970209635896, "grad_norm": 1.5725620716710833, "learning_rate": 9.824767709998632e-06, "loss": 0.6341, "step": 3654 }, { "epoch": 0.11202035061910016, "grad_norm": 1.490064365554516, "learning_rate": 9.824637442118503e-06, "loss": 0.7395, "step": 3655 }, { "epoch": 0.11205099914184136, "grad_norm": 1.2241969629086757, "learning_rate": 9.824507126699986e-06, "loss": 0.6644, "step": 3656 }, { "epoch": 0.11208164766458256, "grad_norm": 1.400819077097427, "learning_rate": 9.824376763744367e-06, "loss": 0.7261, "step": 3657 }, { "epoch": 0.11211229618732377, "grad_norm": 1.6281342390454954, "learning_rate": 9.824246353252928e-06, "loss": 0.7646, "step": 3658 }, { "epoch": 0.11214294471006497, "grad_norm": 1.4914186608282263, "learning_rate": 9.824115895226956e-06, "loss": 0.6857, "step": 3659 }, { "epoch": 0.11217359323280618, "grad_norm": 0.5958284931711767, "learning_rate": 9.823985389667736e-06, "loss": 0.4732, "step": 3660 }, { "epoch": 0.11220424175554738, "grad_norm": 1.4898929560833085, "learning_rate": 9.823854836576554e-06, "loss": 0.7271, "step": 3661 }, { "epoch": 0.11223489027828859, "grad_norm": 1.3199792757394828, "learning_rate": 9.823724235954696e-06, "loss": 0.7737, "step": 3662 }, { "epoch": 0.1122655388010298, "grad_norm": 1.3951918931715444, "learning_rate": 9.823593587803448e-06, "loss": 0.7618, "step": 3663 }, { "epoch": 0.11229618732377099, "grad_norm": 1.497004418101236, "learning_rate": 9.823462892124098e-06, "loss": 0.7934, "step": 3664 }, { "epoch": 0.1123268358465122, "grad_norm": 1.3937986889629794, "learning_rate": 9.823332148917937e-06, "loss": 0.7515, "step": 3665 }, { "epoch": 0.1123574843692534, "grad_norm": 1.437774643677947, "learning_rate": 9.823201358186248e-06, "loss": 0.71, "step": 3666 }, { "epoch": 0.1123881328919946, "grad_norm": 1.3890896153553092, "learning_rate": 9.823070519930321e-06, "loss": 0.7839, "step": 3667 }, { "epoch": 0.11241878141473581, "grad_norm": 1.8925229276666462, "learning_rate": 9.822939634151447e-06, "loss": 0.7457, "step": 3668 }, { "epoch": 0.11244942993747702, "grad_norm": 1.592961440646377, "learning_rate": 9.822808700850914e-06, "loss": 0.6761, "step": 3669 }, { "epoch": 0.11248007846021822, "grad_norm": 1.5406403936864492, "learning_rate": 9.822677720030015e-06, "loss": 0.6532, "step": 3670 }, { "epoch": 0.11251072698295943, "grad_norm": 0.5127014669333404, "learning_rate": 9.822546691690038e-06, "loss": 0.4636, "step": 3671 }, { "epoch": 0.11254137550570062, "grad_norm": 1.1929891622963427, "learning_rate": 9.822415615832272e-06, "loss": 0.6904, "step": 3672 }, { "epoch": 0.11257202402844182, "grad_norm": 1.503635961203795, "learning_rate": 9.822284492458014e-06, "loss": 0.7567, "step": 3673 }, { "epoch": 0.11260267255118303, "grad_norm": 1.3486351245740855, "learning_rate": 9.822153321568552e-06, "loss": 0.7544, "step": 3674 }, { "epoch": 0.11263332107392424, "grad_norm": 1.579601868843188, "learning_rate": 9.822022103165178e-06, "loss": 0.7852, "step": 3675 }, { "epoch": 0.11266396959666544, "grad_norm": 1.4831963830980024, "learning_rate": 9.821890837249189e-06, "loss": 0.7674, "step": 3676 }, { "epoch": 0.11269461811940665, "grad_norm": 1.351301781820859, "learning_rate": 9.821759523821874e-06, "loss": 0.7015, "step": 3677 }, { "epoch": 0.11272526664214785, "grad_norm": 1.2665247963751416, "learning_rate": 9.821628162884532e-06, "loss": 0.6754, "step": 3678 }, { "epoch": 0.11275591516488906, "grad_norm": 1.4589774688088735, "learning_rate": 9.82149675443845e-06, "loss": 0.7775, "step": 3679 }, { "epoch": 0.11278656368763025, "grad_norm": 0.5000088715169034, "learning_rate": 9.821365298484929e-06, "loss": 0.4672, "step": 3680 }, { "epoch": 0.11281721221037146, "grad_norm": 1.477967451720392, "learning_rate": 9.82123379502526e-06, "loss": 0.7513, "step": 3681 }, { "epoch": 0.11284786073311266, "grad_norm": 1.4572853693002508, "learning_rate": 9.821102244060743e-06, "loss": 0.7655, "step": 3682 }, { "epoch": 0.11287850925585387, "grad_norm": 1.4344336963675806, "learning_rate": 9.82097064559267e-06, "loss": 0.6574, "step": 3683 }, { "epoch": 0.11290915777859507, "grad_norm": 1.3509583698484995, "learning_rate": 9.820838999622341e-06, "loss": 0.7178, "step": 3684 }, { "epoch": 0.11293980630133628, "grad_norm": 1.5322131245299893, "learning_rate": 9.820707306151055e-06, "loss": 0.7958, "step": 3685 }, { "epoch": 0.11297045482407748, "grad_norm": 1.5479701479580195, "learning_rate": 9.820575565180102e-06, "loss": 0.7796, "step": 3686 }, { "epoch": 0.11300110334681868, "grad_norm": 1.575432345719521, "learning_rate": 9.820443776710786e-06, "loss": 0.8682, "step": 3687 }, { "epoch": 0.11303175186955988, "grad_norm": 1.4013326412470135, "learning_rate": 9.820311940744405e-06, "loss": 0.7522, "step": 3688 }, { "epoch": 0.11306240039230109, "grad_norm": 1.4092282919534438, "learning_rate": 9.820180057282256e-06, "loss": 0.6968, "step": 3689 }, { "epoch": 0.1130930489150423, "grad_norm": 1.5427122195202956, "learning_rate": 9.82004812632564e-06, "loss": 0.7267, "step": 3690 }, { "epoch": 0.1131236974377835, "grad_norm": 1.3965328922762872, "learning_rate": 9.819916147875857e-06, "loss": 0.7843, "step": 3691 }, { "epoch": 0.1131543459605247, "grad_norm": 1.5313257986046447, "learning_rate": 9.819784121934206e-06, "loss": 0.8987, "step": 3692 }, { "epoch": 0.11318499448326591, "grad_norm": 1.3046167192914504, "learning_rate": 9.819652048501988e-06, "loss": 0.7184, "step": 3693 }, { "epoch": 0.11321564300600712, "grad_norm": 1.530864483134691, "learning_rate": 9.819519927580507e-06, "loss": 0.7372, "step": 3694 }, { "epoch": 0.11324629152874831, "grad_norm": 1.4179322832158658, "learning_rate": 9.819387759171062e-06, "loss": 0.6967, "step": 3695 }, { "epoch": 0.11327694005148951, "grad_norm": 1.468414973468053, "learning_rate": 9.819255543274957e-06, "loss": 0.7129, "step": 3696 }, { "epoch": 0.11330758857423072, "grad_norm": 0.5403492083927753, "learning_rate": 9.819123279893494e-06, "loss": 0.4755, "step": 3697 }, { "epoch": 0.11333823709697192, "grad_norm": 1.3376541418667656, "learning_rate": 9.818990969027977e-06, "loss": 0.7439, "step": 3698 }, { "epoch": 0.11336888561971313, "grad_norm": 1.3755730949547083, "learning_rate": 9.818858610679706e-06, "loss": 0.7412, "step": 3699 }, { "epoch": 0.11339953414245434, "grad_norm": 1.411405927910907, "learning_rate": 9.818726204849991e-06, "loss": 0.8349, "step": 3700 }, { "epoch": 0.11343018266519554, "grad_norm": 0.5029240601757051, "learning_rate": 9.818593751540134e-06, "loss": 0.484, "step": 3701 }, { "epoch": 0.11346083118793675, "grad_norm": 0.49440848170921003, "learning_rate": 9.818461250751438e-06, "loss": 0.4821, "step": 3702 }, { "epoch": 0.11349147971067794, "grad_norm": 1.5292521608636398, "learning_rate": 9.818328702485212e-06, "loss": 0.8454, "step": 3703 }, { "epoch": 0.11352212823341915, "grad_norm": 0.5128118152534197, "learning_rate": 9.81819610674276e-06, "loss": 0.48, "step": 3704 }, { "epoch": 0.11355277675616035, "grad_norm": 1.4356890998066236, "learning_rate": 9.81806346352539e-06, "loss": 0.8171, "step": 3705 }, { "epoch": 0.11358342527890156, "grad_norm": 0.47935556672940616, "learning_rate": 9.817930772834406e-06, "loss": 0.4728, "step": 3706 }, { "epoch": 0.11361407380164276, "grad_norm": 1.310895725763089, "learning_rate": 9.817798034671117e-06, "loss": 0.8065, "step": 3707 }, { "epoch": 0.11364472232438397, "grad_norm": 1.5290066841066956, "learning_rate": 9.817665249036833e-06, "loss": 0.8129, "step": 3708 }, { "epoch": 0.11367537084712517, "grad_norm": 1.3981208499526363, "learning_rate": 9.81753241593286e-06, "loss": 0.6207, "step": 3709 }, { "epoch": 0.11370601936986638, "grad_norm": 0.4950435183619038, "learning_rate": 9.817399535360507e-06, "loss": 0.4587, "step": 3710 }, { "epoch": 0.11373666789260757, "grad_norm": 1.4998269956182853, "learning_rate": 9.817266607321085e-06, "loss": 0.8385, "step": 3711 }, { "epoch": 0.11376731641534878, "grad_norm": 0.5012410099458258, "learning_rate": 9.817133631815902e-06, "loss": 0.4744, "step": 3712 }, { "epoch": 0.11379796493808998, "grad_norm": 0.4712922931927885, "learning_rate": 9.817000608846269e-06, "loss": 0.4768, "step": 3713 }, { "epoch": 0.11382861346083119, "grad_norm": 0.49892827493392694, "learning_rate": 9.816867538413497e-06, "loss": 0.4566, "step": 3714 }, { "epoch": 0.1138592619835724, "grad_norm": 1.3674872301825372, "learning_rate": 9.816734420518895e-06, "loss": 0.7726, "step": 3715 }, { "epoch": 0.1138899105063136, "grad_norm": 1.4496800503662781, "learning_rate": 9.816601255163777e-06, "loss": 0.8392, "step": 3716 }, { "epoch": 0.1139205590290548, "grad_norm": 1.4458870830931134, "learning_rate": 9.816468042349456e-06, "loss": 0.8199, "step": 3717 }, { "epoch": 0.113951207551796, "grad_norm": 1.3633689840449275, "learning_rate": 9.81633478207724e-06, "loss": 0.7448, "step": 3718 }, { "epoch": 0.1139818560745372, "grad_norm": 1.3734541178975932, "learning_rate": 9.816201474348448e-06, "loss": 0.7156, "step": 3719 }, { "epoch": 0.11401250459727841, "grad_norm": 1.37294380047313, "learning_rate": 9.81606811916439e-06, "loss": 0.6878, "step": 3720 }, { "epoch": 0.11404315312001961, "grad_norm": 1.334765900094853, "learning_rate": 9.815934716526378e-06, "loss": 0.7244, "step": 3721 }, { "epoch": 0.11407380164276082, "grad_norm": 1.3466701820365876, "learning_rate": 9.815801266435731e-06, "loss": 0.7722, "step": 3722 }, { "epoch": 0.11410445016550202, "grad_norm": 1.5248291399729166, "learning_rate": 9.815667768893763e-06, "loss": 0.8281, "step": 3723 }, { "epoch": 0.11413509868824323, "grad_norm": 1.438224045702674, "learning_rate": 9.815534223901788e-06, "loss": 0.7419, "step": 3724 }, { "epoch": 0.11416574721098444, "grad_norm": 1.4537138521816682, "learning_rate": 9.81540063146112e-06, "loss": 0.72, "step": 3725 }, { "epoch": 0.11419639573372563, "grad_norm": 1.5257094628707517, "learning_rate": 9.81526699157308e-06, "loss": 0.7095, "step": 3726 }, { "epoch": 0.11422704425646683, "grad_norm": 1.3608223191054796, "learning_rate": 9.815133304238982e-06, "loss": 0.6068, "step": 3727 }, { "epoch": 0.11425769277920804, "grad_norm": 1.6189308869251022, "learning_rate": 9.814999569460143e-06, "loss": 0.7138, "step": 3728 }, { "epoch": 0.11428834130194925, "grad_norm": 1.468473830885154, "learning_rate": 9.81486578723788e-06, "loss": 0.7416, "step": 3729 }, { "epoch": 0.11431898982469045, "grad_norm": 1.262421866416767, "learning_rate": 9.814731957573514e-06, "loss": 0.6952, "step": 3730 }, { "epoch": 0.11434963834743166, "grad_norm": 1.3462393862376618, "learning_rate": 9.81459808046836e-06, "loss": 0.7279, "step": 3731 }, { "epoch": 0.11438028687017286, "grad_norm": 0.8210405721126303, "learning_rate": 9.814464155923741e-06, "loss": 0.4804, "step": 3732 }, { "epoch": 0.11441093539291407, "grad_norm": 1.4005506989747005, "learning_rate": 9.814330183940976e-06, "loss": 0.6813, "step": 3733 }, { "epoch": 0.11444158391565526, "grad_norm": 1.2751783904494436, "learning_rate": 9.814196164521384e-06, "loss": 0.7426, "step": 3734 }, { "epoch": 0.11447223243839647, "grad_norm": 1.4607510730447433, "learning_rate": 9.814062097666284e-06, "loss": 0.6932, "step": 3735 }, { "epoch": 0.11450288096113767, "grad_norm": 0.500746590595629, "learning_rate": 9.813927983376998e-06, "loss": 0.4542, "step": 3736 }, { "epoch": 0.11453352948387888, "grad_norm": 1.6028247791382495, "learning_rate": 9.813793821654849e-06, "loss": 0.8741, "step": 3737 }, { "epoch": 0.11456417800662008, "grad_norm": 1.367450284331781, "learning_rate": 9.813659612501156e-06, "loss": 0.7831, "step": 3738 }, { "epoch": 0.11459482652936129, "grad_norm": 1.3651530677514778, "learning_rate": 9.813525355917244e-06, "loss": 0.6687, "step": 3739 }, { "epoch": 0.1146254750521025, "grad_norm": 1.6531795192180667, "learning_rate": 9.813391051904436e-06, "loss": 0.8337, "step": 3740 }, { "epoch": 0.1146561235748437, "grad_norm": 1.3765595333361276, "learning_rate": 9.813256700464054e-06, "loss": 0.6863, "step": 3741 }, { "epoch": 0.11468677209758489, "grad_norm": 1.4638926707545512, "learning_rate": 9.813122301597422e-06, "loss": 0.7779, "step": 3742 }, { "epoch": 0.1147174206203261, "grad_norm": 1.5103875735726175, "learning_rate": 9.812987855305864e-06, "loss": 0.7954, "step": 3743 }, { "epoch": 0.1147480691430673, "grad_norm": 1.4324287356179948, "learning_rate": 9.812853361590707e-06, "loss": 0.7611, "step": 3744 }, { "epoch": 0.11477871766580851, "grad_norm": 1.5299493731645806, "learning_rate": 9.812718820453273e-06, "loss": 0.6897, "step": 3745 }, { "epoch": 0.11480936618854971, "grad_norm": 1.570441217922952, "learning_rate": 9.81258423189489e-06, "loss": 0.7293, "step": 3746 }, { "epoch": 0.11484001471129092, "grad_norm": 1.4348625260139234, "learning_rate": 9.812449595916883e-06, "loss": 0.701, "step": 3747 }, { "epoch": 0.11487066323403212, "grad_norm": 1.3232020853353106, "learning_rate": 9.812314912520577e-06, "loss": 0.7174, "step": 3748 }, { "epoch": 0.11490131175677332, "grad_norm": 1.4314933779590042, "learning_rate": 9.812180181707303e-06, "loss": 0.832, "step": 3749 }, { "epoch": 0.11493196027951452, "grad_norm": 1.4481523545434747, "learning_rate": 9.812045403478385e-06, "loss": 0.6796, "step": 3750 }, { "epoch": 0.11496260880225573, "grad_norm": 1.4925529988877722, "learning_rate": 9.811910577835154e-06, "loss": 0.7338, "step": 3751 }, { "epoch": 0.11499325732499693, "grad_norm": 1.0171013561978106, "learning_rate": 9.811775704778934e-06, "loss": 0.4933, "step": 3752 }, { "epoch": 0.11502390584773814, "grad_norm": 1.3898523402569158, "learning_rate": 9.81164078431106e-06, "loss": 0.6341, "step": 3753 }, { "epoch": 0.11505455437047934, "grad_norm": 1.564123507798484, "learning_rate": 9.811505816432857e-06, "loss": 0.7925, "step": 3754 }, { "epoch": 0.11508520289322055, "grad_norm": 1.4722746916109903, "learning_rate": 9.811370801145656e-06, "loss": 0.6878, "step": 3755 }, { "epoch": 0.11511585141596176, "grad_norm": 1.5533889915229933, "learning_rate": 9.811235738450787e-06, "loss": 0.7534, "step": 3756 }, { "epoch": 0.11514649993870295, "grad_norm": 1.391831431271255, "learning_rate": 9.811100628349582e-06, "loss": 0.7927, "step": 3757 }, { "epoch": 0.11517714846144415, "grad_norm": 1.446792209171585, "learning_rate": 9.810965470843373e-06, "loss": 0.7229, "step": 3758 }, { "epoch": 0.11520779698418536, "grad_norm": 0.8803305930329646, "learning_rate": 9.810830265933488e-06, "loss": 0.495, "step": 3759 }, { "epoch": 0.11523844550692657, "grad_norm": 1.4309733949063093, "learning_rate": 9.810695013621261e-06, "loss": 0.7666, "step": 3760 }, { "epoch": 0.11526909402966777, "grad_norm": 1.5829343295027687, "learning_rate": 9.810559713908027e-06, "loss": 0.6864, "step": 3761 }, { "epoch": 0.11529974255240898, "grad_norm": 1.4272657348033897, "learning_rate": 9.810424366795116e-06, "loss": 0.8324, "step": 3762 }, { "epoch": 0.11533039107515018, "grad_norm": 1.4003136734190869, "learning_rate": 9.810288972283864e-06, "loss": 0.7706, "step": 3763 }, { "epoch": 0.11536103959789139, "grad_norm": 1.2590457115718667, "learning_rate": 9.810153530375604e-06, "loss": 0.7363, "step": 3764 }, { "epoch": 0.11539168812063258, "grad_norm": 1.4011112919828101, "learning_rate": 9.810018041071669e-06, "loss": 0.7463, "step": 3765 }, { "epoch": 0.11542233664337379, "grad_norm": 1.5222028500740838, "learning_rate": 9.809882504373397e-06, "loss": 0.7263, "step": 3766 }, { "epoch": 0.11545298516611499, "grad_norm": 0.6088485765438538, "learning_rate": 9.809746920282121e-06, "loss": 0.4736, "step": 3767 }, { "epoch": 0.1154836336888562, "grad_norm": 1.2600596824151484, "learning_rate": 9.809611288799178e-06, "loss": 0.7058, "step": 3768 }, { "epoch": 0.1155142822115974, "grad_norm": 1.3874276639311642, "learning_rate": 9.809475609925908e-06, "loss": 0.7276, "step": 3769 }, { "epoch": 0.11554493073433861, "grad_norm": 0.4846360435096548, "learning_rate": 9.80933988366364e-06, "loss": 0.492, "step": 3770 }, { "epoch": 0.11557557925707981, "grad_norm": 1.3869134945183543, "learning_rate": 9.809204110013717e-06, "loss": 0.7719, "step": 3771 }, { "epoch": 0.11560622777982102, "grad_norm": 1.4053518754682686, "learning_rate": 9.809068288977475e-06, "loss": 0.757, "step": 3772 }, { "epoch": 0.11563687630256221, "grad_norm": 1.3695483369686459, "learning_rate": 9.808932420556252e-06, "loss": 0.6766, "step": 3773 }, { "epoch": 0.11566752482530342, "grad_norm": 1.514355366699533, "learning_rate": 9.80879650475139e-06, "loss": 0.7564, "step": 3774 }, { "epoch": 0.11569817334804462, "grad_norm": 0.6185947319274725, "learning_rate": 9.808660541564224e-06, "loss": 0.475, "step": 3775 }, { "epoch": 0.11572882187078583, "grad_norm": 0.5614877415076838, "learning_rate": 9.808524530996095e-06, "loss": 0.4843, "step": 3776 }, { "epoch": 0.11575947039352703, "grad_norm": 0.4891359954108257, "learning_rate": 9.808388473048343e-06, "loss": 0.4866, "step": 3777 }, { "epoch": 0.11579011891626824, "grad_norm": 1.2654146908387247, "learning_rate": 9.808252367722311e-06, "loss": 0.702, "step": 3778 }, { "epoch": 0.11582076743900944, "grad_norm": 1.444644697913943, "learning_rate": 9.808116215019336e-06, "loss": 0.7124, "step": 3779 }, { "epoch": 0.11585141596175064, "grad_norm": 0.5640738810138854, "learning_rate": 9.807980014940764e-06, "loss": 0.4646, "step": 3780 }, { "epoch": 0.11588206448449184, "grad_norm": 1.2431450155405612, "learning_rate": 9.807843767487933e-06, "loss": 0.7516, "step": 3781 }, { "epoch": 0.11591271300723305, "grad_norm": 0.577769676829732, "learning_rate": 9.807707472662188e-06, "loss": 0.4647, "step": 3782 }, { "epoch": 0.11594336152997425, "grad_norm": 1.5595095223980977, "learning_rate": 9.80757113046487e-06, "loss": 0.7457, "step": 3783 }, { "epoch": 0.11597401005271546, "grad_norm": 0.519595116084155, "learning_rate": 9.807434740897325e-06, "loss": 0.4577, "step": 3784 }, { "epoch": 0.11600465857545667, "grad_norm": 3.0033655803939583, "learning_rate": 9.807298303960895e-06, "loss": 0.7031, "step": 3785 }, { "epoch": 0.11603530709819787, "grad_norm": 1.3257549590273068, "learning_rate": 9.807161819656925e-06, "loss": 0.7325, "step": 3786 }, { "epoch": 0.11606595562093908, "grad_norm": 1.4451310550218714, "learning_rate": 9.80702528798676e-06, "loss": 0.7073, "step": 3787 }, { "epoch": 0.11609660414368027, "grad_norm": 1.4053101525274505, "learning_rate": 9.806888708951743e-06, "loss": 0.7951, "step": 3788 }, { "epoch": 0.11612725266642147, "grad_norm": 1.5107164006082487, "learning_rate": 9.806752082553223e-06, "loss": 0.6998, "step": 3789 }, { "epoch": 0.11615790118916268, "grad_norm": 1.3641438073033245, "learning_rate": 9.806615408792545e-06, "loss": 0.7049, "step": 3790 }, { "epoch": 0.11618854971190389, "grad_norm": 1.415959187014773, "learning_rate": 9.806478687671055e-06, "loss": 0.7002, "step": 3791 }, { "epoch": 0.11621919823464509, "grad_norm": 1.4274365991424756, "learning_rate": 9.806341919190102e-06, "loss": 0.7045, "step": 3792 }, { "epoch": 0.1162498467573863, "grad_norm": 1.4243624131174395, "learning_rate": 9.806205103351031e-06, "loss": 0.7745, "step": 3793 }, { "epoch": 0.1162804952801275, "grad_norm": 1.80655445029848, "learning_rate": 9.806068240155193e-06, "loss": 0.7184, "step": 3794 }, { "epoch": 0.11631114380286871, "grad_norm": 0.7893563076765883, "learning_rate": 9.805931329603932e-06, "loss": 0.4878, "step": 3795 }, { "epoch": 0.1163417923256099, "grad_norm": 1.5795346270819637, "learning_rate": 9.805794371698603e-06, "loss": 0.7925, "step": 3796 }, { "epoch": 0.1163724408483511, "grad_norm": 1.382537907754453, "learning_rate": 9.80565736644055e-06, "loss": 0.8487, "step": 3797 }, { "epoch": 0.11640308937109231, "grad_norm": 1.2486612126969854, "learning_rate": 9.805520313831127e-06, "loss": 0.7283, "step": 3798 }, { "epoch": 0.11643373789383352, "grad_norm": 1.828363621323074, "learning_rate": 9.805383213871683e-06, "loss": 0.8263, "step": 3799 }, { "epoch": 0.11646438641657472, "grad_norm": 0.5125010828796354, "learning_rate": 9.805246066563569e-06, "loss": 0.4579, "step": 3800 }, { "epoch": 0.11649503493931593, "grad_norm": 1.3415084798093277, "learning_rate": 9.805108871908134e-06, "loss": 0.7374, "step": 3801 }, { "epoch": 0.11652568346205713, "grad_norm": 1.43613573608132, "learning_rate": 9.804971629906733e-06, "loss": 0.7078, "step": 3802 }, { "epoch": 0.11655633198479834, "grad_norm": 1.4401513386665574, "learning_rate": 9.804834340560717e-06, "loss": 0.7726, "step": 3803 }, { "epoch": 0.11658698050753953, "grad_norm": 0.532950786907006, "learning_rate": 9.80469700387144e-06, "loss": 0.4736, "step": 3804 }, { "epoch": 0.11661762903028074, "grad_norm": 1.4124642040376605, "learning_rate": 9.804559619840253e-06, "loss": 0.7486, "step": 3805 }, { "epoch": 0.11664827755302194, "grad_norm": 2.4488691037479, "learning_rate": 9.80442218846851e-06, "loss": 0.6924, "step": 3806 }, { "epoch": 0.11667892607576315, "grad_norm": 1.4173516744039907, "learning_rate": 9.804284709757567e-06, "loss": 0.7032, "step": 3807 }, { "epoch": 0.11670957459850435, "grad_norm": 0.5222614988724439, "learning_rate": 9.804147183708776e-06, "loss": 0.459, "step": 3808 }, { "epoch": 0.11674022312124556, "grad_norm": 1.4847779942096708, "learning_rate": 9.804009610323496e-06, "loss": 0.7045, "step": 3809 }, { "epoch": 0.11677087164398676, "grad_norm": 1.5499412824930008, "learning_rate": 9.803871989603078e-06, "loss": 0.7702, "step": 3810 }, { "epoch": 0.11680152016672796, "grad_norm": 1.4126993203537932, "learning_rate": 9.803734321548883e-06, "loss": 0.7582, "step": 3811 }, { "epoch": 0.11683216868946916, "grad_norm": 0.48934577732888207, "learning_rate": 9.803596606162262e-06, "loss": 0.4722, "step": 3812 }, { "epoch": 0.11686281721221037, "grad_norm": 1.3183734546869166, "learning_rate": 9.803458843444576e-06, "loss": 0.7481, "step": 3813 }, { "epoch": 0.11689346573495157, "grad_norm": 1.4207105474318271, "learning_rate": 9.80332103339718e-06, "loss": 0.7, "step": 3814 }, { "epoch": 0.11692411425769278, "grad_norm": 1.3555594101446045, "learning_rate": 9.803183176021434e-06, "loss": 0.6259, "step": 3815 }, { "epoch": 0.11695476278043399, "grad_norm": 1.4768386466238863, "learning_rate": 9.803045271318694e-06, "loss": 0.7079, "step": 3816 }, { "epoch": 0.11698541130317519, "grad_norm": 1.4150410591553075, "learning_rate": 9.80290731929032e-06, "loss": 0.7721, "step": 3817 }, { "epoch": 0.1170160598259164, "grad_norm": 1.4022852079367742, "learning_rate": 9.802769319937672e-06, "loss": 0.7954, "step": 3818 }, { "epoch": 0.11704670834865759, "grad_norm": 1.4759168776739868, "learning_rate": 9.802631273262109e-06, "loss": 0.6744, "step": 3819 }, { "epoch": 0.1170773568713988, "grad_norm": 1.4840496349054484, "learning_rate": 9.802493179264991e-06, "loss": 0.738, "step": 3820 }, { "epoch": 0.11710800539414, "grad_norm": 1.552300740580689, "learning_rate": 9.802355037947679e-06, "loss": 0.7685, "step": 3821 }, { "epoch": 0.1171386539168812, "grad_norm": 1.6541607874234936, "learning_rate": 9.802216849311535e-06, "loss": 0.6863, "step": 3822 }, { "epoch": 0.11716930243962241, "grad_norm": 1.3769600000054651, "learning_rate": 9.802078613357916e-06, "loss": 0.7066, "step": 3823 }, { "epoch": 0.11719995096236362, "grad_norm": 0.5369757591745895, "learning_rate": 9.801940330088192e-06, "loss": 0.459, "step": 3824 }, { "epoch": 0.11723059948510482, "grad_norm": 1.3664240180360192, "learning_rate": 9.801801999503719e-06, "loss": 0.7527, "step": 3825 }, { "epoch": 0.11726124800784603, "grad_norm": 1.2200434885943996, "learning_rate": 9.801663621605864e-06, "loss": 0.6851, "step": 3826 }, { "epoch": 0.11729189653058722, "grad_norm": 1.2959842349661612, "learning_rate": 9.801525196395987e-06, "loss": 0.7901, "step": 3827 }, { "epoch": 0.11732254505332843, "grad_norm": 1.6462874253236839, "learning_rate": 9.801386723875455e-06, "loss": 0.8775, "step": 3828 }, { "epoch": 0.11735319357606963, "grad_norm": 1.5818367656589087, "learning_rate": 9.80124820404563e-06, "loss": 0.7669, "step": 3829 }, { "epoch": 0.11738384209881084, "grad_norm": 0.510714235556664, "learning_rate": 9.801109636907881e-06, "loss": 0.4851, "step": 3830 }, { "epoch": 0.11741449062155204, "grad_norm": 1.5769762943420003, "learning_rate": 9.800971022463568e-06, "loss": 0.7279, "step": 3831 }, { "epoch": 0.11744513914429325, "grad_norm": 1.3838172485580214, "learning_rate": 9.800832360714058e-06, "loss": 0.6717, "step": 3832 }, { "epoch": 0.11747578766703445, "grad_norm": 0.4903524104086108, "learning_rate": 9.80069365166072e-06, "loss": 0.4734, "step": 3833 }, { "epoch": 0.11750643618977566, "grad_norm": 1.431647666527293, "learning_rate": 9.80055489530492e-06, "loss": 0.7992, "step": 3834 }, { "epoch": 0.11753708471251685, "grad_norm": 1.4266770996021807, "learning_rate": 9.800416091648022e-06, "loss": 0.7404, "step": 3835 }, { "epoch": 0.11756773323525806, "grad_norm": 0.47440178675647626, "learning_rate": 9.800277240691399e-06, "loss": 0.4919, "step": 3836 }, { "epoch": 0.11759838175799926, "grad_norm": 0.4589150203963605, "learning_rate": 9.800138342436413e-06, "loss": 0.4794, "step": 3837 }, { "epoch": 0.11762903028074047, "grad_norm": 0.46932068462804893, "learning_rate": 9.799999396884436e-06, "loss": 0.473, "step": 3838 }, { "epoch": 0.11765967880348167, "grad_norm": 1.5121850816106088, "learning_rate": 9.799860404036838e-06, "loss": 0.7648, "step": 3839 }, { "epoch": 0.11769032732622288, "grad_norm": 0.46864796511058826, "learning_rate": 9.799721363894988e-06, "loss": 0.4784, "step": 3840 }, { "epoch": 0.11772097584896409, "grad_norm": 1.2086259713365566, "learning_rate": 9.799582276460254e-06, "loss": 0.6806, "step": 3841 }, { "epoch": 0.11775162437170529, "grad_norm": 1.4929355592789515, "learning_rate": 9.799443141734009e-06, "loss": 0.8095, "step": 3842 }, { "epoch": 0.11778227289444648, "grad_norm": 1.5878202695037935, "learning_rate": 9.79930395971762e-06, "loss": 0.7433, "step": 3843 }, { "epoch": 0.11781292141718769, "grad_norm": 1.3625762795492238, "learning_rate": 9.799164730412464e-06, "loss": 0.73, "step": 3844 }, { "epoch": 0.1178435699399289, "grad_norm": 1.3740723282525036, "learning_rate": 9.79902545381991e-06, "loss": 0.8018, "step": 3845 }, { "epoch": 0.1178742184626701, "grad_norm": 1.3793344288966596, "learning_rate": 9.79888612994133e-06, "loss": 0.6512, "step": 3846 }, { "epoch": 0.1179048669854113, "grad_norm": 1.3429737346084794, "learning_rate": 9.798746758778097e-06, "loss": 0.8497, "step": 3847 }, { "epoch": 0.11793551550815251, "grad_norm": 1.5854149854191824, "learning_rate": 9.798607340331583e-06, "loss": 0.74, "step": 3848 }, { "epoch": 0.11796616403089372, "grad_norm": 0.5749205737988686, "learning_rate": 9.798467874603164e-06, "loss": 0.4804, "step": 3849 }, { "epoch": 0.11799681255363491, "grad_norm": 0.5691550200999955, "learning_rate": 9.798328361594214e-06, "loss": 0.4659, "step": 3850 }, { "epoch": 0.11802746107637611, "grad_norm": 1.4324318831671696, "learning_rate": 9.798188801306105e-06, "loss": 0.7051, "step": 3851 }, { "epoch": 0.11805810959911732, "grad_norm": 1.378885345718533, "learning_rate": 9.798049193740215e-06, "loss": 0.7191, "step": 3852 }, { "epoch": 0.11808875812185853, "grad_norm": 1.3602738151426288, "learning_rate": 9.79790953889792e-06, "loss": 0.7909, "step": 3853 }, { "epoch": 0.11811940664459973, "grad_norm": 1.2949229763079033, "learning_rate": 9.797769836780594e-06, "loss": 0.7781, "step": 3854 }, { "epoch": 0.11815005516734094, "grad_norm": 1.3538777017578147, "learning_rate": 9.797630087389614e-06, "loss": 0.6425, "step": 3855 }, { "epoch": 0.11818070369008214, "grad_norm": 1.4709338092013609, "learning_rate": 9.797490290726356e-06, "loss": 0.6809, "step": 3856 }, { "epoch": 0.11821135221282335, "grad_norm": 1.5782008193669845, "learning_rate": 9.797350446792202e-06, "loss": 0.8658, "step": 3857 }, { "epoch": 0.11824200073556454, "grad_norm": 1.365675926330116, "learning_rate": 9.797210555588523e-06, "loss": 0.7649, "step": 3858 }, { "epoch": 0.11827264925830575, "grad_norm": 1.3306174933936703, "learning_rate": 9.797070617116704e-06, "loss": 0.718, "step": 3859 }, { "epoch": 0.11830329778104695, "grad_norm": 1.3409095726245273, "learning_rate": 9.796930631378118e-06, "loss": 0.7844, "step": 3860 }, { "epoch": 0.11833394630378816, "grad_norm": 1.5355299181179876, "learning_rate": 9.796790598374149e-06, "loss": 0.8597, "step": 3861 }, { "epoch": 0.11836459482652936, "grad_norm": 1.33695092735441, "learning_rate": 9.796650518106175e-06, "loss": 0.7832, "step": 3862 }, { "epoch": 0.11839524334927057, "grad_norm": 1.3184415738427286, "learning_rate": 9.796510390575575e-06, "loss": 0.8386, "step": 3863 }, { "epoch": 0.11842589187201177, "grad_norm": 1.4953869944646654, "learning_rate": 9.796370215783732e-06, "loss": 0.7756, "step": 3864 }, { "epoch": 0.11845654039475298, "grad_norm": 1.3840914708161587, "learning_rate": 9.796229993732026e-06, "loss": 0.7539, "step": 3865 }, { "epoch": 0.11848718891749417, "grad_norm": 1.5835100111417, "learning_rate": 9.796089724421837e-06, "loss": 0.8014, "step": 3866 }, { "epoch": 0.11851783744023538, "grad_norm": 1.425603644331093, "learning_rate": 9.795949407854551e-06, "loss": 0.7238, "step": 3867 }, { "epoch": 0.11854848596297658, "grad_norm": 1.5106379839784037, "learning_rate": 9.795809044031546e-06, "loss": 0.7999, "step": 3868 }, { "epoch": 0.11857913448571779, "grad_norm": 1.2911740245768135, "learning_rate": 9.795668632954209e-06, "loss": 0.6594, "step": 3869 }, { "epoch": 0.118609783008459, "grad_norm": 1.4182064533020142, "learning_rate": 9.795528174623922e-06, "loss": 0.6651, "step": 3870 }, { "epoch": 0.1186404315312002, "grad_norm": 1.8284446960841783, "learning_rate": 9.795387669042069e-06, "loss": 0.7866, "step": 3871 }, { "epoch": 0.1186710800539414, "grad_norm": 1.6025660672547484, "learning_rate": 9.795247116210035e-06, "loss": 0.7002, "step": 3872 }, { "epoch": 0.11870172857668261, "grad_norm": 1.3556089365601596, "learning_rate": 9.795106516129203e-06, "loss": 0.7993, "step": 3873 }, { "epoch": 0.1187323770994238, "grad_norm": 1.356681978218108, "learning_rate": 9.794965868800958e-06, "loss": 0.7099, "step": 3874 }, { "epoch": 0.11876302562216501, "grad_norm": 1.3972654018425645, "learning_rate": 9.79482517422669e-06, "loss": 0.7633, "step": 3875 }, { "epoch": 0.11879367414490621, "grad_norm": 1.6275588322780665, "learning_rate": 9.794684432407781e-06, "loss": 0.7561, "step": 3876 }, { "epoch": 0.11882432266764742, "grad_norm": 1.322311289979466, "learning_rate": 9.794543643345622e-06, "loss": 0.8077, "step": 3877 }, { "epoch": 0.11885497119038863, "grad_norm": 1.3182796960649512, "learning_rate": 9.794402807041596e-06, "loss": 0.7824, "step": 3878 }, { "epoch": 0.11888561971312983, "grad_norm": 1.477196331250637, "learning_rate": 9.794261923497092e-06, "loss": 0.7654, "step": 3879 }, { "epoch": 0.11891626823587104, "grad_norm": 1.7298192461910022, "learning_rate": 9.7941209927135e-06, "loss": 0.7955, "step": 3880 }, { "epoch": 0.11894691675861223, "grad_norm": 1.7518769354787518, "learning_rate": 9.793980014692207e-06, "loss": 0.6431, "step": 3881 }, { "epoch": 0.11897756528135343, "grad_norm": 0.6789137543959803, "learning_rate": 9.793838989434602e-06, "loss": 0.4663, "step": 3882 }, { "epoch": 0.11900821380409464, "grad_norm": 1.4311984523797354, "learning_rate": 9.793697916942074e-06, "loss": 0.7667, "step": 3883 }, { "epoch": 0.11903886232683585, "grad_norm": 1.3411623551553378, "learning_rate": 9.793556797216016e-06, "loss": 0.7042, "step": 3884 }, { "epoch": 0.11906951084957705, "grad_norm": 1.2502687388965652, "learning_rate": 9.793415630257815e-06, "loss": 0.7106, "step": 3885 }, { "epoch": 0.11910015937231826, "grad_norm": 1.3679055248782026, "learning_rate": 9.793274416068862e-06, "loss": 0.7216, "step": 3886 }, { "epoch": 0.11913080789505946, "grad_norm": 1.2685550176232356, "learning_rate": 9.793133154650552e-06, "loss": 0.6696, "step": 3887 }, { "epoch": 0.11916145641780067, "grad_norm": 1.414254699204618, "learning_rate": 9.792991846004274e-06, "loss": 0.7305, "step": 3888 }, { "epoch": 0.11919210494054186, "grad_norm": 1.484459460485501, "learning_rate": 9.792850490131421e-06, "loss": 0.6859, "step": 3889 }, { "epoch": 0.11922275346328307, "grad_norm": 0.5987704409691094, "learning_rate": 9.792709087033386e-06, "loss": 0.4764, "step": 3890 }, { "epoch": 0.11925340198602427, "grad_norm": 1.4831219596176095, "learning_rate": 9.792567636711561e-06, "loss": 0.6891, "step": 3891 }, { "epoch": 0.11928405050876548, "grad_norm": 1.4081257664738458, "learning_rate": 9.792426139167341e-06, "loss": 0.861, "step": 3892 }, { "epoch": 0.11931469903150668, "grad_norm": 0.5494252913877594, "learning_rate": 9.792284594402122e-06, "loss": 0.4776, "step": 3893 }, { "epoch": 0.11934534755424789, "grad_norm": 1.2838241028193373, "learning_rate": 9.792143002417295e-06, "loss": 0.6888, "step": 3894 }, { "epoch": 0.1193759960769891, "grad_norm": 1.4870592987589777, "learning_rate": 9.792001363214257e-06, "loss": 0.7352, "step": 3895 }, { "epoch": 0.1194066445997303, "grad_norm": 0.47815571849265, "learning_rate": 9.791859676794403e-06, "loss": 0.4718, "step": 3896 }, { "epoch": 0.11943729312247149, "grad_norm": 1.360748655639062, "learning_rate": 9.791717943159131e-06, "loss": 0.7748, "step": 3897 }, { "epoch": 0.1194679416452127, "grad_norm": 1.446835445038957, "learning_rate": 9.791576162309835e-06, "loss": 0.7099, "step": 3898 }, { "epoch": 0.1194985901679539, "grad_norm": 1.4026585879203943, "learning_rate": 9.791434334247914e-06, "loss": 0.7628, "step": 3899 }, { "epoch": 0.11952923869069511, "grad_norm": 0.49778914947760716, "learning_rate": 9.791292458974764e-06, "loss": 0.4798, "step": 3900 }, { "epoch": 0.11955988721343631, "grad_norm": 1.2521967778522147, "learning_rate": 9.791150536491784e-06, "loss": 0.6425, "step": 3901 }, { "epoch": 0.11959053573617752, "grad_norm": 1.3942948120221574, "learning_rate": 9.79100856680037e-06, "loss": 0.7518, "step": 3902 }, { "epoch": 0.11962118425891873, "grad_norm": 1.4320440013671372, "learning_rate": 9.790866549901924e-06, "loss": 0.7349, "step": 3903 }, { "epoch": 0.11965183278165993, "grad_norm": 1.4398790693324335, "learning_rate": 9.790724485797846e-06, "loss": 0.7804, "step": 3904 }, { "epoch": 0.11968248130440112, "grad_norm": 1.4236755340825213, "learning_rate": 9.790582374489532e-06, "loss": 0.7363, "step": 3905 }, { "epoch": 0.11971312982714233, "grad_norm": 1.440181570160929, "learning_rate": 9.790440215978383e-06, "loss": 0.7246, "step": 3906 }, { "epoch": 0.11974377834988353, "grad_norm": 1.431416226187689, "learning_rate": 9.790298010265803e-06, "loss": 0.6619, "step": 3907 }, { "epoch": 0.11977442687262474, "grad_norm": 0.5109911016009427, "learning_rate": 9.79015575735319e-06, "loss": 0.4565, "step": 3908 }, { "epoch": 0.11980507539536595, "grad_norm": 1.3345575225486437, "learning_rate": 9.790013457241948e-06, "loss": 0.763, "step": 3909 }, { "epoch": 0.11983572391810715, "grad_norm": 1.4464951237947463, "learning_rate": 9.789871109933477e-06, "loss": 0.7813, "step": 3910 }, { "epoch": 0.11986637244084836, "grad_norm": 1.4783027987508977, "learning_rate": 9.78972871542918e-06, "loss": 0.7501, "step": 3911 }, { "epoch": 0.11989702096358955, "grad_norm": 1.353544594798109, "learning_rate": 9.78958627373046e-06, "loss": 0.7513, "step": 3912 }, { "epoch": 0.11992766948633075, "grad_norm": 1.5415094101915312, "learning_rate": 9.789443784838722e-06, "loss": 0.7777, "step": 3913 }, { "epoch": 0.11995831800907196, "grad_norm": 0.500265894095057, "learning_rate": 9.789301248755368e-06, "loss": 0.472, "step": 3914 }, { "epoch": 0.11998896653181317, "grad_norm": 2.107931613879974, "learning_rate": 9.789158665481804e-06, "loss": 0.7575, "step": 3915 }, { "epoch": 0.12001961505455437, "grad_norm": 1.3153948143677907, "learning_rate": 9.789016035019435e-06, "loss": 0.695, "step": 3916 }, { "epoch": 0.12005026357729558, "grad_norm": 1.3641726315016152, "learning_rate": 9.788873357369665e-06, "loss": 0.7127, "step": 3917 }, { "epoch": 0.12008091210003678, "grad_norm": 1.7240357826713357, "learning_rate": 9.7887306325339e-06, "loss": 0.7745, "step": 3918 }, { "epoch": 0.12011156062277799, "grad_norm": 1.5595423196152536, "learning_rate": 9.788587860513547e-06, "loss": 0.6358, "step": 3919 }, { "epoch": 0.12014220914551918, "grad_norm": 1.4154184083406964, "learning_rate": 9.788445041310013e-06, "loss": 0.6709, "step": 3920 }, { "epoch": 0.12017285766826039, "grad_norm": 1.415252201748325, "learning_rate": 9.788302174924705e-06, "loss": 0.8231, "step": 3921 }, { "epoch": 0.12020350619100159, "grad_norm": 1.3582373099794731, "learning_rate": 9.788159261359031e-06, "loss": 0.7562, "step": 3922 }, { "epoch": 0.1202341547137428, "grad_norm": 0.5284601720779457, "learning_rate": 9.788016300614397e-06, "loss": 0.4921, "step": 3923 }, { "epoch": 0.120264803236484, "grad_norm": 0.49662780888912994, "learning_rate": 9.787873292692216e-06, "loss": 0.4734, "step": 3924 }, { "epoch": 0.12029545175922521, "grad_norm": 0.45672860750486977, "learning_rate": 9.78773023759389e-06, "loss": 0.4467, "step": 3925 }, { "epoch": 0.12032610028196641, "grad_norm": 1.4083147831777099, "learning_rate": 9.787587135320837e-06, "loss": 0.749, "step": 3926 }, { "epoch": 0.12035674880470762, "grad_norm": 1.4555190209727427, "learning_rate": 9.787443985874463e-06, "loss": 0.7474, "step": 3927 }, { "epoch": 0.12038739732744881, "grad_norm": 1.4442142062945065, "learning_rate": 9.787300789256176e-06, "loss": 0.7953, "step": 3928 }, { "epoch": 0.12041804585019002, "grad_norm": 1.451059610403337, "learning_rate": 9.787157545467392e-06, "loss": 0.7541, "step": 3929 }, { "epoch": 0.12044869437293122, "grad_norm": 1.360631345223156, "learning_rate": 9.787014254509517e-06, "loss": 0.7691, "step": 3930 }, { "epoch": 0.12047934289567243, "grad_norm": 1.3873524794722, "learning_rate": 9.786870916383969e-06, "loss": 0.7212, "step": 3931 }, { "epoch": 0.12050999141841363, "grad_norm": 1.1945506579311826, "learning_rate": 9.786727531092154e-06, "loss": 0.6695, "step": 3932 }, { "epoch": 0.12054063994115484, "grad_norm": 1.3267560010611994, "learning_rate": 9.78658409863549e-06, "loss": 0.7227, "step": 3933 }, { "epoch": 0.12057128846389605, "grad_norm": 1.375501342343645, "learning_rate": 9.786440619015387e-06, "loss": 0.761, "step": 3934 }, { "epoch": 0.12060193698663725, "grad_norm": 1.3143629476149268, "learning_rate": 9.78629709223326e-06, "loss": 0.7234, "step": 3935 }, { "epoch": 0.12063258550937844, "grad_norm": 0.5920112716725618, "learning_rate": 9.786153518290524e-06, "loss": 0.4815, "step": 3936 }, { "epoch": 0.12066323403211965, "grad_norm": 1.3604331642393108, "learning_rate": 9.78600989718859e-06, "loss": 0.6679, "step": 3937 }, { "epoch": 0.12069388255486085, "grad_norm": 1.6889962843503314, "learning_rate": 9.78586622892888e-06, "loss": 0.8068, "step": 3938 }, { "epoch": 0.12072453107760206, "grad_norm": 1.5903756256919694, "learning_rate": 9.785722513512803e-06, "loss": 0.7731, "step": 3939 }, { "epoch": 0.12075517960034327, "grad_norm": 1.4740026698203526, "learning_rate": 9.785578750941779e-06, "loss": 0.7912, "step": 3940 }, { "epoch": 0.12078582812308447, "grad_norm": 1.4224327408792745, "learning_rate": 9.785434941217222e-06, "loss": 0.7215, "step": 3941 }, { "epoch": 0.12081647664582568, "grad_norm": 1.6503206916512037, "learning_rate": 9.785291084340551e-06, "loss": 0.8144, "step": 3942 }, { "epoch": 0.12084712516856687, "grad_norm": 1.5784450051432146, "learning_rate": 9.785147180313181e-06, "loss": 0.7373, "step": 3943 }, { "epoch": 0.12087777369130807, "grad_norm": 0.5202228859252501, "learning_rate": 9.785003229136534e-06, "loss": 0.4677, "step": 3944 }, { "epoch": 0.12090842221404928, "grad_norm": 1.627364178311063, "learning_rate": 9.784859230812024e-06, "loss": 0.7567, "step": 3945 }, { "epoch": 0.12093907073679049, "grad_norm": 1.3711074484172634, "learning_rate": 9.784715185341072e-06, "loss": 0.6933, "step": 3946 }, { "epoch": 0.12096971925953169, "grad_norm": 1.1882851361273818, "learning_rate": 9.784571092725097e-06, "loss": 0.7289, "step": 3947 }, { "epoch": 0.1210003677822729, "grad_norm": 1.4975002623202647, "learning_rate": 9.78442695296552e-06, "loss": 0.7552, "step": 3948 }, { "epoch": 0.1210310163050141, "grad_norm": 0.483368106180356, "learning_rate": 9.784282766063758e-06, "loss": 0.4338, "step": 3949 }, { "epoch": 0.12106166482775531, "grad_norm": 1.4609650579621571, "learning_rate": 9.784138532021236e-06, "loss": 0.7744, "step": 3950 }, { "epoch": 0.1210923133504965, "grad_norm": 1.2737317471269565, "learning_rate": 9.783994250839371e-06, "loss": 0.6556, "step": 3951 }, { "epoch": 0.1211229618732377, "grad_norm": 0.49410704432797964, "learning_rate": 9.783849922519589e-06, "loss": 0.4864, "step": 3952 }, { "epoch": 0.12115361039597891, "grad_norm": 1.4165693035400155, "learning_rate": 9.783705547063306e-06, "loss": 0.796, "step": 3953 }, { "epoch": 0.12118425891872012, "grad_norm": 1.469481592469198, "learning_rate": 9.783561124471951e-06, "loss": 0.7471, "step": 3954 }, { "epoch": 0.12121490744146132, "grad_norm": 1.3373575348129105, "learning_rate": 9.783416654746945e-06, "loss": 0.6774, "step": 3955 }, { "epoch": 0.12124555596420253, "grad_norm": 1.482985556062466, "learning_rate": 9.78327213788971e-06, "loss": 0.7592, "step": 3956 }, { "epoch": 0.12127620448694373, "grad_norm": 1.3460222709585588, "learning_rate": 9.78312757390167e-06, "loss": 0.7495, "step": 3957 }, { "epoch": 0.12130685300968494, "grad_norm": 1.3545318523533565, "learning_rate": 9.782982962784252e-06, "loss": 0.7081, "step": 3958 }, { "epoch": 0.12133750153242613, "grad_norm": 1.561950503006349, "learning_rate": 9.782838304538878e-06, "loss": 0.7958, "step": 3959 }, { "epoch": 0.12136815005516734, "grad_norm": 0.5244972965789882, "learning_rate": 9.782693599166973e-06, "loss": 0.4884, "step": 3960 }, { "epoch": 0.12139879857790854, "grad_norm": 1.6144507979535623, "learning_rate": 9.782548846669966e-06, "loss": 0.7549, "step": 3961 }, { "epoch": 0.12142944710064975, "grad_norm": 1.4088465331491062, "learning_rate": 9.782404047049281e-06, "loss": 0.7451, "step": 3962 }, { "epoch": 0.12146009562339095, "grad_norm": 1.4935835336520829, "learning_rate": 9.782259200306345e-06, "loss": 0.7002, "step": 3963 }, { "epoch": 0.12149074414613216, "grad_norm": 1.5821892975366365, "learning_rate": 9.782114306442586e-06, "loss": 0.7144, "step": 3964 }, { "epoch": 0.12152139266887337, "grad_norm": 1.3223824714840977, "learning_rate": 9.78196936545943e-06, "loss": 0.6339, "step": 3965 }, { "epoch": 0.12155204119161457, "grad_norm": 1.338211530466088, "learning_rate": 9.781824377358308e-06, "loss": 0.7315, "step": 3966 }, { "epoch": 0.12158268971435576, "grad_norm": 1.4850182516132546, "learning_rate": 9.781679342140647e-06, "loss": 0.7275, "step": 3967 }, { "epoch": 0.12161333823709697, "grad_norm": 1.4189299706301466, "learning_rate": 9.781534259807874e-06, "loss": 0.7466, "step": 3968 }, { "epoch": 0.12164398675983817, "grad_norm": 1.3205960032913124, "learning_rate": 9.781389130361422e-06, "loss": 0.7005, "step": 3969 }, { "epoch": 0.12167463528257938, "grad_norm": 1.4029660551588619, "learning_rate": 9.781243953802719e-06, "loss": 0.8241, "step": 3970 }, { "epoch": 0.12170528380532059, "grad_norm": 0.5139826967572162, "learning_rate": 9.781098730133196e-06, "loss": 0.4808, "step": 3971 }, { "epoch": 0.12173593232806179, "grad_norm": 0.5261380651534135, "learning_rate": 9.780953459354285e-06, "loss": 0.5006, "step": 3972 }, { "epoch": 0.121766580850803, "grad_norm": 1.356568888376587, "learning_rate": 9.780808141467414e-06, "loss": 0.6653, "step": 3973 }, { "epoch": 0.12179722937354419, "grad_norm": 1.3129942960434466, "learning_rate": 9.78066277647402e-06, "loss": 0.7059, "step": 3974 }, { "epoch": 0.1218278778962854, "grad_norm": 1.468443833642375, "learning_rate": 9.780517364375531e-06, "loss": 0.8688, "step": 3975 }, { "epoch": 0.1218585264190266, "grad_norm": 0.4965382960294472, "learning_rate": 9.780371905173381e-06, "loss": 0.4938, "step": 3976 }, { "epoch": 0.1218891749417678, "grad_norm": 1.42276727144642, "learning_rate": 9.780226398869004e-06, "loss": 0.7586, "step": 3977 }, { "epoch": 0.12191982346450901, "grad_norm": 1.3952254874811614, "learning_rate": 9.780080845463832e-06, "loss": 0.8177, "step": 3978 }, { "epoch": 0.12195047198725022, "grad_norm": 1.520108221998069, "learning_rate": 9.779935244959303e-06, "loss": 0.782, "step": 3979 }, { "epoch": 0.12198112050999142, "grad_norm": 1.307734520853954, "learning_rate": 9.779789597356848e-06, "loss": 0.7301, "step": 3980 }, { "epoch": 0.12201176903273263, "grad_norm": 1.406572319975402, "learning_rate": 9.779643902657902e-06, "loss": 0.8405, "step": 3981 }, { "epoch": 0.12204241755547382, "grad_norm": 1.270693611784277, "learning_rate": 9.779498160863903e-06, "loss": 0.739, "step": 3982 }, { "epoch": 0.12207306607821503, "grad_norm": 1.2956339368570247, "learning_rate": 9.779352371976284e-06, "loss": 0.655, "step": 3983 }, { "epoch": 0.12210371460095623, "grad_norm": 0.5297544018481323, "learning_rate": 9.779206535996487e-06, "loss": 0.4873, "step": 3984 }, { "epoch": 0.12213436312369744, "grad_norm": 1.3590649697378476, "learning_rate": 9.77906065292594e-06, "loss": 0.6351, "step": 3985 }, { "epoch": 0.12216501164643864, "grad_norm": 1.5255261010439165, "learning_rate": 9.778914722766089e-06, "loss": 0.7074, "step": 3986 }, { "epoch": 0.12219566016917985, "grad_norm": 1.4064080886309, "learning_rate": 9.778768745518367e-06, "loss": 0.8491, "step": 3987 }, { "epoch": 0.12222630869192105, "grad_norm": 1.4693654670527447, "learning_rate": 9.778622721184216e-06, "loss": 0.7696, "step": 3988 }, { "epoch": 0.12225695721466226, "grad_norm": 1.4100859441573212, "learning_rate": 9.778476649765071e-06, "loss": 0.7934, "step": 3989 }, { "epoch": 0.12228760573740345, "grad_norm": 1.3449757587041151, "learning_rate": 9.778330531262373e-06, "loss": 0.7802, "step": 3990 }, { "epoch": 0.12231825426014466, "grad_norm": 1.2654157868477551, "learning_rate": 9.778184365677561e-06, "loss": 0.6152, "step": 3991 }, { "epoch": 0.12234890278288586, "grad_norm": 0.49647782733895784, "learning_rate": 9.778038153012078e-06, "loss": 0.4798, "step": 3992 }, { "epoch": 0.12237955130562707, "grad_norm": 1.4315946511045976, "learning_rate": 9.77789189326736e-06, "loss": 0.7184, "step": 3993 }, { "epoch": 0.12241019982836827, "grad_norm": 1.282153236099706, "learning_rate": 9.777745586444853e-06, "loss": 0.7372, "step": 3994 }, { "epoch": 0.12244084835110948, "grad_norm": 1.6107690480899377, "learning_rate": 9.777599232545994e-06, "loss": 0.8314, "step": 3995 }, { "epoch": 0.12247149687385069, "grad_norm": 1.362971274898408, "learning_rate": 9.777452831572229e-06, "loss": 0.6837, "step": 3996 }, { "epoch": 0.12250214539659189, "grad_norm": 1.3048418552538945, "learning_rate": 9.777306383524999e-06, "loss": 0.7732, "step": 3997 }, { "epoch": 0.12253279391933308, "grad_norm": 1.4022943218217707, "learning_rate": 9.777159888405746e-06, "loss": 0.7105, "step": 3998 }, { "epoch": 0.12256344244207429, "grad_norm": 1.4059131308459318, "learning_rate": 9.777013346215915e-06, "loss": 0.7088, "step": 3999 }, { "epoch": 0.1225940909648155, "grad_norm": 1.3168628587709401, "learning_rate": 9.776866756956948e-06, "loss": 0.709, "step": 4000 }, { "epoch": 0.1226247394875567, "grad_norm": 1.2827623785550821, "learning_rate": 9.776720120630293e-06, "loss": 0.6757, "step": 4001 }, { "epoch": 0.1226553880102979, "grad_norm": 1.4713194917991184, "learning_rate": 9.776573437237391e-06, "loss": 0.6944, "step": 4002 }, { "epoch": 0.12268603653303911, "grad_norm": 1.5243862090337745, "learning_rate": 9.776426706779688e-06, "loss": 0.7963, "step": 4003 }, { "epoch": 0.12271668505578032, "grad_norm": 1.527544886587404, "learning_rate": 9.776279929258632e-06, "loss": 0.7057, "step": 4004 }, { "epoch": 0.12274733357852151, "grad_norm": 1.605614315925203, "learning_rate": 9.776133104675667e-06, "loss": 0.7708, "step": 4005 }, { "epoch": 0.12277798210126271, "grad_norm": 1.6472635328038454, "learning_rate": 9.775986233032241e-06, "loss": 0.7208, "step": 4006 }, { "epoch": 0.12280863062400392, "grad_norm": 1.3177270249362762, "learning_rate": 9.7758393143298e-06, "loss": 0.7399, "step": 4007 }, { "epoch": 0.12283927914674513, "grad_norm": 0.508383973315156, "learning_rate": 9.775692348569792e-06, "loss": 0.4518, "step": 4008 }, { "epoch": 0.12286992766948633, "grad_norm": 1.2960354756888137, "learning_rate": 9.775545335753667e-06, "loss": 0.7876, "step": 4009 }, { "epoch": 0.12290057619222754, "grad_norm": 1.2045815793759713, "learning_rate": 9.77539827588287e-06, "loss": 0.6943, "step": 4010 }, { "epoch": 0.12293122471496874, "grad_norm": 1.40318175356356, "learning_rate": 9.775251168958853e-06, "loss": 0.6748, "step": 4011 }, { "epoch": 0.12296187323770995, "grad_norm": 1.3552771429769939, "learning_rate": 9.775104014983066e-06, "loss": 0.6826, "step": 4012 }, { "epoch": 0.12299252176045114, "grad_norm": 1.3810990024506613, "learning_rate": 9.774956813956954e-06, "loss": 0.7221, "step": 4013 }, { "epoch": 0.12302317028319235, "grad_norm": 0.5348313101613975, "learning_rate": 9.774809565881973e-06, "loss": 0.4904, "step": 4014 }, { "epoch": 0.12305381880593355, "grad_norm": 1.272646856785744, "learning_rate": 9.774662270759571e-06, "loss": 0.7399, "step": 4015 }, { "epoch": 0.12308446732867476, "grad_norm": 1.7865459265931545, "learning_rate": 9.7745149285912e-06, "loss": 0.7833, "step": 4016 }, { "epoch": 0.12311511585141596, "grad_norm": 1.4031814006379904, "learning_rate": 9.774367539378313e-06, "loss": 0.6803, "step": 4017 }, { "epoch": 0.12314576437415717, "grad_norm": 1.5067750524577317, "learning_rate": 9.77422010312236e-06, "loss": 0.7685, "step": 4018 }, { "epoch": 0.12317641289689837, "grad_norm": 1.354121335065784, "learning_rate": 9.774072619824794e-06, "loss": 0.726, "step": 4019 }, { "epoch": 0.12320706141963958, "grad_norm": 1.4448858060397132, "learning_rate": 9.773925089487069e-06, "loss": 0.6621, "step": 4020 }, { "epoch": 0.12323770994238077, "grad_norm": 1.498434040127427, "learning_rate": 9.773777512110641e-06, "loss": 0.698, "step": 4021 }, { "epoch": 0.12326835846512198, "grad_norm": 1.5819851068267623, "learning_rate": 9.77362988769696e-06, "loss": 0.6853, "step": 4022 }, { "epoch": 0.12329900698786318, "grad_norm": 1.280745448710758, "learning_rate": 9.773482216247482e-06, "loss": 0.7652, "step": 4023 }, { "epoch": 0.12332965551060439, "grad_norm": 1.3978788155242496, "learning_rate": 9.773334497763663e-06, "loss": 0.8032, "step": 4024 }, { "epoch": 0.1233603040333456, "grad_norm": 0.5227884846271589, "learning_rate": 9.773186732246957e-06, "loss": 0.4811, "step": 4025 }, { "epoch": 0.1233909525560868, "grad_norm": 1.4869107767862122, "learning_rate": 9.773038919698821e-06, "loss": 0.6834, "step": 4026 }, { "epoch": 0.123421601078828, "grad_norm": 2.3885481298022753, "learning_rate": 9.772891060120713e-06, "loss": 0.6476, "step": 4027 }, { "epoch": 0.12345224960156921, "grad_norm": 1.4118575375844002, "learning_rate": 9.772743153514088e-06, "loss": 0.707, "step": 4028 }, { "epoch": 0.1234828981243104, "grad_norm": 0.4520375636926507, "learning_rate": 9.772595199880402e-06, "loss": 0.4226, "step": 4029 }, { "epoch": 0.12351354664705161, "grad_norm": 1.382944328286959, "learning_rate": 9.772447199221114e-06, "loss": 0.6997, "step": 4030 }, { "epoch": 0.12354419516979281, "grad_norm": 1.3473615286318548, "learning_rate": 9.772299151537684e-06, "loss": 0.7117, "step": 4031 }, { "epoch": 0.12357484369253402, "grad_norm": 1.4825376322069403, "learning_rate": 9.77215105683157e-06, "loss": 0.7335, "step": 4032 }, { "epoch": 0.12360549221527523, "grad_norm": 0.47525114947138053, "learning_rate": 9.772002915104228e-06, "loss": 0.4488, "step": 4033 }, { "epoch": 0.12363614073801643, "grad_norm": 1.4435521300390712, "learning_rate": 9.771854726357123e-06, "loss": 0.726, "step": 4034 }, { "epoch": 0.12366678926075764, "grad_norm": 1.2639764882630011, "learning_rate": 9.771706490591711e-06, "loss": 0.8208, "step": 4035 }, { "epoch": 0.12369743778349883, "grad_norm": 1.2748695236404228, "learning_rate": 9.771558207809455e-06, "loss": 0.6968, "step": 4036 }, { "epoch": 0.12372808630624003, "grad_norm": 1.4826679449223936, "learning_rate": 9.771409878011814e-06, "loss": 0.7764, "step": 4037 }, { "epoch": 0.12375873482898124, "grad_norm": 1.31690596847317, "learning_rate": 9.771261501200251e-06, "loss": 0.6122, "step": 4038 }, { "epoch": 0.12378938335172245, "grad_norm": 1.3665522189868584, "learning_rate": 9.771113077376229e-06, "loss": 0.7961, "step": 4039 }, { "epoch": 0.12382003187446365, "grad_norm": 1.3497972175688366, "learning_rate": 9.770964606541208e-06, "loss": 0.7391, "step": 4040 }, { "epoch": 0.12385068039720486, "grad_norm": 1.4782989671756288, "learning_rate": 9.770816088696652e-06, "loss": 0.8308, "step": 4041 }, { "epoch": 0.12388132891994606, "grad_norm": 1.2605130860976155, "learning_rate": 9.770667523844024e-06, "loss": 0.7135, "step": 4042 }, { "epoch": 0.12391197744268727, "grad_norm": 1.2821764715561508, "learning_rate": 9.77051891198479e-06, "loss": 0.7719, "step": 4043 }, { "epoch": 0.12394262596542846, "grad_norm": 1.3993444943985833, "learning_rate": 9.770370253120411e-06, "loss": 0.6437, "step": 4044 }, { "epoch": 0.12397327448816967, "grad_norm": 1.4142754290287007, "learning_rate": 9.770221547252354e-06, "loss": 0.7472, "step": 4045 }, { "epoch": 0.12400392301091087, "grad_norm": 1.4978369980343862, "learning_rate": 9.770072794382082e-06, "loss": 0.8081, "step": 4046 }, { "epoch": 0.12403457153365208, "grad_norm": 1.3182983901928187, "learning_rate": 9.769923994511064e-06, "loss": 0.7041, "step": 4047 }, { "epoch": 0.12406522005639328, "grad_norm": 1.2480793830023542, "learning_rate": 9.769775147640762e-06, "loss": 0.7188, "step": 4048 }, { "epoch": 0.12409586857913449, "grad_norm": 1.705797932853928, "learning_rate": 9.769626253772648e-06, "loss": 0.7681, "step": 4049 }, { "epoch": 0.1241265171018757, "grad_norm": 1.5107182863679267, "learning_rate": 9.769477312908186e-06, "loss": 0.7152, "step": 4050 }, { "epoch": 0.1241571656246169, "grad_norm": 1.4524333413820218, "learning_rate": 9.769328325048844e-06, "loss": 0.6745, "step": 4051 }, { "epoch": 0.12418781414735809, "grad_norm": 1.465787586674722, "learning_rate": 9.769179290196089e-06, "loss": 0.7734, "step": 4052 }, { "epoch": 0.1242184626700993, "grad_norm": 1.3170445234920256, "learning_rate": 9.769030208351389e-06, "loss": 0.6889, "step": 4053 }, { "epoch": 0.1242491111928405, "grad_norm": 1.3869872398141974, "learning_rate": 9.768881079516214e-06, "loss": 0.8035, "step": 4054 }, { "epoch": 0.12427975971558171, "grad_norm": 0.5290853631538156, "learning_rate": 9.768731903692035e-06, "loss": 0.4622, "step": 4055 }, { "epoch": 0.12431040823832291, "grad_norm": 1.566128069675057, "learning_rate": 9.76858268088032e-06, "loss": 0.7245, "step": 4056 }, { "epoch": 0.12434105676106412, "grad_norm": 1.5220097442548057, "learning_rate": 9.76843341108254e-06, "loss": 0.8189, "step": 4057 }, { "epoch": 0.12437170528380533, "grad_norm": 1.7191930516626308, "learning_rate": 9.768284094300165e-06, "loss": 0.7536, "step": 4058 }, { "epoch": 0.12440235380654653, "grad_norm": 1.5566392624693248, "learning_rate": 9.768134730534667e-06, "loss": 0.7075, "step": 4059 }, { "epoch": 0.12443300232928772, "grad_norm": 1.58874069183159, "learning_rate": 9.767985319787519e-06, "loss": 0.7221, "step": 4060 }, { "epoch": 0.12446365085202893, "grad_norm": 1.4250232836108758, "learning_rate": 9.767835862060188e-06, "loss": 0.7413, "step": 4061 }, { "epoch": 0.12449429937477013, "grad_norm": 1.2838567000439651, "learning_rate": 9.767686357354154e-06, "loss": 0.7982, "step": 4062 }, { "epoch": 0.12452494789751134, "grad_norm": 1.5441787500598887, "learning_rate": 9.767536805670884e-06, "loss": 0.7872, "step": 4063 }, { "epoch": 0.12455559642025255, "grad_norm": 0.501525315926565, "learning_rate": 9.767387207011856e-06, "loss": 0.476, "step": 4064 }, { "epoch": 0.12458624494299375, "grad_norm": 1.477822195420104, "learning_rate": 9.767237561378541e-06, "loss": 0.7329, "step": 4065 }, { "epoch": 0.12461689346573496, "grad_norm": 1.4272511130422212, "learning_rate": 9.767087868772415e-06, "loss": 0.6829, "step": 4066 }, { "epoch": 0.12464754198847615, "grad_norm": 1.7297116617857922, "learning_rate": 9.766938129194952e-06, "loss": 0.7853, "step": 4067 }, { "epoch": 0.12467819051121735, "grad_norm": 1.1985277675618864, "learning_rate": 9.76678834264763e-06, "loss": 0.8034, "step": 4068 }, { "epoch": 0.12470883903395856, "grad_norm": 0.49715247958587383, "learning_rate": 9.766638509131919e-06, "loss": 0.4741, "step": 4069 }, { "epoch": 0.12473948755669977, "grad_norm": 1.4995787069172157, "learning_rate": 9.766488628649303e-06, "loss": 0.7244, "step": 4070 }, { "epoch": 0.12477013607944097, "grad_norm": 1.4090043079961296, "learning_rate": 9.766338701201252e-06, "loss": 0.7693, "step": 4071 }, { "epoch": 0.12480078460218218, "grad_norm": 1.594099399395593, "learning_rate": 9.766188726789248e-06, "loss": 0.7686, "step": 4072 }, { "epoch": 0.12483143312492338, "grad_norm": 1.3292943436955147, "learning_rate": 9.766038705414766e-06, "loss": 0.7262, "step": 4073 }, { "epoch": 0.12486208164766459, "grad_norm": 1.2973349516152337, "learning_rate": 9.765888637079287e-06, "loss": 0.6716, "step": 4074 }, { "epoch": 0.12489273017040578, "grad_norm": 1.267860462682815, "learning_rate": 9.765738521784285e-06, "loss": 0.7133, "step": 4075 }, { "epoch": 0.12492337869314699, "grad_norm": 1.3314363753155267, "learning_rate": 9.765588359531243e-06, "loss": 0.6848, "step": 4076 }, { "epoch": 0.12495402721588819, "grad_norm": 1.4352987107486421, "learning_rate": 9.76543815032164e-06, "loss": 0.753, "step": 4077 }, { "epoch": 0.1249846757386294, "grad_norm": 1.3685972847133459, "learning_rate": 9.765287894156957e-06, "loss": 0.6554, "step": 4078 }, { "epoch": 0.1250153242613706, "grad_norm": 1.3758970902581384, "learning_rate": 9.765137591038671e-06, "loss": 0.6353, "step": 4079 }, { "epoch": 0.1250459727841118, "grad_norm": 1.44782561310011, "learning_rate": 9.764987240968266e-06, "loss": 0.7185, "step": 4080 }, { "epoch": 0.12507662130685301, "grad_norm": 0.5255647218893393, "learning_rate": 9.764836843947222e-06, "loss": 0.4675, "step": 4081 }, { "epoch": 0.12510726982959422, "grad_norm": 1.381773126054093, "learning_rate": 9.764686399977021e-06, "loss": 0.8145, "step": 4082 }, { "epoch": 0.12513791835233543, "grad_norm": 1.2979763674094174, "learning_rate": 9.764535909059147e-06, "loss": 0.7901, "step": 4083 }, { "epoch": 0.12516856687507663, "grad_norm": 1.393764349297821, "learning_rate": 9.764385371195082e-06, "loss": 0.7632, "step": 4084 }, { "epoch": 0.12519921539781784, "grad_norm": 1.3491621343836007, "learning_rate": 9.764234786386308e-06, "loss": 0.722, "step": 4085 }, { "epoch": 0.12522986392055904, "grad_norm": 1.3150804347617495, "learning_rate": 9.764084154634311e-06, "loss": 0.7242, "step": 4086 }, { "epoch": 0.12526051244330022, "grad_norm": 1.751031679884412, "learning_rate": 9.763933475940571e-06, "loss": 0.7665, "step": 4087 }, { "epoch": 0.12529116096604143, "grad_norm": 1.470349443536469, "learning_rate": 9.763782750306578e-06, "loss": 0.7859, "step": 4088 }, { "epoch": 0.12532180948878263, "grad_norm": 1.3791984979077532, "learning_rate": 9.763631977733815e-06, "loss": 0.7088, "step": 4089 }, { "epoch": 0.12535245801152384, "grad_norm": 1.338131121594351, "learning_rate": 9.763481158223764e-06, "loss": 0.7322, "step": 4090 }, { "epoch": 0.12538310653426504, "grad_norm": 1.5556612034608186, "learning_rate": 9.763330291777918e-06, "loss": 0.6144, "step": 4091 }, { "epoch": 0.12541375505700625, "grad_norm": 1.2722478224007547, "learning_rate": 9.763179378397759e-06, "loss": 0.6767, "step": 4092 }, { "epoch": 0.12544440357974745, "grad_norm": 1.4022619227269475, "learning_rate": 9.763028418084773e-06, "loss": 0.7672, "step": 4093 }, { "epoch": 0.12547505210248866, "grad_norm": 1.3633889450403722, "learning_rate": 9.76287741084045e-06, "loss": 0.7929, "step": 4094 }, { "epoch": 0.12550570062522987, "grad_norm": 1.5300267348283436, "learning_rate": 9.762726356666279e-06, "loss": 0.7898, "step": 4095 }, { "epoch": 0.12553634914797107, "grad_norm": 1.5520142177220184, "learning_rate": 9.762575255563747e-06, "loss": 0.7751, "step": 4096 }, { "epoch": 0.12556699767071228, "grad_norm": 1.4229104127071266, "learning_rate": 9.76242410753434e-06, "loss": 0.7724, "step": 4097 }, { "epoch": 0.12559764619345348, "grad_norm": 1.4292229921657016, "learning_rate": 9.762272912579551e-06, "loss": 0.6558, "step": 4098 }, { "epoch": 0.1256282947161947, "grad_norm": 0.5761782053965505, "learning_rate": 9.762121670700867e-06, "loss": 0.4914, "step": 4099 }, { "epoch": 0.1256589432389359, "grad_norm": 1.4195165418927858, "learning_rate": 9.761970381899782e-06, "loss": 0.7774, "step": 4100 }, { "epoch": 0.1256895917616771, "grad_norm": 1.3407682831767223, "learning_rate": 9.761819046177782e-06, "loss": 0.6877, "step": 4101 }, { "epoch": 0.12572024028441828, "grad_norm": 1.5608283686107007, "learning_rate": 9.761667663536363e-06, "loss": 0.7813, "step": 4102 }, { "epoch": 0.12575088880715948, "grad_norm": 1.473920752585584, "learning_rate": 9.761516233977014e-06, "loss": 0.6806, "step": 4103 }, { "epoch": 0.1257815373299007, "grad_norm": 1.288610714743151, "learning_rate": 9.761364757501227e-06, "loss": 0.7313, "step": 4104 }, { "epoch": 0.1258121858526419, "grad_norm": 1.5056182645689786, "learning_rate": 9.761213234110494e-06, "loss": 0.6522, "step": 4105 }, { "epoch": 0.1258428343753831, "grad_norm": 1.489076907629732, "learning_rate": 9.761061663806308e-06, "loss": 0.6684, "step": 4106 }, { "epoch": 0.1258734828981243, "grad_norm": 1.439045458453329, "learning_rate": 9.760910046590164e-06, "loss": 0.7703, "step": 4107 }, { "epoch": 0.1259041314208655, "grad_norm": 1.306061955742449, "learning_rate": 9.760758382463555e-06, "loss": 0.6878, "step": 4108 }, { "epoch": 0.12593477994360672, "grad_norm": 1.4299578434215285, "learning_rate": 9.760606671427976e-06, "loss": 0.677, "step": 4109 }, { "epoch": 0.12596542846634792, "grad_norm": 1.4866823873388528, "learning_rate": 9.760454913484923e-06, "loss": 0.6571, "step": 4110 }, { "epoch": 0.12599607698908913, "grad_norm": 0.5760608659209299, "learning_rate": 9.760303108635887e-06, "loss": 0.4774, "step": 4111 }, { "epoch": 0.12602672551183033, "grad_norm": 0.5359693343931723, "learning_rate": 9.760151256882368e-06, "loss": 0.4594, "step": 4112 }, { "epoch": 0.12605737403457154, "grad_norm": 1.3860283942608695, "learning_rate": 9.759999358225861e-06, "loss": 0.7805, "step": 4113 }, { "epoch": 0.12608802255731275, "grad_norm": 1.2991758288573145, "learning_rate": 9.759847412667862e-06, "loss": 0.766, "step": 4114 }, { "epoch": 0.12611867108005395, "grad_norm": 1.4516751959783019, "learning_rate": 9.759695420209867e-06, "loss": 0.7547, "step": 4115 }, { "epoch": 0.12614931960279516, "grad_norm": 1.5651326512857489, "learning_rate": 9.759543380853379e-06, "loss": 0.6463, "step": 4116 }, { "epoch": 0.12617996812553636, "grad_norm": 1.3937825536337445, "learning_rate": 9.759391294599889e-06, "loss": 0.7194, "step": 4117 }, { "epoch": 0.12621061664827754, "grad_norm": 1.487634251592637, "learning_rate": 9.7592391614509e-06, "loss": 0.6984, "step": 4118 }, { "epoch": 0.12624126517101875, "grad_norm": 1.4595444392673567, "learning_rate": 9.759086981407909e-06, "loss": 0.7901, "step": 4119 }, { "epoch": 0.12627191369375995, "grad_norm": 1.5194989409065003, "learning_rate": 9.758934754472418e-06, "loss": 0.6811, "step": 4120 }, { "epoch": 0.12630256221650116, "grad_norm": 1.534678380409517, "learning_rate": 9.758782480645923e-06, "loss": 0.7391, "step": 4121 }, { "epoch": 0.12633321073924236, "grad_norm": 1.3840147891067738, "learning_rate": 9.758630159929928e-06, "loss": 0.696, "step": 4122 }, { "epoch": 0.12636385926198357, "grad_norm": 1.3260802192223933, "learning_rate": 9.758477792325932e-06, "loss": 0.7896, "step": 4123 }, { "epoch": 0.12639450778472477, "grad_norm": 1.482966448263461, "learning_rate": 9.758325377835437e-06, "loss": 0.7056, "step": 4124 }, { "epoch": 0.12642515630746598, "grad_norm": 1.6168105724143098, "learning_rate": 9.758172916459944e-06, "loss": 0.8407, "step": 4125 }, { "epoch": 0.12645580483020719, "grad_norm": 0.9002314202740533, "learning_rate": 9.758020408200956e-06, "loss": 0.4871, "step": 4126 }, { "epoch": 0.1264864533529484, "grad_norm": 1.314428859038665, "learning_rate": 9.757867853059976e-06, "loss": 0.6456, "step": 4127 }, { "epoch": 0.1265171018756896, "grad_norm": 1.3592836374827564, "learning_rate": 9.757715251038508e-06, "loss": 0.7035, "step": 4128 }, { "epoch": 0.1265477503984308, "grad_norm": 1.2716066883518737, "learning_rate": 9.757562602138054e-06, "loss": 0.7281, "step": 4129 }, { "epoch": 0.126578398921172, "grad_norm": 1.2948177345114973, "learning_rate": 9.757409906360116e-06, "loss": 0.698, "step": 4130 }, { "epoch": 0.12660904744391321, "grad_norm": 1.35296514164454, "learning_rate": 9.757257163706203e-06, "loss": 0.8222, "step": 4131 }, { "epoch": 0.12663969596665442, "grad_norm": 1.415017824784417, "learning_rate": 9.757104374177817e-06, "loss": 0.7502, "step": 4132 }, { "epoch": 0.1266703444893956, "grad_norm": 1.3506720144790325, "learning_rate": 9.756951537776464e-06, "loss": 0.7211, "step": 4133 }, { "epoch": 0.1267009930121368, "grad_norm": 1.4227811687793392, "learning_rate": 9.756798654503652e-06, "loss": 0.7091, "step": 4134 }, { "epoch": 0.126731641534878, "grad_norm": 1.620307953035467, "learning_rate": 9.756645724360884e-06, "loss": 0.6527, "step": 4135 }, { "epoch": 0.12676229005761921, "grad_norm": 1.215545781072814, "learning_rate": 9.75649274734967e-06, "loss": 0.6737, "step": 4136 }, { "epoch": 0.12679293858036042, "grad_norm": 1.426980044713085, "learning_rate": 9.756339723471516e-06, "loss": 0.7776, "step": 4137 }, { "epoch": 0.12682358710310163, "grad_norm": 0.8110637471591468, "learning_rate": 9.75618665272793e-06, "loss": 0.4975, "step": 4138 }, { "epoch": 0.12685423562584283, "grad_norm": 1.3408758490995354, "learning_rate": 9.75603353512042e-06, "loss": 0.7401, "step": 4139 }, { "epoch": 0.12688488414858404, "grad_norm": 1.5215900279029069, "learning_rate": 9.755880370650492e-06, "loss": 0.6488, "step": 4140 }, { "epoch": 0.12691553267132524, "grad_norm": 1.3123183290066096, "learning_rate": 9.755727159319661e-06, "loss": 0.7548, "step": 4141 }, { "epoch": 0.12694618119406645, "grad_norm": 1.4190094524995567, "learning_rate": 9.755573901129431e-06, "loss": 0.6942, "step": 4142 }, { "epoch": 0.12697682971680765, "grad_norm": 1.3494321569863856, "learning_rate": 9.755420596081316e-06, "loss": 0.7618, "step": 4143 }, { "epoch": 0.12700747823954886, "grad_norm": 1.2429495142515834, "learning_rate": 9.755267244176826e-06, "loss": 0.7683, "step": 4144 }, { "epoch": 0.12703812676229007, "grad_norm": 1.4775087661580788, "learning_rate": 9.75511384541747e-06, "loss": 0.7606, "step": 4145 }, { "epoch": 0.12706877528503127, "grad_norm": 1.1409832560569229, "learning_rate": 9.75496039980476e-06, "loss": 0.699, "step": 4146 }, { "epoch": 0.12709942380777248, "grad_norm": 1.5115370934695984, "learning_rate": 9.75480690734021e-06, "loss": 0.7384, "step": 4147 }, { "epoch": 0.12713007233051368, "grad_norm": 1.3648128920956035, "learning_rate": 9.75465336802533e-06, "loss": 0.647, "step": 4148 }, { "epoch": 0.12716072085325486, "grad_norm": 1.3927554734806857, "learning_rate": 9.754499781861634e-06, "loss": 0.7001, "step": 4149 }, { "epoch": 0.12719136937599607, "grad_norm": 1.6733912226081586, "learning_rate": 9.754346148850635e-06, "loss": 0.7479, "step": 4150 }, { "epoch": 0.12722201789873727, "grad_norm": 1.6124966005373937, "learning_rate": 9.754192468993849e-06, "loss": 0.7411, "step": 4151 }, { "epoch": 0.12725266642147848, "grad_norm": 1.389468186855369, "learning_rate": 9.754038742292786e-06, "loss": 0.7524, "step": 4152 }, { "epoch": 0.12728331494421968, "grad_norm": 1.4809283750237174, "learning_rate": 9.753884968748964e-06, "loss": 0.7838, "step": 4153 }, { "epoch": 0.1273139634669609, "grad_norm": 1.515372435632484, "learning_rate": 9.753731148363897e-06, "loss": 0.7178, "step": 4154 }, { "epoch": 0.1273446119897021, "grad_norm": 1.5706478960744368, "learning_rate": 9.7535772811391e-06, "loss": 0.819, "step": 4155 }, { "epoch": 0.1273752605124433, "grad_norm": 1.2846295497846045, "learning_rate": 9.753423367076088e-06, "loss": 0.6847, "step": 4156 }, { "epoch": 0.1274059090351845, "grad_norm": 1.4394451331448175, "learning_rate": 9.753269406176382e-06, "loss": 0.7698, "step": 4157 }, { "epoch": 0.1274365575579257, "grad_norm": 1.3652899066159518, "learning_rate": 9.753115398441496e-06, "loss": 0.7058, "step": 4158 }, { "epoch": 0.12746720608066692, "grad_norm": 1.3838985908186645, "learning_rate": 9.752961343872947e-06, "loss": 0.7303, "step": 4159 }, { "epoch": 0.12749785460340812, "grad_norm": 1.2772591628858767, "learning_rate": 9.752807242472255e-06, "loss": 0.7156, "step": 4160 }, { "epoch": 0.12752850312614933, "grad_norm": 1.3067923251412876, "learning_rate": 9.752653094240935e-06, "loss": 0.6456, "step": 4161 }, { "epoch": 0.12755915164889053, "grad_norm": 0.639605850289485, "learning_rate": 9.752498899180512e-06, "loss": 0.4847, "step": 4162 }, { "epoch": 0.12758980017163174, "grad_norm": 1.3460560824714136, "learning_rate": 9.752344657292497e-06, "loss": 0.6966, "step": 4163 }, { "epoch": 0.12762044869437292, "grad_norm": 0.49543177708314456, "learning_rate": 9.752190368578415e-06, "loss": 0.4726, "step": 4164 }, { "epoch": 0.12765109721711412, "grad_norm": 1.5374022697967802, "learning_rate": 9.752036033039788e-06, "loss": 0.7659, "step": 4165 }, { "epoch": 0.12768174573985533, "grad_norm": 0.4700393251146483, "learning_rate": 9.751881650678132e-06, "loss": 0.4604, "step": 4166 }, { "epoch": 0.12771239426259653, "grad_norm": 1.5053577297225484, "learning_rate": 9.751727221494971e-06, "loss": 0.7788, "step": 4167 }, { "epoch": 0.12774304278533774, "grad_norm": 1.5178092986826746, "learning_rate": 9.751572745491827e-06, "loss": 0.7943, "step": 4168 }, { "epoch": 0.12777369130807895, "grad_norm": 0.5523109949308805, "learning_rate": 9.751418222670219e-06, "loss": 0.4958, "step": 4169 }, { "epoch": 0.12780433983082015, "grad_norm": 1.5349742788818956, "learning_rate": 9.751263653031673e-06, "loss": 0.836, "step": 4170 }, { "epoch": 0.12783498835356136, "grad_norm": 1.588722343546351, "learning_rate": 9.751109036577709e-06, "loss": 0.7299, "step": 4171 }, { "epoch": 0.12786563687630256, "grad_norm": 1.4262809697915968, "learning_rate": 9.750954373309854e-06, "loss": 0.7848, "step": 4172 }, { "epoch": 0.12789628539904377, "grad_norm": 1.4241265423782319, "learning_rate": 9.750799663229627e-06, "loss": 0.7492, "step": 4173 }, { "epoch": 0.12792693392178497, "grad_norm": 1.4807107511549928, "learning_rate": 9.750644906338559e-06, "loss": 0.7653, "step": 4174 }, { "epoch": 0.12795758244452618, "grad_norm": 1.5337691888965737, "learning_rate": 9.750490102638169e-06, "loss": 0.7206, "step": 4175 }, { "epoch": 0.12798823096726739, "grad_norm": 1.4925638103189618, "learning_rate": 9.750335252129985e-06, "loss": 0.6758, "step": 4176 }, { "epoch": 0.1280188794900086, "grad_norm": 1.2562790283616432, "learning_rate": 9.750180354815531e-06, "loss": 0.6929, "step": 4177 }, { "epoch": 0.1280495280127498, "grad_norm": 1.273437368246884, "learning_rate": 9.750025410696337e-06, "loss": 0.7678, "step": 4178 }, { "epoch": 0.128080176535491, "grad_norm": 1.3247261089610567, "learning_rate": 9.749870419773926e-06, "loss": 0.7192, "step": 4179 }, { "epoch": 0.12811082505823218, "grad_norm": 1.3049511195915118, "learning_rate": 9.749715382049827e-06, "loss": 0.6455, "step": 4180 }, { "epoch": 0.1281414735809734, "grad_norm": 1.5551402325202106, "learning_rate": 9.749560297525567e-06, "loss": 0.7462, "step": 4181 }, { "epoch": 0.1281721221037146, "grad_norm": 1.2990270554768772, "learning_rate": 9.749405166202673e-06, "loss": 0.7127, "step": 4182 }, { "epoch": 0.1282027706264558, "grad_norm": 1.4042204250626371, "learning_rate": 9.749249988082677e-06, "loss": 0.7627, "step": 4183 }, { "epoch": 0.128233419149197, "grad_norm": 0.5754920169269943, "learning_rate": 9.749094763167104e-06, "loss": 0.4597, "step": 4184 }, { "epoch": 0.1282640676719382, "grad_norm": 1.138301914951398, "learning_rate": 9.748939491457485e-06, "loss": 0.6779, "step": 4185 }, { "epoch": 0.12829471619467941, "grad_norm": 1.297637355355711, "learning_rate": 9.74878417295535e-06, "loss": 0.6669, "step": 4186 }, { "epoch": 0.12832536471742062, "grad_norm": 1.4287046195841107, "learning_rate": 9.74862880766223e-06, "loss": 0.7936, "step": 4187 }, { "epoch": 0.12835601324016183, "grad_norm": 0.4833799260283443, "learning_rate": 9.748473395579656e-06, "loss": 0.4631, "step": 4188 }, { "epoch": 0.12838666176290303, "grad_norm": 1.2257343476394365, "learning_rate": 9.748317936709158e-06, "loss": 0.6868, "step": 4189 }, { "epoch": 0.12841731028564424, "grad_norm": 1.5233411733636133, "learning_rate": 9.748162431052269e-06, "loss": 0.7903, "step": 4190 }, { "epoch": 0.12844795880838544, "grad_norm": 1.7335010203527705, "learning_rate": 9.74800687861052e-06, "loss": 0.7642, "step": 4191 }, { "epoch": 0.12847860733112665, "grad_norm": 0.4772270203810507, "learning_rate": 9.747851279385445e-06, "loss": 0.4699, "step": 4192 }, { "epoch": 0.12850925585386785, "grad_norm": 1.4583547993198689, "learning_rate": 9.747695633378576e-06, "loss": 0.7339, "step": 4193 }, { "epoch": 0.12853990437660906, "grad_norm": 1.7538630606301087, "learning_rate": 9.747539940591449e-06, "loss": 0.7251, "step": 4194 }, { "epoch": 0.12857055289935024, "grad_norm": 1.2887701121379431, "learning_rate": 9.747384201025593e-06, "loss": 0.7029, "step": 4195 }, { "epoch": 0.12860120142209144, "grad_norm": 1.2422988720224073, "learning_rate": 9.747228414682547e-06, "loss": 0.7174, "step": 4196 }, { "epoch": 0.12863184994483265, "grad_norm": 1.373159130224373, "learning_rate": 9.747072581563845e-06, "loss": 0.6786, "step": 4197 }, { "epoch": 0.12866249846757385, "grad_norm": 0.4922824769745912, "learning_rate": 9.746916701671023e-06, "loss": 0.4674, "step": 4198 }, { "epoch": 0.12869314699031506, "grad_norm": 0.5135505194976444, "learning_rate": 9.746760775005618e-06, "loss": 0.4839, "step": 4199 }, { "epoch": 0.12872379551305627, "grad_norm": 1.483949790650281, "learning_rate": 9.746604801569162e-06, "loss": 0.7555, "step": 4200 }, { "epoch": 0.12875444403579747, "grad_norm": 1.501889425769715, "learning_rate": 9.746448781363194e-06, "loss": 0.711, "step": 4201 }, { "epoch": 0.12878509255853868, "grad_norm": 1.4415338062301488, "learning_rate": 9.746292714389251e-06, "loss": 0.7676, "step": 4202 }, { "epoch": 0.12881574108127988, "grad_norm": 1.485201865456311, "learning_rate": 9.746136600648874e-06, "loss": 0.8279, "step": 4203 }, { "epoch": 0.1288463896040211, "grad_norm": 1.4353347474612814, "learning_rate": 9.745980440143598e-06, "loss": 0.7176, "step": 4204 }, { "epoch": 0.1288770381267623, "grad_norm": 0.4868013329125074, "learning_rate": 9.745824232874963e-06, "loss": 0.4672, "step": 4205 }, { "epoch": 0.1289076866495035, "grad_norm": 1.4254926850999277, "learning_rate": 9.745667978844506e-06, "loss": 0.6985, "step": 4206 }, { "epoch": 0.1289383351722447, "grad_norm": 1.4851398334907129, "learning_rate": 9.745511678053769e-06, "loss": 0.8172, "step": 4207 }, { "epoch": 0.1289689836949859, "grad_norm": 1.8416504965392824, "learning_rate": 9.74535533050429e-06, "loss": 0.7819, "step": 4208 }, { "epoch": 0.12899963221772712, "grad_norm": 0.4787340903163742, "learning_rate": 9.745198936197613e-06, "loss": 0.4455, "step": 4209 }, { "epoch": 0.12903028074046832, "grad_norm": 0.493960865984719, "learning_rate": 9.745042495135275e-06, "loss": 0.4852, "step": 4210 }, { "epoch": 0.1290609292632095, "grad_norm": 1.5097388547100865, "learning_rate": 9.744886007318821e-06, "loss": 0.8392, "step": 4211 }, { "epoch": 0.1290915777859507, "grad_norm": 1.4916117376786742, "learning_rate": 9.74472947274979e-06, "loss": 0.8625, "step": 4212 }, { "epoch": 0.1291222263086919, "grad_norm": 1.5328936965460467, "learning_rate": 9.744572891429725e-06, "loss": 0.8549, "step": 4213 }, { "epoch": 0.12915287483143312, "grad_norm": 1.4233542440747466, "learning_rate": 9.74441626336017e-06, "loss": 0.7298, "step": 4214 }, { "epoch": 0.12918352335417432, "grad_norm": 1.449934832134973, "learning_rate": 9.744259588542666e-06, "loss": 0.7895, "step": 4215 }, { "epoch": 0.12921417187691553, "grad_norm": 1.5837037651108778, "learning_rate": 9.744102866978761e-06, "loss": 0.7423, "step": 4216 }, { "epoch": 0.12924482039965673, "grad_norm": 1.3012288860337642, "learning_rate": 9.743946098669996e-06, "loss": 0.658, "step": 4217 }, { "epoch": 0.12927546892239794, "grad_norm": 1.399059739750469, "learning_rate": 9.743789283617915e-06, "loss": 0.7241, "step": 4218 }, { "epoch": 0.12930611744513915, "grad_norm": 1.4432853627350317, "learning_rate": 9.743632421824065e-06, "loss": 0.7402, "step": 4219 }, { "epoch": 0.12933676596788035, "grad_norm": 1.3903967140718274, "learning_rate": 9.743475513289993e-06, "loss": 0.7063, "step": 4220 }, { "epoch": 0.12936741449062156, "grad_norm": 0.5724745240029526, "learning_rate": 9.743318558017242e-06, "loss": 0.4489, "step": 4221 }, { "epoch": 0.12939806301336276, "grad_norm": 1.372421989866246, "learning_rate": 9.743161556007358e-06, "loss": 0.7318, "step": 4222 }, { "epoch": 0.12942871153610397, "grad_norm": 1.3723967068416687, "learning_rate": 9.74300450726189e-06, "loss": 0.7467, "step": 4223 }, { "epoch": 0.12945936005884517, "grad_norm": 1.4530368039078985, "learning_rate": 9.742847411782385e-06, "loss": 0.6996, "step": 4224 }, { "epoch": 0.12949000858158638, "grad_norm": 1.400035022912617, "learning_rate": 9.742690269570392e-06, "loss": 0.7096, "step": 4225 }, { "epoch": 0.12952065710432756, "grad_norm": 1.2790953531795106, "learning_rate": 9.74253308062746e-06, "loss": 0.7321, "step": 4226 }, { "epoch": 0.12955130562706876, "grad_norm": 1.3255356453881655, "learning_rate": 9.742375844955131e-06, "loss": 0.7601, "step": 4227 }, { "epoch": 0.12958195414980997, "grad_norm": 1.1968753342791822, "learning_rate": 9.742218562554964e-06, "loss": 0.6916, "step": 4228 }, { "epoch": 0.12961260267255117, "grad_norm": 0.530553164576682, "learning_rate": 9.742061233428502e-06, "loss": 0.4637, "step": 4229 }, { "epoch": 0.12964325119529238, "grad_norm": 1.3689101324692252, "learning_rate": 9.741903857577298e-06, "loss": 0.7678, "step": 4230 }, { "epoch": 0.1296738997180336, "grad_norm": 1.490153360144114, "learning_rate": 9.741746435002904e-06, "loss": 0.7589, "step": 4231 }, { "epoch": 0.1297045482407748, "grad_norm": 0.4875385043673223, "learning_rate": 9.741588965706865e-06, "loss": 0.4613, "step": 4232 }, { "epoch": 0.129735196763516, "grad_norm": 1.2175438070773852, "learning_rate": 9.74143144969074e-06, "loss": 0.7241, "step": 4233 }, { "epoch": 0.1297658452862572, "grad_norm": 1.4561306534228644, "learning_rate": 9.741273886956077e-06, "loss": 0.7085, "step": 4234 }, { "epoch": 0.1297964938089984, "grad_norm": 1.2995317234316375, "learning_rate": 9.741116277504427e-06, "loss": 0.6736, "step": 4235 }, { "epoch": 0.12982714233173961, "grad_norm": 0.507062355968895, "learning_rate": 9.740958621337348e-06, "loss": 0.4944, "step": 4236 }, { "epoch": 0.12985779085448082, "grad_norm": 0.5009106965664071, "learning_rate": 9.74080091845639e-06, "loss": 0.4761, "step": 4237 }, { "epoch": 0.12988843937722203, "grad_norm": 1.6632065766391901, "learning_rate": 9.740643168863108e-06, "loss": 0.795, "step": 4238 }, { "epoch": 0.12991908789996323, "grad_norm": 1.4394299589488353, "learning_rate": 9.740485372559056e-06, "loss": 0.7988, "step": 4239 }, { "epoch": 0.12994973642270444, "grad_norm": 1.3384702845055918, "learning_rate": 9.740327529545787e-06, "loss": 0.6561, "step": 4240 }, { "epoch": 0.12998038494544564, "grad_norm": 0.47792807000968945, "learning_rate": 9.740169639824858e-06, "loss": 0.4893, "step": 4241 }, { "epoch": 0.13001103346818682, "grad_norm": 1.9083856280064795, "learning_rate": 9.740011703397827e-06, "loss": 0.6921, "step": 4242 }, { "epoch": 0.13004168199092803, "grad_norm": 1.4545465625166607, "learning_rate": 9.739853720266247e-06, "loss": 0.8122, "step": 4243 }, { "epoch": 0.13007233051366923, "grad_norm": 1.5515898271492115, "learning_rate": 9.739695690431674e-06, "loss": 0.7318, "step": 4244 }, { "epoch": 0.13010297903641044, "grad_norm": 1.631067001215654, "learning_rate": 9.739537613895668e-06, "loss": 0.7856, "step": 4245 }, { "epoch": 0.13013362755915164, "grad_norm": 1.3911313819880937, "learning_rate": 9.739379490659786e-06, "loss": 0.6718, "step": 4246 }, { "epoch": 0.13016427608189285, "grad_norm": 1.519295718550949, "learning_rate": 9.739221320725585e-06, "loss": 0.7657, "step": 4247 }, { "epoch": 0.13019492460463405, "grad_norm": 1.2516204820361048, "learning_rate": 9.739063104094622e-06, "loss": 0.7138, "step": 4248 }, { "epoch": 0.13022557312737526, "grad_norm": 1.5463040891734705, "learning_rate": 9.73890484076846e-06, "loss": 0.7848, "step": 4249 }, { "epoch": 0.13025622165011647, "grad_norm": 1.2147925683755056, "learning_rate": 9.738746530748654e-06, "loss": 0.7488, "step": 4250 }, { "epoch": 0.13028687017285767, "grad_norm": 1.3451362990455125, "learning_rate": 9.738588174036767e-06, "loss": 0.6857, "step": 4251 }, { "epoch": 0.13031751869559888, "grad_norm": 1.4533269394560993, "learning_rate": 9.738429770634359e-06, "loss": 0.7965, "step": 4252 }, { "epoch": 0.13034816721834008, "grad_norm": 1.5203744718251608, "learning_rate": 9.738271320542989e-06, "loss": 0.7147, "step": 4253 }, { "epoch": 0.1303788157410813, "grad_norm": 1.230304099673113, "learning_rate": 9.738112823764219e-06, "loss": 0.6312, "step": 4254 }, { "epoch": 0.1304094642638225, "grad_norm": 1.3284053544584695, "learning_rate": 9.737954280299612e-06, "loss": 0.6701, "step": 4255 }, { "epoch": 0.1304401127865637, "grad_norm": 1.3146922870527082, "learning_rate": 9.737795690150729e-06, "loss": 0.6803, "step": 4256 }, { "epoch": 0.13047076130930488, "grad_norm": 1.3188861290960845, "learning_rate": 9.737637053319133e-06, "loss": 0.796, "step": 4257 }, { "epoch": 0.13050140983204608, "grad_norm": 1.4162087903033622, "learning_rate": 9.737478369806387e-06, "loss": 0.7104, "step": 4258 }, { "epoch": 0.1305320583547873, "grad_norm": 1.3979286643895732, "learning_rate": 9.737319639614053e-06, "loss": 0.7742, "step": 4259 }, { "epoch": 0.1305627068775285, "grad_norm": 1.4775187297419565, "learning_rate": 9.737160862743697e-06, "loss": 0.7705, "step": 4260 }, { "epoch": 0.1305933554002697, "grad_norm": 1.3548226751015784, "learning_rate": 9.737002039196884e-06, "loss": 0.8415, "step": 4261 }, { "epoch": 0.1306240039230109, "grad_norm": 1.3639526538220623, "learning_rate": 9.73684316897518e-06, "loss": 0.7727, "step": 4262 }, { "epoch": 0.1306546524457521, "grad_norm": 1.3843959927210279, "learning_rate": 9.736684252080145e-06, "loss": 0.7966, "step": 4263 }, { "epoch": 0.13068530096849332, "grad_norm": 1.3250962718069677, "learning_rate": 9.736525288513348e-06, "loss": 0.8161, "step": 4264 }, { "epoch": 0.13071594949123452, "grad_norm": 1.2779689284494054, "learning_rate": 9.736366278276355e-06, "loss": 0.6692, "step": 4265 }, { "epoch": 0.13074659801397573, "grad_norm": 1.3908664467221028, "learning_rate": 9.736207221370735e-06, "loss": 0.7771, "step": 4266 }, { "epoch": 0.13077724653671693, "grad_norm": 0.6007962237430003, "learning_rate": 9.736048117798054e-06, "loss": 0.4755, "step": 4267 }, { "epoch": 0.13080789505945814, "grad_norm": 1.6547261515001848, "learning_rate": 9.735888967559877e-06, "loss": 0.8328, "step": 4268 }, { "epoch": 0.13083854358219935, "grad_norm": 1.3915415180247472, "learning_rate": 9.735729770657775e-06, "loss": 0.7117, "step": 4269 }, { "epoch": 0.13086919210494055, "grad_norm": 0.5122386704313809, "learning_rate": 9.735570527093316e-06, "loss": 0.4656, "step": 4270 }, { "epoch": 0.13089984062768176, "grad_norm": 1.4219448816028109, "learning_rate": 9.735411236868071e-06, "loss": 0.6859, "step": 4271 }, { "epoch": 0.13093048915042296, "grad_norm": 1.4271545139746677, "learning_rate": 9.735251899983605e-06, "loss": 0.6507, "step": 4272 }, { "epoch": 0.13096113767316414, "grad_norm": 1.38042287528526, "learning_rate": 9.735092516441491e-06, "loss": 0.7944, "step": 4273 }, { "epoch": 0.13099178619590535, "grad_norm": 0.49757209799328866, "learning_rate": 9.7349330862433e-06, "loss": 0.458, "step": 4274 }, { "epoch": 0.13102243471864655, "grad_norm": 1.4883616724969009, "learning_rate": 9.7347736093906e-06, "loss": 0.7466, "step": 4275 }, { "epoch": 0.13105308324138776, "grad_norm": 1.339459226460241, "learning_rate": 9.734614085884967e-06, "loss": 0.7133, "step": 4276 }, { "epoch": 0.13108373176412896, "grad_norm": 1.5725800490083937, "learning_rate": 9.734454515727967e-06, "loss": 0.7517, "step": 4277 }, { "epoch": 0.13111438028687017, "grad_norm": 1.319884212189316, "learning_rate": 9.734294898921175e-06, "loss": 0.6281, "step": 4278 }, { "epoch": 0.13114502880961137, "grad_norm": 1.4668353760368062, "learning_rate": 9.734135235466167e-06, "loss": 0.7799, "step": 4279 }, { "epoch": 0.13117567733235258, "grad_norm": 1.4138707707759022, "learning_rate": 9.73397552536451e-06, "loss": 0.6409, "step": 4280 }, { "epoch": 0.13120632585509379, "grad_norm": 1.6669293500331925, "learning_rate": 9.733815768617784e-06, "loss": 0.7485, "step": 4281 }, { "epoch": 0.131236974377835, "grad_norm": 0.4833785093007418, "learning_rate": 9.733655965227557e-06, "loss": 0.4624, "step": 4282 }, { "epoch": 0.1312676229005762, "grad_norm": 1.4169647158095198, "learning_rate": 9.733496115195408e-06, "loss": 0.8006, "step": 4283 }, { "epoch": 0.1312982714233174, "grad_norm": 1.4837712528153482, "learning_rate": 9.733336218522914e-06, "loss": 0.7287, "step": 4284 }, { "epoch": 0.1313289199460586, "grad_norm": 1.4532829308436264, "learning_rate": 9.733176275211643e-06, "loss": 0.7566, "step": 4285 }, { "epoch": 0.13135956846879981, "grad_norm": 1.4524855031951056, "learning_rate": 9.733016285263175e-06, "loss": 0.6199, "step": 4286 }, { "epoch": 0.13139021699154102, "grad_norm": 1.1916549769423057, "learning_rate": 9.732856248679088e-06, "loss": 0.7465, "step": 4287 }, { "epoch": 0.1314208655142822, "grad_norm": 1.5761646168544567, "learning_rate": 9.732696165460957e-06, "loss": 0.6651, "step": 4288 }, { "epoch": 0.1314515140370234, "grad_norm": 1.4651353407426178, "learning_rate": 9.73253603561036e-06, "loss": 0.85, "step": 4289 }, { "epoch": 0.1314821625597646, "grad_norm": 1.3653401333793749, "learning_rate": 9.732375859128876e-06, "loss": 0.6802, "step": 4290 }, { "epoch": 0.13151281108250581, "grad_norm": 1.459067659564869, "learning_rate": 9.73221563601808e-06, "loss": 0.7263, "step": 4291 }, { "epoch": 0.13154345960524702, "grad_norm": 1.2443756022550352, "learning_rate": 9.732055366279552e-06, "loss": 0.6432, "step": 4292 }, { "epoch": 0.13157410812798823, "grad_norm": 1.4329146076212522, "learning_rate": 9.731895049914873e-06, "loss": 0.7674, "step": 4293 }, { "epoch": 0.13160475665072943, "grad_norm": 1.530818425881689, "learning_rate": 9.73173468692562e-06, "loss": 0.7797, "step": 4294 }, { "epoch": 0.13163540517347064, "grad_norm": 1.374517968798833, "learning_rate": 9.731574277313377e-06, "loss": 0.7837, "step": 4295 }, { "epoch": 0.13166605369621184, "grad_norm": 0.5105698359842955, "learning_rate": 9.731413821079719e-06, "loss": 0.4332, "step": 4296 }, { "epoch": 0.13169670221895305, "grad_norm": 1.2641207932315424, "learning_rate": 9.731253318226232e-06, "loss": 0.6993, "step": 4297 }, { "epoch": 0.13172735074169425, "grad_norm": 1.3857581788622155, "learning_rate": 9.731092768754496e-06, "loss": 0.6644, "step": 4298 }, { "epoch": 0.13175799926443546, "grad_norm": 1.5127351653852137, "learning_rate": 9.73093217266609e-06, "loss": 0.7312, "step": 4299 }, { "epoch": 0.13178864778717667, "grad_norm": 1.8204811109718788, "learning_rate": 9.7307715299626e-06, "loss": 0.8152, "step": 4300 }, { "epoch": 0.13181929630991787, "grad_norm": 1.402264766666627, "learning_rate": 9.73061084064561e-06, "loss": 0.7904, "step": 4301 }, { "epoch": 0.13184994483265908, "grad_norm": 1.3870335212529163, "learning_rate": 9.730450104716697e-06, "loss": 0.7738, "step": 4302 }, { "epoch": 0.13188059335540028, "grad_norm": 1.4105657080115601, "learning_rate": 9.73028932217745e-06, "loss": 0.6445, "step": 4303 }, { "epoch": 0.13191124187814146, "grad_norm": 0.5429978174378928, "learning_rate": 9.730128493029454e-06, "loss": 0.4906, "step": 4304 }, { "epoch": 0.13194189040088267, "grad_norm": 1.357233732325531, "learning_rate": 9.729967617274291e-06, "loss": 0.6558, "step": 4305 }, { "epoch": 0.13197253892362387, "grad_norm": 1.4270893731918037, "learning_rate": 9.729806694913546e-06, "loss": 0.8164, "step": 4306 }, { "epoch": 0.13200318744636508, "grad_norm": 1.2383571197136056, "learning_rate": 9.729645725948807e-06, "loss": 0.7413, "step": 4307 }, { "epoch": 0.13203383596910628, "grad_norm": 1.2060775241062978, "learning_rate": 9.729484710381656e-06, "loss": 0.6582, "step": 4308 }, { "epoch": 0.1320644844918475, "grad_norm": 1.3257440171889276, "learning_rate": 9.729323648213684e-06, "loss": 0.73, "step": 4309 }, { "epoch": 0.1320951330145887, "grad_norm": 1.4216070676950765, "learning_rate": 9.729162539446476e-06, "loss": 0.7399, "step": 4310 }, { "epoch": 0.1321257815373299, "grad_norm": 1.552831708637919, "learning_rate": 9.729001384081617e-06, "loss": 0.7839, "step": 4311 }, { "epoch": 0.1321564300600711, "grad_norm": 1.4056016984892634, "learning_rate": 9.7288401821207e-06, "loss": 0.7496, "step": 4312 }, { "epoch": 0.1321870785828123, "grad_norm": 1.5718740088085144, "learning_rate": 9.72867893356531e-06, "loss": 0.7069, "step": 4313 }, { "epoch": 0.13221772710555352, "grad_norm": 0.5212931967245019, "learning_rate": 9.728517638417037e-06, "loss": 0.4819, "step": 4314 }, { "epoch": 0.13224837562829472, "grad_norm": 0.511068227439169, "learning_rate": 9.728356296677469e-06, "loss": 0.4737, "step": 4315 }, { "epoch": 0.13227902415103593, "grad_norm": 1.2310971747358612, "learning_rate": 9.728194908348197e-06, "loss": 0.572, "step": 4316 }, { "epoch": 0.13230967267377713, "grad_norm": 1.4063574835579842, "learning_rate": 9.728033473430812e-06, "loss": 0.7532, "step": 4317 }, { "epoch": 0.13234032119651834, "grad_norm": 1.4008269841312806, "learning_rate": 9.7278719919269e-06, "loss": 0.8123, "step": 4318 }, { "epoch": 0.13237096971925952, "grad_norm": 1.3284585309745682, "learning_rate": 9.72771046383806e-06, "loss": 0.7786, "step": 4319 }, { "epoch": 0.13240161824200072, "grad_norm": 1.3683990431734976, "learning_rate": 9.727548889165876e-06, "loss": 0.7108, "step": 4320 }, { "epoch": 0.13243226676474193, "grad_norm": 1.4369857436805593, "learning_rate": 9.727387267911944e-06, "loss": 0.8291, "step": 4321 }, { "epoch": 0.13246291528748314, "grad_norm": 1.6004808614143848, "learning_rate": 9.727225600077856e-06, "loss": 0.6335, "step": 4322 }, { "epoch": 0.13249356381022434, "grad_norm": 1.2752671797773674, "learning_rate": 9.727063885665206e-06, "loss": 0.7206, "step": 4323 }, { "epoch": 0.13252421233296555, "grad_norm": 1.5478281229005169, "learning_rate": 9.726902124675585e-06, "loss": 0.8182, "step": 4324 }, { "epoch": 0.13255486085570675, "grad_norm": 1.472524384925318, "learning_rate": 9.726740317110588e-06, "loss": 0.8062, "step": 4325 }, { "epoch": 0.13258550937844796, "grad_norm": 1.500020439237134, "learning_rate": 9.726578462971808e-06, "loss": 0.7383, "step": 4326 }, { "epoch": 0.13261615790118916, "grad_norm": 1.266608111387654, "learning_rate": 9.726416562260842e-06, "loss": 0.7194, "step": 4327 }, { "epoch": 0.13264680642393037, "grad_norm": 1.1952505327774205, "learning_rate": 9.726254614979284e-06, "loss": 0.7185, "step": 4328 }, { "epoch": 0.13267745494667157, "grad_norm": 1.4602718513566402, "learning_rate": 9.726092621128731e-06, "loss": 0.7916, "step": 4329 }, { "epoch": 0.13270810346941278, "grad_norm": 1.3987317896699738, "learning_rate": 9.725930580710777e-06, "loss": 0.7537, "step": 4330 }, { "epoch": 0.13273875199215399, "grad_norm": 1.6373290034758627, "learning_rate": 9.725768493727021e-06, "loss": 0.6978, "step": 4331 }, { "epoch": 0.1327694005148952, "grad_norm": 1.3791527977042968, "learning_rate": 9.725606360179058e-06, "loss": 0.7302, "step": 4332 }, { "epoch": 0.1328000490376364, "grad_norm": 1.3861412902035728, "learning_rate": 9.725444180068487e-06, "loss": 0.6815, "step": 4333 }, { "epoch": 0.1328306975603776, "grad_norm": 1.3907186829279974, "learning_rate": 9.725281953396905e-06, "loss": 0.6798, "step": 4334 }, { "epoch": 0.13286134608311878, "grad_norm": 1.5761171642717642, "learning_rate": 9.725119680165911e-06, "loss": 0.6863, "step": 4335 }, { "epoch": 0.13289199460586, "grad_norm": 1.527446673216569, "learning_rate": 9.724957360377103e-06, "loss": 0.6928, "step": 4336 }, { "epoch": 0.1329226431286012, "grad_norm": 1.4298459719596148, "learning_rate": 9.724794994032082e-06, "loss": 0.7282, "step": 4337 }, { "epoch": 0.1329532916513424, "grad_norm": 1.3896541776815534, "learning_rate": 9.724632581132447e-06, "loss": 0.5772, "step": 4338 }, { "epoch": 0.1329839401740836, "grad_norm": 1.4288538487395246, "learning_rate": 9.7244701216798e-06, "loss": 0.7809, "step": 4339 }, { "epoch": 0.1330145886968248, "grad_norm": 1.337137642894924, "learning_rate": 9.724307615675737e-06, "loss": 0.6284, "step": 4340 }, { "epoch": 0.13304523721956601, "grad_norm": 1.3528711542172718, "learning_rate": 9.724145063121863e-06, "loss": 0.699, "step": 4341 }, { "epoch": 0.13307588574230722, "grad_norm": 1.3789663620463664, "learning_rate": 9.723982464019781e-06, "loss": 0.7031, "step": 4342 }, { "epoch": 0.13310653426504843, "grad_norm": 1.4566494158906913, "learning_rate": 9.723819818371089e-06, "loss": 0.7357, "step": 4343 }, { "epoch": 0.13313718278778963, "grad_norm": 1.4647912839154162, "learning_rate": 9.723657126177393e-06, "loss": 0.7411, "step": 4344 }, { "epoch": 0.13316783131053084, "grad_norm": 1.547486375785804, "learning_rate": 9.723494387440295e-06, "loss": 0.716, "step": 4345 }, { "epoch": 0.13319847983327204, "grad_norm": 1.2884890035187393, "learning_rate": 9.723331602161396e-06, "loss": 0.6642, "step": 4346 }, { "epoch": 0.13322912835601325, "grad_norm": 1.4470343511995507, "learning_rate": 9.723168770342304e-06, "loss": 0.8338, "step": 4347 }, { "epoch": 0.13325977687875445, "grad_norm": 1.2666954134385402, "learning_rate": 9.723005891984622e-06, "loss": 0.6858, "step": 4348 }, { "epoch": 0.13329042540149566, "grad_norm": 1.4593689390423987, "learning_rate": 9.722842967089953e-06, "loss": 0.7578, "step": 4349 }, { "epoch": 0.13332107392423684, "grad_norm": 1.2691148983007752, "learning_rate": 9.722679995659904e-06, "loss": 0.7491, "step": 4350 }, { "epoch": 0.13335172244697804, "grad_norm": 1.4154367453756183, "learning_rate": 9.722516977696083e-06, "loss": 0.7375, "step": 4351 }, { "epoch": 0.13338237096971925, "grad_norm": 1.3117875363228237, "learning_rate": 9.722353913200091e-06, "loss": 0.7661, "step": 4352 }, { "epoch": 0.13341301949246046, "grad_norm": 1.4810421239899447, "learning_rate": 9.72219080217354e-06, "loss": 0.8065, "step": 4353 }, { "epoch": 0.13344366801520166, "grad_norm": 1.2638000803453704, "learning_rate": 9.722027644618033e-06, "loss": 0.7206, "step": 4354 }, { "epoch": 0.13347431653794287, "grad_norm": 0.8542250717617355, "learning_rate": 9.72186444053518e-06, "loss": 0.466, "step": 4355 }, { "epoch": 0.13350496506068407, "grad_norm": 1.6174202200382937, "learning_rate": 9.72170118992659e-06, "loss": 0.8197, "step": 4356 }, { "epoch": 0.13353561358342528, "grad_norm": 1.4964130214721738, "learning_rate": 9.721537892793868e-06, "loss": 0.7508, "step": 4357 }, { "epoch": 0.13356626210616648, "grad_norm": 1.2472565309783565, "learning_rate": 9.721374549138626e-06, "loss": 0.7049, "step": 4358 }, { "epoch": 0.1335969106289077, "grad_norm": 1.1484671990294102, "learning_rate": 9.721211158962471e-06, "loss": 0.7088, "step": 4359 }, { "epoch": 0.1336275591516489, "grad_norm": 1.3720724208786192, "learning_rate": 9.721047722267016e-06, "loss": 0.7025, "step": 4360 }, { "epoch": 0.1336582076743901, "grad_norm": 2.110937531317516, "learning_rate": 9.72088423905387e-06, "loss": 0.7955, "step": 4361 }, { "epoch": 0.1336888561971313, "grad_norm": 1.4303466608624624, "learning_rate": 9.720720709324644e-06, "loss": 0.6881, "step": 4362 }, { "epoch": 0.1337195047198725, "grad_norm": 1.4932137735319044, "learning_rate": 9.720557133080948e-06, "loss": 0.7331, "step": 4363 }, { "epoch": 0.13375015324261372, "grad_norm": 1.4363738442150655, "learning_rate": 9.720393510324395e-06, "loss": 0.7213, "step": 4364 }, { "epoch": 0.13378080176535492, "grad_norm": 1.553637385167116, "learning_rate": 9.720229841056598e-06, "loss": 0.8097, "step": 4365 }, { "epoch": 0.1338114502880961, "grad_norm": 1.4473638503293664, "learning_rate": 9.720066125279167e-06, "loss": 0.769, "step": 4366 }, { "epoch": 0.1338420988108373, "grad_norm": 1.328791564618035, "learning_rate": 9.719902362993719e-06, "loss": 0.6724, "step": 4367 }, { "epoch": 0.1338727473335785, "grad_norm": 1.246114583271773, "learning_rate": 9.719738554201863e-06, "loss": 0.4969, "step": 4368 }, { "epoch": 0.13390339585631972, "grad_norm": 1.5729174454721424, "learning_rate": 9.719574698905216e-06, "loss": 0.8129, "step": 4369 }, { "epoch": 0.13393404437906092, "grad_norm": 0.692517364549207, "learning_rate": 9.719410797105393e-06, "loss": 0.4713, "step": 4370 }, { "epoch": 0.13396469290180213, "grad_norm": 1.5759584724683127, "learning_rate": 9.719246848804008e-06, "loss": 0.7989, "step": 4371 }, { "epoch": 0.13399534142454333, "grad_norm": 1.3270804164751064, "learning_rate": 9.719082854002675e-06, "loss": 0.6703, "step": 4372 }, { "epoch": 0.13402598994728454, "grad_norm": 0.6177098587230717, "learning_rate": 9.71891881270301e-06, "loss": 0.4653, "step": 4373 }, { "epoch": 0.13405663847002575, "grad_norm": 1.4306409819845016, "learning_rate": 9.718754724906634e-06, "loss": 0.7773, "step": 4374 }, { "epoch": 0.13408728699276695, "grad_norm": 0.8062329453979464, "learning_rate": 9.718590590615157e-06, "loss": 0.4627, "step": 4375 }, { "epoch": 0.13411793551550816, "grad_norm": 1.4462408739195387, "learning_rate": 9.718426409830201e-06, "loss": 0.7792, "step": 4376 }, { "epoch": 0.13414858403824936, "grad_norm": 1.1507194233491966, "learning_rate": 9.718262182553384e-06, "loss": 0.7061, "step": 4377 }, { "epoch": 0.13417923256099057, "grad_norm": 0.6060563616508883, "learning_rate": 9.71809790878632e-06, "loss": 0.4631, "step": 4378 }, { "epoch": 0.13420988108373177, "grad_norm": 1.23119066347781, "learning_rate": 9.717933588530632e-06, "loss": 0.6785, "step": 4379 }, { "epoch": 0.13424052960647298, "grad_norm": 1.295125966780296, "learning_rate": 9.717769221787936e-06, "loss": 0.6817, "step": 4380 }, { "epoch": 0.13427117812921416, "grad_norm": 1.369017546675025, "learning_rate": 9.717604808559854e-06, "loss": 0.667, "step": 4381 }, { "epoch": 0.13430182665195536, "grad_norm": 1.3732459341605714, "learning_rate": 9.717440348848004e-06, "loss": 0.8304, "step": 4382 }, { "epoch": 0.13433247517469657, "grad_norm": 1.4282919279277029, "learning_rate": 9.717275842654006e-06, "loss": 0.665, "step": 4383 }, { "epoch": 0.13436312369743778, "grad_norm": 1.3261152260955933, "learning_rate": 9.717111289979484e-06, "loss": 0.6419, "step": 4384 }, { "epoch": 0.13439377222017898, "grad_norm": 0.8562743279463532, "learning_rate": 9.716946690826056e-06, "loss": 0.4828, "step": 4385 }, { "epoch": 0.1344244207429202, "grad_norm": 1.296405417921042, "learning_rate": 9.716782045195348e-06, "loss": 0.7611, "step": 4386 }, { "epoch": 0.1344550692656614, "grad_norm": 1.4684569241213525, "learning_rate": 9.716617353088977e-06, "loss": 0.8738, "step": 4387 }, { "epoch": 0.1344857177884026, "grad_norm": 1.3204971519042539, "learning_rate": 9.716452614508569e-06, "loss": 0.8246, "step": 4388 }, { "epoch": 0.1345163663111438, "grad_norm": 1.6055558943730601, "learning_rate": 9.716287829455748e-06, "loss": 0.7233, "step": 4389 }, { "epoch": 0.134547014833885, "grad_norm": 0.49560155096083586, "learning_rate": 9.716122997932135e-06, "loss": 0.4728, "step": 4390 }, { "epoch": 0.13457766335662621, "grad_norm": 0.4713294756567554, "learning_rate": 9.715958119939355e-06, "loss": 0.4572, "step": 4391 }, { "epoch": 0.13460831187936742, "grad_norm": 1.336698796932437, "learning_rate": 9.715793195479035e-06, "loss": 0.7207, "step": 4392 }, { "epoch": 0.13463896040210863, "grad_norm": 1.4472100895143014, "learning_rate": 9.715628224552795e-06, "loss": 0.7789, "step": 4393 }, { "epoch": 0.13466960892484983, "grad_norm": 1.6629481871718004, "learning_rate": 9.715463207162267e-06, "loss": 0.7315, "step": 4394 }, { "epoch": 0.13470025744759104, "grad_norm": 1.3828067399825883, "learning_rate": 9.71529814330907e-06, "loss": 0.7108, "step": 4395 }, { "epoch": 0.13473090597033224, "grad_norm": 1.3259878638843705, "learning_rate": 9.715133032994837e-06, "loss": 0.6902, "step": 4396 }, { "epoch": 0.13476155449307342, "grad_norm": 0.5648528264510753, "learning_rate": 9.71496787622119e-06, "loss": 0.4476, "step": 4397 }, { "epoch": 0.13479220301581463, "grad_norm": 1.3299217214606582, "learning_rate": 9.71480267298976e-06, "loss": 0.7116, "step": 4398 }, { "epoch": 0.13482285153855583, "grad_norm": 1.4869650876620666, "learning_rate": 9.71463742330217e-06, "loss": 0.707, "step": 4399 }, { "epoch": 0.13485350006129704, "grad_norm": 0.4971457440744464, "learning_rate": 9.714472127160054e-06, "loss": 0.4907, "step": 4400 }, { "epoch": 0.13488414858403824, "grad_norm": 1.4305132996793744, "learning_rate": 9.714306784565037e-06, "loss": 0.6779, "step": 4401 }, { "epoch": 0.13491479710677945, "grad_norm": 1.414351376778573, "learning_rate": 9.714141395518748e-06, "loss": 0.7562, "step": 4402 }, { "epoch": 0.13494544562952066, "grad_norm": 1.304569629865171, "learning_rate": 9.71397596002282e-06, "loss": 0.6851, "step": 4403 }, { "epoch": 0.13497609415226186, "grad_norm": 1.4614629828091363, "learning_rate": 9.713810478078878e-06, "loss": 0.7785, "step": 4404 }, { "epoch": 0.13500674267500307, "grad_norm": 1.6440105103908826, "learning_rate": 9.713644949688556e-06, "loss": 0.733, "step": 4405 }, { "epoch": 0.13503739119774427, "grad_norm": 1.342397051974111, "learning_rate": 9.713479374853486e-06, "loss": 0.6581, "step": 4406 }, { "epoch": 0.13506803972048548, "grad_norm": 1.2001049913466837, "learning_rate": 9.713313753575296e-06, "loss": 0.6725, "step": 4407 }, { "epoch": 0.13509868824322668, "grad_norm": 1.449065848057269, "learning_rate": 9.713148085855619e-06, "loss": 0.6982, "step": 4408 }, { "epoch": 0.1351293367659679, "grad_norm": 0.548751139174937, "learning_rate": 9.71298237169609e-06, "loss": 0.4675, "step": 4409 }, { "epoch": 0.1351599852887091, "grad_norm": 1.7013473726055495, "learning_rate": 9.712816611098339e-06, "loss": 0.6804, "step": 4410 }, { "epoch": 0.1351906338114503, "grad_norm": 0.5085562304555642, "learning_rate": 9.712650804064e-06, "loss": 0.4536, "step": 4411 }, { "epoch": 0.13522128233419148, "grad_norm": 0.47879653853032705, "learning_rate": 9.712484950594707e-06, "loss": 0.473, "step": 4412 }, { "epoch": 0.13525193085693268, "grad_norm": 1.4862868180910964, "learning_rate": 9.712319050692093e-06, "loss": 0.7878, "step": 4413 }, { "epoch": 0.1352825793796739, "grad_norm": 1.0955631052564898, "learning_rate": 9.712153104357796e-06, "loss": 0.6164, "step": 4414 }, { "epoch": 0.1353132279024151, "grad_norm": 1.382091782502779, "learning_rate": 9.711987111593446e-06, "loss": 0.7861, "step": 4415 }, { "epoch": 0.1353438764251563, "grad_norm": 1.4351358958029374, "learning_rate": 9.711821072400683e-06, "loss": 0.7893, "step": 4416 }, { "epoch": 0.1353745249478975, "grad_norm": 1.2239009295550367, "learning_rate": 9.711654986781142e-06, "loss": 0.6565, "step": 4417 }, { "epoch": 0.1354051734706387, "grad_norm": 1.3989882937618514, "learning_rate": 9.711488854736457e-06, "loss": 0.6921, "step": 4418 }, { "epoch": 0.13543582199337992, "grad_norm": 1.4680018246840536, "learning_rate": 9.711322676268269e-06, "loss": 0.7615, "step": 4419 }, { "epoch": 0.13546647051612112, "grad_norm": 1.362243727903894, "learning_rate": 9.711156451378212e-06, "loss": 0.7405, "step": 4420 }, { "epoch": 0.13549711903886233, "grad_norm": 1.3839329405942395, "learning_rate": 9.710990180067926e-06, "loss": 0.7196, "step": 4421 }, { "epoch": 0.13552776756160353, "grad_norm": 1.303380991617168, "learning_rate": 9.710823862339048e-06, "loss": 0.7429, "step": 4422 }, { "epoch": 0.13555841608434474, "grad_norm": 1.2944666880997735, "learning_rate": 9.710657498193215e-06, "loss": 0.7765, "step": 4423 }, { "epoch": 0.13558906460708595, "grad_norm": 1.1753461923393111, "learning_rate": 9.71049108763207e-06, "loss": 0.7122, "step": 4424 }, { "epoch": 0.13561971312982715, "grad_norm": 1.4231414110505756, "learning_rate": 9.710324630657252e-06, "loss": 0.6798, "step": 4425 }, { "epoch": 0.13565036165256836, "grad_norm": 1.4587342167448276, "learning_rate": 9.7101581272704e-06, "loss": 0.7385, "step": 4426 }, { "epoch": 0.13568101017530956, "grad_norm": 1.3673663550027912, "learning_rate": 9.709991577473154e-06, "loss": 0.7758, "step": 4427 }, { "epoch": 0.13571165869805074, "grad_norm": 1.4563162024393526, "learning_rate": 9.709824981267155e-06, "loss": 0.8051, "step": 4428 }, { "epoch": 0.13574230722079195, "grad_norm": 1.395887714148982, "learning_rate": 9.709658338654046e-06, "loss": 0.77, "step": 4429 }, { "epoch": 0.13577295574353315, "grad_norm": 1.2607428982412088, "learning_rate": 9.70949164963547e-06, "loss": 0.7072, "step": 4430 }, { "epoch": 0.13580360426627436, "grad_norm": 1.3986491605685007, "learning_rate": 9.709324914213068e-06, "loss": 0.7363, "step": 4431 }, { "epoch": 0.13583425278901556, "grad_norm": 1.7503643818627845, "learning_rate": 9.70915813238848e-06, "loss": 0.7813, "step": 4432 }, { "epoch": 0.13586490131175677, "grad_norm": 1.1968233047438652, "learning_rate": 9.708991304163353e-06, "loss": 0.7559, "step": 4433 }, { "epoch": 0.13589554983449798, "grad_norm": 1.3915441532754687, "learning_rate": 9.708824429539332e-06, "loss": 0.6386, "step": 4434 }, { "epoch": 0.13592619835723918, "grad_norm": 1.3346526039823419, "learning_rate": 9.708657508518056e-06, "loss": 0.6996, "step": 4435 }, { "epoch": 0.1359568468799804, "grad_norm": 1.3775930968298877, "learning_rate": 9.708490541101174e-06, "loss": 0.8292, "step": 4436 }, { "epoch": 0.1359874954027216, "grad_norm": 1.5264055656251323, "learning_rate": 9.70832352729033e-06, "loss": 0.7142, "step": 4437 }, { "epoch": 0.1360181439254628, "grad_norm": 1.5232333820532467, "learning_rate": 9.70815646708717e-06, "loss": 0.7573, "step": 4438 }, { "epoch": 0.136048792448204, "grad_norm": 1.494364299355276, "learning_rate": 9.707989360493339e-06, "loss": 0.7074, "step": 4439 }, { "epoch": 0.1360794409709452, "grad_norm": 0.6377487477581057, "learning_rate": 9.707822207510486e-06, "loss": 0.4705, "step": 4440 }, { "epoch": 0.13611008949368641, "grad_norm": 1.3277834246083873, "learning_rate": 9.707655008140255e-06, "loss": 0.6981, "step": 4441 }, { "epoch": 0.13614073801642762, "grad_norm": 0.5936498286530887, "learning_rate": 9.707487762384294e-06, "loss": 0.4816, "step": 4442 }, { "epoch": 0.1361713865391688, "grad_norm": 1.2628502720494976, "learning_rate": 9.707320470244253e-06, "loss": 0.6784, "step": 4443 }, { "epoch": 0.13620203506191, "grad_norm": 0.5154946454273901, "learning_rate": 9.707153131721777e-06, "loss": 0.4783, "step": 4444 }, { "epoch": 0.1362326835846512, "grad_norm": 1.273348995548657, "learning_rate": 9.706985746818519e-06, "loss": 0.7738, "step": 4445 }, { "epoch": 0.13626333210739242, "grad_norm": 1.3490345671644746, "learning_rate": 9.706818315536127e-06, "loss": 0.6635, "step": 4446 }, { "epoch": 0.13629398063013362, "grad_norm": 0.5474612600885487, "learning_rate": 9.706650837876246e-06, "loss": 0.4911, "step": 4447 }, { "epoch": 0.13632462915287483, "grad_norm": 1.4518720738271154, "learning_rate": 9.706483313840533e-06, "loss": 0.7489, "step": 4448 }, { "epoch": 0.13635527767561603, "grad_norm": 1.1336342961465653, "learning_rate": 9.706315743430635e-06, "loss": 0.7281, "step": 4449 }, { "epoch": 0.13638592619835724, "grad_norm": 1.1542448216217076, "learning_rate": 9.706148126648203e-06, "loss": 0.627, "step": 4450 }, { "epoch": 0.13641657472109844, "grad_norm": 0.6403405486033996, "learning_rate": 9.70598046349489e-06, "loss": 0.4659, "step": 4451 }, { "epoch": 0.13644722324383965, "grad_norm": 1.402874905997162, "learning_rate": 9.705812753972348e-06, "loss": 0.6851, "step": 4452 }, { "epoch": 0.13647787176658085, "grad_norm": 1.3773501490272149, "learning_rate": 9.705644998082228e-06, "loss": 0.7999, "step": 4453 }, { "epoch": 0.13650852028932206, "grad_norm": 1.24476208469929, "learning_rate": 9.705477195826183e-06, "loss": 0.7234, "step": 4454 }, { "epoch": 0.13653916881206327, "grad_norm": 1.4341801093061335, "learning_rate": 9.705309347205869e-06, "loss": 0.772, "step": 4455 }, { "epoch": 0.13656981733480447, "grad_norm": 1.4682450612822349, "learning_rate": 9.705141452222937e-06, "loss": 0.683, "step": 4456 }, { "epoch": 0.13660046585754568, "grad_norm": 1.5067556665004873, "learning_rate": 9.704973510879044e-06, "loss": 0.7821, "step": 4457 }, { "epoch": 0.13663111438028688, "grad_norm": 0.5956128127955121, "learning_rate": 9.704805523175842e-06, "loss": 0.4649, "step": 4458 }, { "epoch": 0.13666176290302806, "grad_norm": 1.602511643203054, "learning_rate": 9.704637489114987e-06, "loss": 0.6378, "step": 4459 }, { "epoch": 0.13669241142576927, "grad_norm": 1.4507588529951247, "learning_rate": 9.704469408698136e-06, "loss": 0.7888, "step": 4460 }, { "epoch": 0.13672305994851047, "grad_norm": 1.2878579197467437, "learning_rate": 9.704301281926943e-06, "loss": 0.6005, "step": 4461 }, { "epoch": 0.13675370847125168, "grad_norm": 1.2272687359749708, "learning_rate": 9.704133108803067e-06, "loss": 0.7669, "step": 4462 }, { "epoch": 0.13678435699399288, "grad_norm": 1.216451318870123, "learning_rate": 9.703964889328164e-06, "loss": 0.6427, "step": 4463 }, { "epoch": 0.1368150055167341, "grad_norm": 1.2757361788873345, "learning_rate": 9.703796623503891e-06, "loss": 0.7464, "step": 4464 }, { "epoch": 0.1368456540394753, "grad_norm": 1.17477490267755, "learning_rate": 9.703628311331904e-06, "loss": 0.7477, "step": 4465 }, { "epoch": 0.1368763025622165, "grad_norm": 0.524488206717874, "learning_rate": 9.703459952813868e-06, "loss": 0.4665, "step": 4466 }, { "epoch": 0.1369069510849577, "grad_norm": 1.3009633686709787, "learning_rate": 9.703291547951434e-06, "loss": 0.614, "step": 4467 }, { "epoch": 0.1369375996076989, "grad_norm": 1.4411802310606154, "learning_rate": 9.703123096746267e-06, "loss": 0.7697, "step": 4468 }, { "epoch": 0.13696824813044012, "grad_norm": 1.3709721068104272, "learning_rate": 9.702954599200025e-06, "loss": 0.7392, "step": 4469 }, { "epoch": 0.13699889665318132, "grad_norm": 1.3608367529848056, "learning_rate": 9.702786055314368e-06, "loss": 0.6397, "step": 4470 }, { "epoch": 0.13702954517592253, "grad_norm": 1.3010333718968519, "learning_rate": 9.702617465090955e-06, "loss": 0.6857, "step": 4471 }, { "epoch": 0.13706019369866373, "grad_norm": 1.347239117886551, "learning_rate": 9.70244882853145e-06, "loss": 0.74, "step": 4472 }, { "epoch": 0.13709084222140494, "grad_norm": 1.4347136933084084, "learning_rate": 9.702280145637516e-06, "loss": 0.7321, "step": 4473 }, { "epoch": 0.13712149074414612, "grad_norm": 1.7998457825621832, "learning_rate": 9.702111416410809e-06, "loss": 0.7383, "step": 4474 }, { "epoch": 0.13715213926688732, "grad_norm": 1.6416942480406198, "learning_rate": 9.701942640852996e-06, "loss": 0.8084, "step": 4475 }, { "epoch": 0.13718278778962853, "grad_norm": 1.4160120059421948, "learning_rate": 9.70177381896574e-06, "loss": 0.7153, "step": 4476 }, { "epoch": 0.13721343631236974, "grad_norm": 1.5477637599310394, "learning_rate": 9.701604950750703e-06, "loss": 0.7817, "step": 4477 }, { "epoch": 0.13724408483511094, "grad_norm": 1.3639267056773676, "learning_rate": 9.701436036209549e-06, "loss": 0.7301, "step": 4478 }, { "epoch": 0.13727473335785215, "grad_norm": 1.5421288804666275, "learning_rate": 9.701267075343943e-06, "loss": 0.7344, "step": 4479 }, { "epoch": 0.13730538188059335, "grad_norm": 1.3258873263214692, "learning_rate": 9.70109806815555e-06, "loss": 0.7867, "step": 4480 }, { "epoch": 0.13733603040333456, "grad_norm": 1.3420826575656248, "learning_rate": 9.700929014646035e-06, "loss": 0.6394, "step": 4481 }, { "epoch": 0.13736667892607576, "grad_norm": 1.3918654617880137, "learning_rate": 9.700759914817064e-06, "loss": 0.6658, "step": 4482 }, { "epoch": 0.13739732744881697, "grad_norm": 0.5691857835960231, "learning_rate": 9.700590768670302e-06, "loss": 0.4649, "step": 4483 }, { "epoch": 0.13742797597155818, "grad_norm": 1.2222940960077238, "learning_rate": 9.700421576207417e-06, "loss": 0.6104, "step": 4484 }, { "epoch": 0.13745862449429938, "grad_norm": 1.4114752803953514, "learning_rate": 9.700252337430075e-06, "loss": 0.7709, "step": 4485 }, { "epoch": 0.1374892730170406, "grad_norm": 0.5091345842192702, "learning_rate": 9.700083052339944e-06, "loss": 0.4558, "step": 4486 }, { "epoch": 0.1375199215397818, "grad_norm": 1.3006323820314816, "learning_rate": 9.699913720938694e-06, "loss": 0.6909, "step": 4487 }, { "epoch": 0.137550570062523, "grad_norm": 1.3341346844969038, "learning_rate": 9.69974434322799e-06, "loss": 0.6419, "step": 4488 }, { "epoch": 0.1375812185852642, "grad_norm": 1.2559997154232825, "learning_rate": 9.699574919209502e-06, "loss": 0.694, "step": 4489 }, { "epoch": 0.13761186710800538, "grad_norm": 1.4704686393123014, "learning_rate": 9.6994054488849e-06, "loss": 0.788, "step": 4490 }, { "epoch": 0.1376425156307466, "grad_norm": 1.458924911988881, "learning_rate": 9.699235932255855e-06, "loss": 0.713, "step": 4491 }, { "epoch": 0.1376731641534878, "grad_norm": 1.5373552671129334, "learning_rate": 9.699066369324034e-06, "loss": 0.6057, "step": 4492 }, { "epoch": 0.137703812676229, "grad_norm": 1.4168114983277347, "learning_rate": 9.698896760091112e-06, "loss": 0.7527, "step": 4493 }, { "epoch": 0.1377344611989702, "grad_norm": 1.3938709610982942, "learning_rate": 9.698727104558756e-06, "loss": 0.757, "step": 4494 }, { "epoch": 0.1377651097217114, "grad_norm": 1.2842396279598904, "learning_rate": 9.698557402728642e-06, "loss": 0.7684, "step": 4495 }, { "epoch": 0.13779575824445262, "grad_norm": 1.5194902535711405, "learning_rate": 9.698387654602437e-06, "loss": 0.7574, "step": 4496 }, { "epoch": 0.13782640676719382, "grad_norm": 0.6091822660689695, "learning_rate": 9.698217860181817e-06, "loss": 0.4864, "step": 4497 }, { "epoch": 0.13785705528993503, "grad_norm": 1.354473854334098, "learning_rate": 9.698048019468455e-06, "loss": 0.8102, "step": 4498 }, { "epoch": 0.13788770381267623, "grad_norm": 1.3085392548641526, "learning_rate": 9.697878132464024e-06, "loss": 0.7097, "step": 4499 }, { "epoch": 0.13791835233541744, "grad_norm": 1.3414616278986429, "learning_rate": 9.697708199170198e-06, "loss": 0.763, "step": 4500 }, { "epoch": 0.13794900085815864, "grad_norm": 1.187423256168554, "learning_rate": 9.697538219588652e-06, "loss": 0.8039, "step": 4501 }, { "epoch": 0.13797964938089985, "grad_norm": 1.3318672176100295, "learning_rate": 9.697368193721057e-06, "loss": 0.7116, "step": 4502 }, { "epoch": 0.13801029790364105, "grad_norm": 0.47851280062701856, "learning_rate": 9.697198121569093e-06, "loss": 0.4574, "step": 4503 }, { "epoch": 0.13804094642638226, "grad_norm": 1.3185182508781594, "learning_rate": 9.697028003134434e-06, "loss": 0.7918, "step": 4504 }, { "epoch": 0.13807159494912344, "grad_norm": 1.4713245366624967, "learning_rate": 9.696857838418755e-06, "loss": 0.6649, "step": 4505 }, { "epoch": 0.13810224347186464, "grad_norm": 0.5063227196409156, "learning_rate": 9.696687627423738e-06, "loss": 0.4894, "step": 4506 }, { "epoch": 0.13813289199460585, "grad_norm": 1.5191356760630972, "learning_rate": 9.696517370151053e-06, "loss": 0.7323, "step": 4507 }, { "epoch": 0.13816354051734706, "grad_norm": 1.4566201325432633, "learning_rate": 9.696347066602381e-06, "loss": 0.7724, "step": 4508 }, { "epoch": 0.13819418904008826, "grad_norm": 1.2619628116201886, "learning_rate": 9.6961767167794e-06, "loss": 0.7045, "step": 4509 }, { "epoch": 0.13822483756282947, "grad_norm": 1.3536458574874812, "learning_rate": 9.696006320683787e-06, "loss": 0.779, "step": 4510 }, { "epoch": 0.13825548608557067, "grad_norm": 1.348574786529335, "learning_rate": 9.695835878317223e-06, "loss": 0.7378, "step": 4511 }, { "epoch": 0.13828613460831188, "grad_norm": 1.3224954950466639, "learning_rate": 9.695665389681389e-06, "loss": 0.7067, "step": 4512 }, { "epoch": 0.13831678313105308, "grad_norm": 1.4848151644436514, "learning_rate": 9.69549485477796e-06, "loss": 0.6491, "step": 4513 }, { "epoch": 0.1383474316537943, "grad_norm": 1.3279707224135402, "learning_rate": 9.695324273608619e-06, "loss": 0.7031, "step": 4514 }, { "epoch": 0.1383780801765355, "grad_norm": 1.3050821923551768, "learning_rate": 9.695153646175047e-06, "loss": 0.7425, "step": 4515 }, { "epoch": 0.1384087286992767, "grad_norm": 0.5484564563760052, "learning_rate": 9.694982972478923e-06, "loss": 0.4939, "step": 4516 }, { "epoch": 0.1384393772220179, "grad_norm": 1.2746544917892304, "learning_rate": 9.694812252521933e-06, "loss": 0.703, "step": 4517 }, { "epoch": 0.1384700257447591, "grad_norm": 1.3982229231779943, "learning_rate": 9.694641486305756e-06, "loss": 0.7498, "step": 4518 }, { "epoch": 0.13850067426750032, "grad_norm": 1.3881820286794933, "learning_rate": 9.694470673832075e-06, "loss": 0.8028, "step": 4519 }, { "epoch": 0.13853132279024152, "grad_norm": 1.643232286333075, "learning_rate": 9.694299815102572e-06, "loss": 0.6762, "step": 4520 }, { "epoch": 0.1385619713129827, "grad_norm": 1.4739693838227814, "learning_rate": 9.694128910118934e-06, "loss": 0.7269, "step": 4521 }, { "epoch": 0.1385926198357239, "grad_norm": 1.3179581000200855, "learning_rate": 9.693957958882843e-06, "loss": 0.7159, "step": 4522 }, { "epoch": 0.1386232683584651, "grad_norm": 0.5214474955344754, "learning_rate": 9.693786961395982e-06, "loss": 0.4705, "step": 4523 }, { "epoch": 0.13865391688120632, "grad_norm": 1.4880923854352561, "learning_rate": 9.693615917660036e-06, "loss": 0.7009, "step": 4524 }, { "epoch": 0.13868456540394752, "grad_norm": 1.2562752406047342, "learning_rate": 9.693444827676694e-06, "loss": 0.6795, "step": 4525 }, { "epoch": 0.13871521392668873, "grad_norm": 1.4413010718762354, "learning_rate": 9.693273691447637e-06, "loss": 0.7755, "step": 4526 }, { "epoch": 0.13874586244942994, "grad_norm": 0.4793144794409411, "learning_rate": 9.693102508974555e-06, "loss": 0.4386, "step": 4527 }, { "epoch": 0.13877651097217114, "grad_norm": 0.4717133637773104, "learning_rate": 9.692931280259133e-06, "loss": 0.4767, "step": 4528 }, { "epoch": 0.13880715949491235, "grad_norm": 1.3379187968307933, "learning_rate": 9.692760005303057e-06, "loss": 0.6931, "step": 4529 }, { "epoch": 0.13883780801765355, "grad_norm": 1.4814683143469254, "learning_rate": 9.692588684108018e-06, "loss": 0.8507, "step": 4530 }, { "epoch": 0.13886845654039476, "grad_norm": 0.459562466627474, "learning_rate": 9.6924173166757e-06, "loss": 0.4478, "step": 4531 }, { "epoch": 0.13889910506313596, "grad_norm": 0.5081664787761879, "learning_rate": 9.692245903007795e-06, "loss": 0.4862, "step": 4532 }, { "epoch": 0.13892975358587717, "grad_norm": 1.455994369053081, "learning_rate": 9.69207444310599e-06, "loss": 0.6839, "step": 4533 }, { "epoch": 0.13896040210861837, "grad_norm": 1.6241933990200608, "learning_rate": 9.691902936971975e-06, "loss": 0.7611, "step": 4534 }, { "epoch": 0.13899105063135958, "grad_norm": 1.200948396330862, "learning_rate": 9.691731384607441e-06, "loss": 0.6505, "step": 4535 }, { "epoch": 0.13902169915410076, "grad_norm": 1.229117541905925, "learning_rate": 9.691559786014076e-06, "loss": 0.7282, "step": 4536 }, { "epoch": 0.13905234767684196, "grad_norm": 1.5360128633238237, "learning_rate": 9.691388141193571e-06, "loss": 0.7275, "step": 4537 }, { "epoch": 0.13908299619958317, "grad_norm": 1.3316934737138542, "learning_rate": 9.691216450147622e-06, "loss": 0.7353, "step": 4538 }, { "epoch": 0.13911364472232438, "grad_norm": 0.6069197497951604, "learning_rate": 9.691044712877914e-06, "loss": 0.4774, "step": 4539 }, { "epoch": 0.13914429324506558, "grad_norm": 1.4347947444554545, "learning_rate": 9.690872929386143e-06, "loss": 0.7512, "step": 4540 }, { "epoch": 0.1391749417678068, "grad_norm": 1.2038879204611699, "learning_rate": 9.690701099674e-06, "loss": 0.7576, "step": 4541 }, { "epoch": 0.139205590290548, "grad_norm": 1.3436678830127509, "learning_rate": 9.69052922374318e-06, "loss": 0.6452, "step": 4542 }, { "epoch": 0.1392362388132892, "grad_norm": 0.4777641640539089, "learning_rate": 9.690357301595375e-06, "loss": 0.4872, "step": 4543 }, { "epoch": 0.1392668873360304, "grad_norm": 1.22165295959329, "learning_rate": 9.690185333232278e-06, "loss": 0.7989, "step": 4544 }, { "epoch": 0.1392975358587716, "grad_norm": 1.4617923922509968, "learning_rate": 9.690013318655588e-06, "loss": 0.7673, "step": 4545 }, { "epoch": 0.13932818438151282, "grad_norm": 1.2897789916936924, "learning_rate": 9.689841257866994e-06, "loss": 0.7698, "step": 4546 }, { "epoch": 0.13935883290425402, "grad_norm": 0.5116275761216451, "learning_rate": 9.689669150868196e-06, "loss": 0.442, "step": 4547 }, { "epoch": 0.13938948142699523, "grad_norm": 1.5382994075001875, "learning_rate": 9.689496997660887e-06, "loss": 0.7042, "step": 4548 }, { "epoch": 0.13942012994973643, "grad_norm": 1.8026730082512763, "learning_rate": 9.689324798246765e-06, "loss": 0.7681, "step": 4549 }, { "epoch": 0.13945077847247764, "grad_norm": 1.2733485366557398, "learning_rate": 9.689152552627526e-06, "loss": 0.7187, "step": 4550 }, { "epoch": 0.13948142699521884, "grad_norm": 1.2730437414378404, "learning_rate": 9.688980260804865e-06, "loss": 0.6475, "step": 4551 }, { "epoch": 0.13951207551796002, "grad_norm": 0.4945062206300353, "learning_rate": 9.688807922780483e-06, "loss": 0.4563, "step": 4552 }, { "epoch": 0.13954272404070123, "grad_norm": 1.2376820450270285, "learning_rate": 9.688635538556079e-06, "loss": 0.5962, "step": 4553 }, { "epoch": 0.13957337256344243, "grad_norm": 1.3785991249271596, "learning_rate": 9.688463108133345e-06, "loss": 0.7598, "step": 4554 }, { "epoch": 0.13960402108618364, "grad_norm": 1.3418316679683164, "learning_rate": 9.688290631513989e-06, "loss": 0.7413, "step": 4555 }, { "epoch": 0.13963466960892484, "grad_norm": 0.47050895371257945, "learning_rate": 9.688118108699703e-06, "loss": 0.4887, "step": 4556 }, { "epoch": 0.13966531813166605, "grad_norm": 1.3033954098493508, "learning_rate": 9.687945539692191e-06, "loss": 0.6556, "step": 4557 }, { "epoch": 0.13969596665440726, "grad_norm": 0.4908491437441995, "learning_rate": 9.68777292449315e-06, "loss": 0.4845, "step": 4558 }, { "epoch": 0.13972661517714846, "grad_norm": 0.4894276950507557, "learning_rate": 9.687600263104287e-06, "loss": 0.4704, "step": 4559 }, { "epoch": 0.13975726369988967, "grad_norm": 1.4869127091202425, "learning_rate": 9.687427555527296e-06, "loss": 0.7473, "step": 4560 }, { "epoch": 0.13978791222263087, "grad_norm": 1.3613508643858536, "learning_rate": 9.687254801763883e-06, "loss": 0.6917, "step": 4561 }, { "epoch": 0.13981856074537208, "grad_norm": 1.3584771458553893, "learning_rate": 9.687082001815749e-06, "loss": 0.725, "step": 4562 }, { "epoch": 0.13984920926811328, "grad_norm": 1.1716381774076983, "learning_rate": 9.686909155684596e-06, "loss": 0.7498, "step": 4563 }, { "epoch": 0.1398798577908545, "grad_norm": 1.4573301609553728, "learning_rate": 9.68673626337213e-06, "loss": 0.7199, "step": 4564 }, { "epoch": 0.1399105063135957, "grad_norm": 0.5426334861002683, "learning_rate": 9.68656332488005e-06, "loss": 0.4792, "step": 4565 }, { "epoch": 0.1399411548363369, "grad_norm": 1.366826266930836, "learning_rate": 9.686390340210064e-06, "loss": 0.757, "step": 4566 }, { "epoch": 0.13997180335907808, "grad_norm": 1.4452394165217126, "learning_rate": 9.686217309363875e-06, "loss": 0.7497, "step": 4567 }, { "epoch": 0.14000245188181928, "grad_norm": 1.2921341702503688, "learning_rate": 9.686044232343186e-06, "loss": 0.751, "step": 4568 }, { "epoch": 0.1400331004045605, "grad_norm": 1.3394146992598819, "learning_rate": 9.685871109149706e-06, "loss": 0.7758, "step": 4569 }, { "epoch": 0.1400637489273017, "grad_norm": 1.3161989670912204, "learning_rate": 9.68569793978514e-06, "loss": 0.6655, "step": 4570 }, { "epoch": 0.1400943974500429, "grad_norm": 1.4806498295839208, "learning_rate": 9.68552472425119e-06, "loss": 0.7224, "step": 4571 }, { "epoch": 0.1401250459727841, "grad_norm": 1.5465987366273641, "learning_rate": 9.685351462549568e-06, "loss": 0.8415, "step": 4572 }, { "epoch": 0.1401556944955253, "grad_norm": 1.3007958137957507, "learning_rate": 9.68517815468198e-06, "loss": 0.6966, "step": 4573 }, { "epoch": 0.14018634301826652, "grad_norm": 0.5048168347788807, "learning_rate": 9.68500480065013e-06, "loss": 0.4771, "step": 4574 }, { "epoch": 0.14021699154100772, "grad_norm": 1.4880666699535714, "learning_rate": 9.684831400455731e-06, "loss": 0.6331, "step": 4575 }, { "epoch": 0.14024764006374893, "grad_norm": 1.4967984846719646, "learning_rate": 9.684657954100492e-06, "loss": 0.7524, "step": 4576 }, { "epoch": 0.14027828858649014, "grad_norm": 1.3362861933164756, "learning_rate": 9.684484461586117e-06, "loss": 0.5756, "step": 4577 }, { "epoch": 0.14030893710923134, "grad_norm": 1.4543817372817762, "learning_rate": 9.684310922914318e-06, "loss": 0.6096, "step": 4578 }, { "epoch": 0.14033958563197255, "grad_norm": 1.2183166782142338, "learning_rate": 9.684137338086805e-06, "loss": 0.7353, "step": 4579 }, { "epoch": 0.14037023415471375, "grad_norm": 1.410566922274519, "learning_rate": 9.683963707105288e-06, "loss": 0.7327, "step": 4580 }, { "epoch": 0.14040088267745496, "grad_norm": 1.2980667254563942, "learning_rate": 9.683790029971478e-06, "loss": 0.6443, "step": 4581 }, { "epoch": 0.14043153120019616, "grad_norm": 1.389203175563548, "learning_rate": 9.683616306687086e-06, "loss": 0.741, "step": 4582 }, { "epoch": 0.14046217972293734, "grad_norm": 0.47712428229420006, "learning_rate": 9.683442537253826e-06, "loss": 0.4841, "step": 4583 }, { "epoch": 0.14049282824567855, "grad_norm": 1.4227636070581533, "learning_rate": 9.683268721673408e-06, "loss": 0.7315, "step": 4584 }, { "epoch": 0.14052347676841975, "grad_norm": 0.49851364570582213, "learning_rate": 9.683094859947544e-06, "loss": 0.4707, "step": 4585 }, { "epoch": 0.14055412529116096, "grad_norm": 1.2895601049666585, "learning_rate": 9.68292095207795e-06, "loss": 0.7451, "step": 4586 }, { "epoch": 0.14058477381390216, "grad_norm": 1.317808578408166, "learning_rate": 9.682746998066335e-06, "loss": 0.7951, "step": 4587 }, { "epoch": 0.14061542233664337, "grad_norm": 0.4845229405113702, "learning_rate": 9.682572997914417e-06, "loss": 0.4645, "step": 4588 }, { "epoch": 0.14064607085938458, "grad_norm": 1.6317160368987393, "learning_rate": 9.68239895162391e-06, "loss": 0.8115, "step": 4589 }, { "epoch": 0.14067671938212578, "grad_norm": 1.3807545505423215, "learning_rate": 9.682224859196528e-06, "loss": 0.6584, "step": 4590 }, { "epoch": 0.140707367904867, "grad_norm": 1.5432684484590082, "learning_rate": 9.682050720633985e-06, "loss": 0.6572, "step": 4591 }, { "epoch": 0.1407380164276082, "grad_norm": 1.980813321795604, "learning_rate": 9.681876535937999e-06, "loss": 0.7394, "step": 4592 }, { "epoch": 0.1407686649503494, "grad_norm": 1.3971145274205672, "learning_rate": 9.681702305110285e-06, "loss": 0.671, "step": 4593 }, { "epoch": 0.1407993134730906, "grad_norm": 1.4196571457132647, "learning_rate": 9.681528028152562e-06, "loss": 0.7488, "step": 4594 }, { "epoch": 0.1408299619958318, "grad_norm": 1.323662847632944, "learning_rate": 9.681353705066544e-06, "loss": 0.7567, "step": 4595 }, { "epoch": 0.14086061051857302, "grad_norm": 1.4716830129769216, "learning_rate": 9.681179335853951e-06, "loss": 0.6581, "step": 4596 }, { "epoch": 0.14089125904131422, "grad_norm": 0.5190535564786416, "learning_rate": 9.6810049205165e-06, "loss": 0.4945, "step": 4597 }, { "epoch": 0.14092190756405543, "grad_norm": 1.2319902435893844, "learning_rate": 9.68083045905591e-06, "loss": 0.7792, "step": 4598 }, { "epoch": 0.1409525560867966, "grad_norm": 1.357921220861819, "learning_rate": 9.6806559514739e-06, "loss": 0.6334, "step": 4599 }, { "epoch": 0.1409832046095378, "grad_norm": 0.4917128458457806, "learning_rate": 9.680481397772187e-06, "loss": 0.4492, "step": 4600 }, { "epoch": 0.14101385313227902, "grad_norm": 1.3379511301999312, "learning_rate": 9.680306797952496e-06, "loss": 0.7811, "step": 4601 }, { "epoch": 0.14104450165502022, "grad_norm": 1.2915684968427932, "learning_rate": 9.680132152016544e-06, "loss": 0.7858, "step": 4602 }, { "epoch": 0.14107515017776143, "grad_norm": 1.2336410502952648, "learning_rate": 9.679957459966053e-06, "loss": 0.675, "step": 4603 }, { "epoch": 0.14110579870050263, "grad_norm": 1.4932585310207522, "learning_rate": 9.679782721802742e-06, "loss": 0.7268, "step": 4604 }, { "epoch": 0.14113644722324384, "grad_norm": 1.4567858270749021, "learning_rate": 9.679607937528335e-06, "loss": 0.6735, "step": 4605 }, { "epoch": 0.14116709574598504, "grad_norm": 1.2463011348038977, "learning_rate": 9.679433107144555e-06, "loss": 0.6759, "step": 4606 }, { "epoch": 0.14119774426872625, "grad_norm": 1.4044561951800452, "learning_rate": 9.679258230653122e-06, "loss": 0.7791, "step": 4607 }, { "epoch": 0.14122839279146746, "grad_norm": 1.5455409615660414, "learning_rate": 9.679083308055761e-06, "loss": 0.7275, "step": 4608 }, { "epoch": 0.14125904131420866, "grad_norm": 1.5468083825327863, "learning_rate": 9.678908339354194e-06, "loss": 0.6814, "step": 4609 }, { "epoch": 0.14128968983694987, "grad_norm": 1.32316312990288, "learning_rate": 9.678733324550147e-06, "loss": 0.6962, "step": 4610 }, { "epoch": 0.14132033835969107, "grad_norm": 0.6316495514994983, "learning_rate": 9.678558263645343e-06, "loss": 0.4702, "step": 4611 }, { "epoch": 0.14135098688243228, "grad_norm": 1.4431268914695037, "learning_rate": 9.678383156641507e-06, "loss": 0.7805, "step": 4612 }, { "epoch": 0.14138163540517348, "grad_norm": 1.39805803864545, "learning_rate": 9.678208003540366e-06, "loss": 0.6899, "step": 4613 }, { "epoch": 0.14141228392791466, "grad_norm": 0.47986619107354844, "learning_rate": 9.678032804343644e-06, "loss": 0.4546, "step": 4614 }, { "epoch": 0.14144293245065587, "grad_norm": 1.1983866022993186, "learning_rate": 9.677857559053068e-06, "loss": 0.6813, "step": 4615 }, { "epoch": 0.14147358097339707, "grad_norm": 1.3386738732682453, "learning_rate": 9.677682267670365e-06, "loss": 0.7402, "step": 4616 }, { "epoch": 0.14150422949613828, "grad_norm": 1.296504054437384, "learning_rate": 9.677506930197261e-06, "loss": 0.6864, "step": 4617 }, { "epoch": 0.14153487801887948, "grad_norm": 0.5556421823599142, "learning_rate": 9.677331546635483e-06, "loss": 0.468, "step": 4618 }, { "epoch": 0.1415655265416207, "grad_norm": 1.637895646316487, "learning_rate": 9.677156116986764e-06, "loss": 0.679, "step": 4619 }, { "epoch": 0.1415961750643619, "grad_norm": 1.3786953518368337, "learning_rate": 9.676980641252826e-06, "loss": 0.6992, "step": 4620 }, { "epoch": 0.1416268235871031, "grad_norm": 1.3779397906958322, "learning_rate": 9.676805119435402e-06, "loss": 0.6276, "step": 4621 }, { "epoch": 0.1416574721098443, "grad_norm": 0.5319446688454328, "learning_rate": 9.676629551536221e-06, "loss": 0.46, "step": 4622 }, { "epoch": 0.1416881206325855, "grad_norm": 1.5224487704888605, "learning_rate": 9.676453937557013e-06, "loss": 0.7134, "step": 4623 }, { "epoch": 0.14171876915532672, "grad_norm": 1.4883306461317185, "learning_rate": 9.676278277499507e-06, "loss": 0.6862, "step": 4624 }, { "epoch": 0.14174941767806792, "grad_norm": 1.3579891719008346, "learning_rate": 9.676102571365433e-06, "loss": 0.7609, "step": 4625 }, { "epoch": 0.14178006620080913, "grad_norm": 0.4902363212728991, "learning_rate": 9.675926819156527e-06, "loss": 0.4752, "step": 4626 }, { "epoch": 0.14181071472355034, "grad_norm": 1.3112214729234815, "learning_rate": 9.675751020874516e-06, "loss": 0.6618, "step": 4627 }, { "epoch": 0.14184136324629154, "grad_norm": 1.519875970594237, "learning_rate": 9.675575176521134e-06, "loss": 0.742, "step": 4628 }, { "epoch": 0.14187201176903275, "grad_norm": 1.4223792683652707, "learning_rate": 9.675399286098113e-06, "loss": 0.7272, "step": 4629 }, { "epoch": 0.14190266029177392, "grad_norm": 1.662394041953807, "learning_rate": 9.675223349607187e-06, "loss": 0.7435, "step": 4630 }, { "epoch": 0.14193330881451513, "grad_norm": 2.921382803744123, "learning_rate": 9.67504736705009e-06, "loss": 0.8598, "step": 4631 }, { "epoch": 0.14196395733725634, "grad_norm": 1.3907882091870287, "learning_rate": 9.674871338428555e-06, "loss": 0.717, "step": 4632 }, { "epoch": 0.14199460585999754, "grad_norm": 1.2478380856518423, "learning_rate": 9.674695263744315e-06, "loss": 0.7032, "step": 4633 }, { "epoch": 0.14202525438273875, "grad_norm": 1.468687508826725, "learning_rate": 9.674519142999108e-06, "loss": 0.6137, "step": 4634 }, { "epoch": 0.14205590290547995, "grad_norm": 1.2860174519906584, "learning_rate": 9.674342976194667e-06, "loss": 0.7043, "step": 4635 }, { "epoch": 0.14208655142822116, "grad_norm": 1.3250922929775215, "learning_rate": 9.67416676333273e-06, "loss": 0.7035, "step": 4636 }, { "epoch": 0.14211719995096236, "grad_norm": 1.4824863529052872, "learning_rate": 9.67399050441503e-06, "loss": 0.6605, "step": 4637 }, { "epoch": 0.14214784847370357, "grad_norm": 1.15727854481459, "learning_rate": 9.673814199443308e-06, "loss": 0.6719, "step": 4638 }, { "epoch": 0.14217849699644478, "grad_norm": 1.702246512976738, "learning_rate": 9.673637848419297e-06, "loss": 0.9018, "step": 4639 }, { "epoch": 0.14220914551918598, "grad_norm": 1.4601635790831409, "learning_rate": 9.673461451344736e-06, "loss": 0.7187, "step": 4640 }, { "epoch": 0.1422397940419272, "grad_norm": 0.7286500126438876, "learning_rate": 9.673285008221364e-06, "loss": 0.4524, "step": 4641 }, { "epoch": 0.1422704425646684, "grad_norm": 1.48203067599472, "learning_rate": 9.673108519050922e-06, "loss": 0.6797, "step": 4642 }, { "epoch": 0.1423010910874096, "grad_norm": 1.4122062678084084, "learning_rate": 9.672931983835143e-06, "loss": 0.6797, "step": 4643 }, { "epoch": 0.1423317396101508, "grad_norm": 1.289324825773085, "learning_rate": 9.672755402575771e-06, "loss": 0.6292, "step": 4644 }, { "epoch": 0.14236238813289198, "grad_norm": 1.4380993321629376, "learning_rate": 9.672578775274543e-06, "loss": 0.7534, "step": 4645 }, { "epoch": 0.1423930366556332, "grad_norm": 1.4772620958846612, "learning_rate": 9.672402101933201e-06, "loss": 0.7881, "step": 4646 }, { "epoch": 0.1424236851783744, "grad_norm": 1.5381733372175064, "learning_rate": 9.672225382553486e-06, "loss": 0.6966, "step": 4647 }, { "epoch": 0.1424543337011156, "grad_norm": 1.254449391010241, "learning_rate": 9.67204861713714e-06, "loss": 0.737, "step": 4648 }, { "epoch": 0.1424849822238568, "grad_norm": 1.4652405039560636, "learning_rate": 9.671871805685902e-06, "loss": 0.735, "step": 4649 }, { "epoch": 0.142515630746598, "grad_norm": 1.4538597376580695, "learning_rate": 9.671694948201517e-06, "loss": 0.6765, "step": 4650 }, { "epoch": 0.14254627926933922, "grad_norm": 1.2427270815106684, "learning_rate": 9.671518044685726e-06, "loss": 0.7546, "step": 4651 }, { "epoch": 0.14257692779208042, "grad_norm": 1.3335605071222858, "learning_rate": 9.671341095140273e-06, "loss": 0.7022, "step": 4652 }, { "epoch": 0.14260757631482163, "grad_norm": 1.4970765018952032, "learning_rate": 9.671164099566898e-06, "loss": 0.8086, "step": 4653 }, { "epoch": 0.14263822483756283, "grad_norm": 1.3761792510187258, "learning_rate": 9.67098705796735e-06, "loss": 0.7198, "step": 4654 }, { "epoch": 0.14266887336030404, "grad_norm": 1.5498581630197288, "learning_rate": 9.670809970343372e-06, "loss": 0.7406, "step": 4655 }, { "epoch": 0.14269952188304524, "grad_norm": 1.3992294842847632, "learning_rate": 9.670632836696707e-06, "loss": 0.7627, "step": 4656 }, { "epoch": 0.14273017040578645, "grad_norm": 1.4881718794959011, "learning_rate": 9.670455657029104e-06, "loss": 0.7764, "step": 4657 }, { "epoch": 0.14276081892852766, "grad_norm": 1.312397869546661, "learning_rate": 9.670278431342304e-06, "loss": 0.6542, "step": 4658 }, { "epoch": 0.14279146745126886, "grad_norm": 1.1689495650980988, "learning_rate": 9.670101159638057e-06, "loss": 0.8631, "step": 4659 }, { "epoch": 0.14282211597401007, "grad_norm": 1.326604976672093, "learning_rate": 9.669923841918107e-06, "loss": 0.6291, "step": 4660 }, { "epoch": 0.14285276449675124, "grad_norm": 1.4223683891862708, "learning_rate": 9.669746478184204e-06, "loss": 0.7745, "step": 4661 }, { "epoch": 0.14288341301949245, "grad_norm": 1.4524956230413766, "learning_rate": 9.669569068438094e-06, "loss": 0.8068, "step": 4662 }, { "epoch": 0.14291406154223366, "grad_norm": 1.158572705027321, "learning_rate": 9.669391612681524e-06, "loss": 0.6305, "step": 4663 }, { "epoch": 0.14294471006497486, "grad_norm": 1.2683474222590603, "learning_rate": 9.669214110916246e-06, "loss": 0.64, "step": 4664 }, { "epoch": 0.14297535858771607, "grad_norm": 1.3877519691954743, "learning_rate": 9.669036563144004e-06, "loss": 0.7591, "step": 4665 }, { "epoch": 0.14300600711045727, "grad_norm": 1.4631477182975396, "learning_rate": 9.668858969366551e-06, "loss": 0.8208, "step": 4666 }, { "epoch": 0.14303665563319848, "grad_norm": 1.400905925871757, "learning_rate": 9.668681329585637e-06, "loss": 0.8335, "step": 4667 }, { "epoch": 0.14306730415593968, "grad_norm": 1.3677527106643999, "learning_rate": 9.668503643803011e-06, "loss": 0.7114, "step": 4668 }, { "epoch": 0.1430979526786809, "grad_norm": 1.277632504143114, "learning_rate": 9.668325912020424e-06, "loss": 0.6158, "step": 4669 }, { "epoch": 0.1431286012014221, "grad_norm": 1.1833642637602229, "learning_rate": 9.668148134239626e-06, "loss": 0.724, "step": 4670 }, { "epoch": 0.1431592497241633, "grad_norm": 1.206426658053301, "learning_rate": 9.66797031046237e-06, "loss": 0.5674, "step": 4671 }, { "epoch": 0.1431898982469045, "grad_norm": 1.3621565874934505, "learning_rate": 9.667792440690411e-06, "loss": 0.659, "step": 4672 }, { "epoch": 0.1432205467696457, "grad_norm": 0.8132249454344355, "learning_rate": 9.667614524925496e-06, "loss": 0.4717, "step": 4673 }, { "epoch": 0.14325119529238692, "grad_norm": 1.3293317496711756, "learning_rate": 9.667436563169383e-06, "loss": 0.7441, "step": 4674 }, { "epoch": 0.14328184381512812, "grad_norm": 1.404494036736706, "learning_rate": 9.667258555423822e-06, "loss": 0.6932, "step": 4675 }, { "epoch": 0.1433124923378693, "grad_norm": 1.3164438556821358, "learning_rate": 9.667080501690569e-06, "loss": 0.7438, "step": 4676 }, { "epoch": 0.1433431408606105, "grad_norm": 1.5563202037270139, "learning_rate": 9.666902401971377e-06, "loss": 0.7247, "step": 4677 }, { "epoch": 0.1433737893833517, "grad_norm": 1.3266472750500335, "learning_rate": 9.666724256268001e-06, "loss": 0.8278, "step": 4678 }, { "epoch": 0.14340443790609292, "grad_norm": 0.5268311137923828, "learning_rate": 9.666546064582199e-06, "loss": 0.4601, "step": 4679 }, { "epoch": 0.14343508642883412, "grad_norm": 1.5284667478523084, "learning_rate": 9.666367826915723e-06, "loss": 0.6647, "step": 4680 }, { "epoch": 0.14346573495157533, "grad_norm": 1.3964774722490352, "learning_rate": 9.66618954327033e-06, "loss": 0.7594, "step": 4681 }, { "epoch": 0.14349638347431654, "grad_norm": 1.2815456115294706, "learning_rate": 9.66601121364778e-06, "loss": 0.6569, "step": 4682 }, { "epoch": 0.14352703199705774, "grad_norm": 1.3156737661963078, "learning_rate": 9.665832838049826e-06, "loss": 0.7049, "step": 4683 }, { "epoch": 0.14355768051979895, "grad_norm": 1.507239059976966, "learning_rate": 9.665654416478227e-06, "loss": 0.7309, "step": 4684 }, { "epoch": 0.14358832904254015, "grad_norm": 1.3842661793293223, "learning_rate": 9.665475948934742e-06, "loss": 0.7608, "step": 4685 }, { "epoch": 0.14361897756528136, "grad_norm": 1.5832406919781459, "learning_rate": 9.665297435421128e-06, "loss": 0.7769, "step": 4686 }, { "epoch": 0.14364962608802256, "grad_norm": 1.3503569763954386, "learning_rate": 9.665118875939145e-06, "loss": 0.7631, "step": 4687 }, { "epoch": 0.14368027461076377, "grad_norm": 1.5324223648619315, "learning_rate": 9.664940270490553e-06, "loss": 0.8144, "step": 4688 }, { "epoch": 0.14371092313350498, "grad_norm": 1.3882798590618193, "learning_rate": 9.66476161907711e-06, "loss": 0.7739, "step": 4689 }, { "epoch": 0.14374157165624618, "grad_norm": 1.2331082324974667, "learning_rate": 9.664582921700578e-06, "loss": 0.6271, "step": 4690 }, { "epoch": 0.1437722201789874, "grad_norm": 1.2634464904259135, "learning_rate": 9.664404178362715e-06, "loss": 0.7553, "step": 4691 }, { "epoch": 0.14380286870172856, "grad_norm": 1.439524504431415, "learning_rate": 9.664225389065287e-06, "loss": 0.7296, "step": 4692 }, { "epoch": 0.14383351722446977, "grad_norm": 1.3534134453738353, "learning_rate": 9.664046553810051e-06, "loss": 0.7001, "step": 4693 }, { "epoch": 0.14386416574721098, "grad_norm": 1.3596132475455371, "learning_rate": 9.663867672598772e-06, "loss": 0.7032, "step": 4694 }, { "epoch": 0.14389481426995218, "grad_norm": 1.4484304408701412, "learning_rate": 9.663688745433211e-06, "loss": 0.863, "step": 4695 }, { "epoch": 0.1439254627926934, "grad_norm": 0.6871241968727141, "learning_rate": 9.663509772315132e-06, "loss": 0.4626, "step": 4696 }, { "epoch": 0.1439561113154346, "grad_norm": 1.5467054708029464, "learning_rate": 9.663330753246298e-06, "loss": 0.771, "step": 4697 }, { "epoch": 0.1439867598381758, "grad_norm": 1.5529005443164636, "learning_rate": 9.663151688228473e-06, "loss": 0.7445, "step": 4698 }, { "epoch": 0.144017408360917, "grad_norm": 0.538177051644504, "learning_rate": 9.66297257726342e-06, "loss": 0.478, "step": 4699 }, { "epoch": 0.1440480568836582, "grad_norm": 1.1597531952198687, "learning_rate": 9.662793420352906e-06, "loss": 0.6979, "step": 4700 }, { "epoch": 0.14407870540639942, "grad_norm": 1.193924318510301, "learning_rate": 9.662614217498696e-06, "loss": 0.7397, "step": 4701 }, { "epoch": 0.14410935392914062, "grad_norm": 0.5153891700648473, "learning_rate": 9.662434968702554e-06, "loss": 0.4729, "step": 4702 }, { "epoch": 0.14414000245188183, "grad_norm": 1.2814106837505548, "learning_rate": 9.662255673966248e-06, "loss": 0.809, "step": 4703 }, { "epoch": 0.14417065097462303, "grad_norm": 1.3765836637024165, "learning_rate": 9.662076333291543e-06, "loss": 0.7435, "step": 4704 }, { "epoch": 0.14420129949736424, "grad_norm": 1.412964533945031, "learning_rate": 9.66189694668021e-06, "loss": 0.639, "step": 4705 }, { "epoch": 0.14423194802010544, "grad_norm": 1.34238306585265, "learning_rate": 9.66171751413401e-06, "loss": 0.7781, "step": 4706 }, { "epoch": 0.14426259654284662, "grad_norm": 1.441903742898932, "learning_rate": 9.661538035654716e-06, "loss": 0.7302, "step": 4707 }, { "epoch": 0.14429324506558783, "grad_norm": 1.2442462165341095, "learning_rate": 9.661358511244095e-06, "loss": 0.529, "step": 4708 }, { "epoch": 0.14432389358832903, "grad_norm": 1.302841390410236, "learning_rate": 9.661178940903916e-06, "loss": 0.6859, "step": 4709 }, { "epoch": 0.14435454211107024, "grad_norm": 0.6624829326295313, "learning_rate": 9.660999324635948e-06, "loss": 0.4841, "step": 4710 }, { "epoch": 0.14438519063381144, "grad_norm": 1.4190373446317361, "learning_rate": 9.660819662441962e-06, "loss": 0.6404, "step": 4711 }, { "epoch": 0.14441583915655265, "grad_norm": 1.2160320442028856, "learning_rate": 9.660639954323726e-06, "loss": 0.6631, "step": 4712 }, { "epoch": 0.14444648767929386, "grad_norm": 1.1675539778178197, "learning_rate": 9.660460200283013e-06, "loss": 0.6161, "step": 4713 }, { "epoch": 0.14447713620203506, "grad_norm": 1.3859370136679172, "learning_rate": 9.660280400321593e-06, "loss": 0.8009, "step": 4714 }, { "epoch": 0.14450778472477627, "grad_norm": 1.2460362600252712, "learning_rate": 9.660100554441237e-06, "loss": 0.6384, "step": 4715 }, { "epoch": 0.14453843324751747, "grad_norm": 1.4519983769966163, "learning_rate": 9.659920662643719e-06, "loss": 0.7172, "step": 4716 }, { "epoch": 0.14456908177025868, "grad_norm": 1.3362462561154154, "learning_rate": 9.659740724930811e-06, "loss": 0.7809, "step": 4717 }, { "epoch": 0.14459973029299988, "grad_norm": 1.360325540853473, "learning_rate": 9.659560741304284e-06, "loss": 0.7442, "step": 4718 }, { "epoch": 0.1446303788157411, "grad_norm": 1.237195664641233, "learning_rate": 9.659380711765914e-06, "loss": 0.6859, "step": 4719 }, { "epoch": 0.1446610273384823, "grad_norm": 1.2674627175317676, "learning_rate": 9.659200636317471e-06, "loss": 0.7362, "step": 4720 }, { "epoch": 0.1446916758612235, "grad_norm": 1.5168842230627444, "learning_rate": 9.659020514960736e-06, "loss": 0.8008, "step": 4721 }, { "epoch": 0.1447223243839647, "grad_norm": 1.2806710626972404, "learning_rate": 9.658840347697476e-06, "loss": 0.718, "step": 4722 }, { "epoch": 0.14475297290670588, "grad_norm": 1.5222438845484891, "learning_rate": 9.658660134529473e-06, "loss": 0.6756, "step": 4723 }, { "epoch": 0.1447836214294471, "grad_norm": 1.2743387496765577, "learning_rate": 9.658479875458497e-06, "loss": 0.7072, "step": 4724 }, { "epoch": 0.1448142699521883, "grad_norm": 0.5408741530553454, "learning_rate": 9.658299570486328e-06, "loss": 0.4609, "step": 4725 }, { "epoch": 0.1448449184749295, "grad_norm": 1.303514979144477, "learning_rate": 9.658119219614744e-06, "loss": 0.618, "step": 4726 }, { "epoch": 0.1448755669976707, "grad_norm": 1.1726611508007339, "learning_rate": 9.657938822845517e-06, "loss": 0.7654, "step": 4727 }, { "epoch": 0.1449062155204119, "grad_norm": 1.2412133849184603, "learning_rate": 9.657758380180426e-06, "loss": 0.7285, "step": 4728 }, { "epoch": 0.14493686404315312, "grad_norm": 1.467368891709437, "learning_rate": 9.657577891621252e-06, "loss": 0.7626, "step": 4729 }, { "epoch": 0.14496751256589432, "grad_norm": 0.49579241546496905, "learning_rate": 9.65739735716977e-06, "loss": 0.4727, "step": 4730 }, { "epoch": 0.14499816108863553, "grad_norm": 1.2562164449728819, "learning_rate": 9.657216776827763e-06, "loss": 0.6456, "step": 4731 }, { "epoch": 0.14502880961137674, "grad_norm": 1.302924996140714, "learning_rate": 9.657036150597004e-06, "loss": 0.6645, "step": 4732 }, { "epoch": 0.14505945813411794, "grad_norm": 1.2519798344762947, "learning_rate": 9.656855478479279e-06, "loss": 0.7206, "step": 4733 }, { "epoch": 0.14509010665685915, "grad_norm": 0.5058419762939594, "learning_rate": 9.656674760476364e-06, "loss": 0.4495, "step": 4734 }, { "epoch": 0.14512075517960035, "grad_norm": 1.5439869735077112, "learning_rate": 9.65649399659004e-06, "loss": 0.7977, "step": 4735 }, { "epoch": 0.14515140370234156, "grad_norm": 1.3146702308599916, "learning_rate": 9.65631318682209e-06, "loss": 0.7724, "step": 4736 }, { "epoch": 0.14518205222508276, "grad_norm": 1.2436558796211332, "learning_rate": 9.656132331174297e-06, "loss": 0.7485, "step": 4737 }, { "epoch": 0.14521270074782394, "grad_norm": 1.212897991642811, "learning_rate": 9.655951429648438e-06, "loss": 0.6884, "step": 4738 }, { "epoch": 0.14524334927056515, "grad_norm": 1.357603143134423, "learning_rate": 9.655770482246299e-06, "loss": 0.8065, "step": 4739 }, { "epoch": 0.14527399779330635, "grad_norm": 1.2819792186292644, "learning_rate": 9.655589488969663e-06, "loss": 0.6369, "step": 4740 }, { "epoch": 0.14530464631604756, "grad_norm": 1.4216037968024644, "learning_rate": 9.655408449820312e-06, "loss": 0.6708, "step": 4741 }, { "epoch": 0.14533529483878876, "grad_norm": 1.3090911432713992, "learning_rate": 9.65522736480003e-06, "loss": 0.731, "step": 4742 }, { "epoch": 0.14536594336152997, "grad_norm": 1.4939963705836146, "learning_rate": 9.655046233910604e-06, "loss": 0.6738, "step": 4743 }, { "epoch": 0.14539659188427118, "grad_norm": 1.339425969376855, "learning_rate": 9.654865057153813e-06, "loss": 0.7094, "step": 4744 }, { "epoch": 0.14542724040701238, "grad_norm": 1.2660000708765864, "learning_rate": 9.654683834531447e-06, "loss": 0.6639, "step": 4745 }, { "epoch": 0.1454578889297536, "grad_norm": 1.4480422500084424, "learning_rate": 9.654502566045292e-06, "loss": 0.7242, "step": 4746 }, { "epoch": 0.1454885374524948, "grad_norm": 1.3018007410864598, "learning_rate": 9.65432125169713e-06, "loss": 0.6776, "step": 4747 }, { "epoch": 0.145519185975236, "grad_norm": 1.2676193058653416, "learning_rate": 9.654139891488751e-06, "loss": 0.705, "step": 4748 }, { "epoch": 0.1455498344979772, "grad_norm": 1.347947413790531, "learning_rate": 9.653958485421939e-06, "loss": 0.7456, "step": 4749 }, { "epoch": 0.1455804830207184, "grad_norm": 0.5087331045306325, "learning_rate": 9.653777033498485e-06, "loss": 0.4823, "step": 4750 }, { "epoch": 0.14561113154345962, "grad_norm": 0.5158257254887713, "learning_rate": 9.653595535720175e-06, "loss": 0.4636, "step": 4751 }, { "epoch": 0.14564178006620082, "grad_norm": 0.49295021468443745, "learning_rate": 9.653413992088798e-06, "loss": 0.4819, "step": 4752 }, { "epoch": 0.14567242858894203, "grad_norm": 1.4499880154415754, "learning_rate": 9.653232402606142e-06, "loss": 0.7448, "step": 4753 }, { "epoch": 0.1457030771116832, "grad_norm": 0.48903881184205217, "learning_rate": 9.653050767273996e-06, "loss": 0.4704, "step": 4754 }, { "epoch": 0.1457337256344244, "grad_norm": 1.4515783757559033, "learning_rate": 9.65286908609415e-06, "loss": 0.6664, "step": 4755 }, { "epoch": 0.14576437415716562, "grad_norm": 1.410312111668202, "learning_rate": 9.652687359068396e-06, "loss": 0.8482, "step": 4756 }, { "epoch": 0.14579502267990682, "grad_norm": 1.2303738877379633, "learning_rate": 9.652505586198523e-06, "loss": 0.7009, "step": 4757 }, { "epoch": 0.14582567120264803, "grad_norm": 1.3308643707472254, "learning_rate": 9.65232376748632e-06, "loss": 0.6693, "step": 4758 }, { "epoch": 0.14585631972538923, "grad_norm": 1.247756807583719, "learning_rate": 9.65214190293358e-06, "loss": 0.7946, "step": 4759 }, { "epoch": 0.14588696824813044, "grad_norm": 1.2645213869613288, "learning_rate": 9.651959992542097e-06, "loss": 0.7276, "step": 4760 }, { "epoch": 0.14591761677087164, "grad_norm": 1.2040617234104456, "learning_rate": 9.651778036313664e-06, "loss": 0.6877, "step": 4761 }, { "epoch": 0.14594826529361285, "grad_norm": 1.2665059768699196, "learning_rate": 9.651596034250069e-06, "loss": 0.8011, "step": 4762 }, { "epoch": 0.14597891381635406, "grad_norm": 1.3362240661368876, "learning_rate": 9.651413986353109e-06, "loss": 0.7634, "step": 4763 }, { "epoch": 0.14600956233909526, "grad_norm": 0.6089823748977717, "learning_rate": 9.651231892624577e-06, "loss": 0.4792, "step": 4764 }, { "epoch": 0.14604021086183647, "grad_norm": 1.4972201758510368, "learning_rate": 9.651049753066267e-06, "loss": 0.6939, "step": 4765 }, { "epoch": 0.14607085938457767, "grad_norm": 1.4744186960433865, "learning_rate": 9.650867567679973e-06, "loss": 0.7346, "step": 4766 }, { "epoch": 0.14610150790731888, "grad_norm": 1.415971685213835, "learning_rate": 9.650685336467492e-06, "loss": 0.7394, "step": 4767 }, { "epoch": 0.14613215643006008, "grad_norm": 1.4346661138082069, "learning_rate": 9.650503059430618e-06, "loss": 0.7774, "step": 4768 }, { "epoch": 0.14616280495280126, "grad_norm": 1.2806221452401054, "learning_rate": 9.650320736571146e-06, "loss": 0.6833, "step": 4769 }, { "epoch": 0.14619345347554247, "grad_norm": 1.2672563453722772, "learning_rate": 9.650138367890876e-06, "loss": 0.6619, "step": 4770 }, { "epoch": 0.14622410199828367, "grad_norm": 1.1335365421065369, "learning_rate": 9.649955953391603e-06, "loss": 0.6357, "step": 4771 }, { "epoch": 0.14625475052102488, "grad_norm": 1.3486221390836552, "learning_rate": 9.649773493075122e-06, "loss": 0.7587, "step": 4772 }, { "epoch": 0.14628539904376608, "grad_norm": 1.314227748246262, "learning_rate": 9.649590986943236e-06, "loss": 0.7174, "step": 4773 }, { "epoch": 0.1463160475665073, "grad_norm": 1.415815414799064, "learning_rate": 9.649408434997739e-06, "loss": 0.6996, "step": 4774 }, { "epoch": 0.1463466960892485, "grad_norm": 1.2889825515894857, "learning_rate": 9.64922583724043e-06, "loss": 0.7761, "step": 4775 }, { "epoch": 0.1463773446119897, "grad_norm": 1.1652992240188187, "learning_rate": 9.64904319367311e-06, "loss": 0.6518, "step": 4776 }, { "epoch": 0.1464079931347309, "grad_norm": 1.4486382489084075, "learning_rate": 9.64886050429758e-06, "loss": 0.742, "step": 4777 }, { "epoch": 0.1464386416574721, "grad_norm": 1.2740661958668302, "learning_rate": 9.648677769115637e-06, "loss": 0.6912, "step": 4778 }, { "epoch": 0.14646929018021332, "grad_norm": 1.348272903312105, "learning_rate": 9.64849498812908e-06, "loss": 0.681, "step": 4779 }, { "epoch": 0.14649993870295452, "grad_norm": 1.40697128088933, "learning_rate": 9.648312161339715e-06, "loss": 0.6564, "step": 4780 }, { "epoch": 0.14653058722569573, "grad_norm": 1.4650378130730413, "learning_rate": 9.64812928874934e-06, "loss": 0.6388, "step": 4781 }, { "epoch": 0.14656123574843694, "grad_norm": 1.2744973329552949, "learning_rate": 9.64794637035976e-06, "loss": 0.6876, "step": 4782 }, { "epoch": 0.14659188427117814, "grad_norm": 1.2976401253580974, "learning_rate": 9.647763406172772e-06, "loss": 0.7489, "step": 4783 }, { "epoch": 0.14662253279391935, "grad_norm": 1.3406549182102507, "learning_rate": 9.647580396190184e-06, "loss": 0.7551, "step": 4784 }, { "epoch": 0.14665318131666052, "grad_norm": 1.3629235419476846, "learning_rate": 9.647397340413795e-06, "loss": 0.7377, "step": 4785 }, { "epoch": 0.14668382983940173, "grad_norm": 1.4081770921027523, "learning_rate": 9.647214238845413e-06, "loss": 0.6962, "step": 4786 }, { "epoch": 0.14671447836214294, "grad_norm": 1.4957763921939322, "learning_rate": 9.647031091486838e-06, "loss": 0.6508, "step": 4787 }, { "epoch": 0.14674512688488414, "grad_norm": 1.2905749478483448, "learning_rate": 9.64684789833988e-06, "loss": 0.8303, "step": 4788 }, { "epoch": 0.14677577540762535, "grad_norm": 1.4304666984693195, "learning_rate": 9.646664659406339e-06, "loss": 0.7371, "step": 4789 }, { "epoch": 0.14680642393036655, "grad_norm": 1.47758135031782, "learning_rate": 9.646481374688022e-06, "loss": 0.7495, "step": 4790 }, { "epoch": 0.14683707245310776, "grad_norm": 1.2453338397086293, "learning_rate": 9.646298044186735e-06, "loss": 0.7681, "step": 4791 }, { "epoch": 0.14686772097584896, "grad_norm": 1.4973019678087067, "learning_rate": 9.646114667904285e-06, "loss": 0.6994, "step": 4792 }, { "epoch": 0.14689836949859017, "grad_norm": 1.2378672906464565, "learning_rate": 9.645931245842477e-06, "loss": 0.6861, "step": 4793 }, { "epoch": 0.14692901802133138, "grad_norm": 1.233490931466369, "learning_rate": 9.64574777800312e-06, "loss": 0.7293, "step": 4794 }, { "epoch": 0.14695966654407258, "grad_norm": 1.268667718781663, "learning_rate": 9.645564264388024e-06, "loss": 0.6285, "step": 4795 }, { "epoch": 0.1469903150668138, "grad_norm": 1.3159972894018643, "learning_rate": 9.645380704998993e-06, "loss": 0.7088, "step": 4796 }, { "epoch": 0.147020963589555, "grad_norm": 1.4068460803820348, "learning_rate": 9.645197099837838e-06, "loss": 0.785, "step": 4797 }, { "epoch": 0.1470516121122962, "grad_norm": 1.421015063308632, "learning_rate": 9.645013448906366e-06, "loss": 0.7244, "step": 4798 }, { "epoch": 0.1470822606350374, "grad_norm": 1.3808636354390083, "learning_rate": 9.644829752206388e-06, "loss": 0.7879, "step": 4799 }, { "epoch": 0.14711290915777858, "grad_norm": 1.3862490653211377, "learning_rate": 9.644646009739715e-06, "loss": 0.8672, "step": 4800 }, { "epoch": 0.1471435576805198, "grad_norm": 1.5282102085421645, "learning_rate": 9.644462221508157e-06, "loss": 0.8423, "step": 4801 }, { "epoch": 0.147174206203261, "grad_norm": 0.5876356687042602, "learning_rate": 9.644278387513525e-06, "loss": 0.4918, "step": 4802 }, { "epoch": 0.1472048547260022, "grad_norm": 0.5836171760397386, "learning_rate": 9.644094507757627e-06, "loss": 0.467, "step": 4803 }, { "epoch": 0.1472355032487434, "grad_norm": 1.299389999373656, "learning_rate": 9.643910582242279e-06, "loss": 0.7188, "step": 4804 }, { "epoch": 0.1472661517714846, "grad_norm": 1.3586628944026837, "learning_rate": 9.643726610969293e-06, "loss": 0.7856, "step": 4805 }, { "epoch": 0.14729680029422582, "grad_norm": 1.5627345436108282, "learning_rate": 9.643542593940478e-06, "loss": 0.7408, "step": 4806 }, { "epoch": 0.14732744881696702, "grad_norm": 1.2903348736492295, "learning_rate": 9.643358531157651e-06, "loss": 0.7912, "step": 4807 }, { "epoch": 0.14735809733970823, "grad_norm": 0.615478331615523, "learning_rate": 9.643174422622625e-06, "loss": 0.4588, "step": 4808 }, { "epoch": 0.14738874586244943, "grad_norm": 1.5259362001837233, "learning_rate": 9.642990268337214e-06, "loss": 0.7929, "step": 4809 }, { "epoch": 0.14741939438519064, "grad_norm": 0.6163744311515156, "learning_rate": 9.642806068303229e-06, "loss": 0.4827, "step": 4810 }, { "epoch": 0.14745004290793184, "grad_norm": 1.4137772454968949, "learning_rate": 9.642621822522491e-06, "loss": 0.7115, "step": 4811 }, { "epoch": 0.14748069143067305, "grad_norm": 1.4495700242804042, "learning_rate": 9.642437530996812e-06, "loss": 0.7247, "step": 4812 }, { "epoch": 0.14751133995341426, "grad_norm": 0.5147531051579513, "learning_rate": 9.642253193728006e-06, "loss": 0.4568, "step": 4813 }, { "epoch": 0.14754198847615546, "grad_norm": 0.5260710075758392, "learning_rate": 9.642068810717893e-06, "loss": 0.4551, "step": 4814 }, { "epoch": 0.14757263699889667, "grad_norm": 1.6147388150827964, "learning_rate": 9.641884381968289e-06, "loss": 0.7539, "step": 4815 }, { "epoch": 0.14760328552163784, "grad_norm": 1.4107006176010877, "learning_rate": 9.641699907481008e-06, "loss": 0.7362, "step": 4816 }, { "epoch": 0.14763393404437905, "grad_norm": 1.508780249943323, "learning_rate": 9.641515387257873e-06, "loss": 0.7231, "step": 4817 }, { "epoch": 0.14766458256712026, "grad_norm": 1.3903308768734968, "learning_rate": 9.641330821300697e-06, "loss": 0.7743, "step": 4818 }, { "epoch": 0.14769523108986146, "grad_norm": 1.4430531270685276, "learning_rate": 9.641146209611302e-06, "loss": 0.7821, "step": 4819 }, { "epoch": 0.14772587961260267, "grad_norm": 0.6187057920442046, "learning_rate": 9.640961552191505e-06, "loss": 0.4614, "step": 4820 }, { "epoch": 0.14775652813534387, "grad_norm": 1.2906010596849062, "learning_rate": 9.640776849043128e-06, "loss": 0.6943, "step": 4821 }, { "epoch": 0.14778717665808508, "grad_norm": 1.3476414521202946, "learning_rate": 9.640592100167989e-06, "loss": 0.6606, "step": 4822 }, { "epoch": 0.14781782518082628, "grad_norm": 1.259771465658009, "learning_rate": 9.640407305567907e-06, "loss": 0.707, "step": 4823 }, { "epoch": 0.1478484737035675, "grad_norm": 1.5807863955213564, "learning_rate": 9.640222465244706e-06, "loss": 0.7363, "step": 4824 }, { "epoch": 0.1478791222263087, "grad_norm": 1.4383586721725587, "learning_rate": 9.640037579200206e-06, "loss": 0.7314, "step": 4825 }, { "epoch": 0.1479097707490499, "grad_norm": 1.4304317890875733, "learning_rate": 9.63985264743623e-06, "loss": 0.7252, "step": 4826 }, { "epoch": 0.1479404192717911, "grad_norm": 1.370238964069475, "learning_rate": 9.639667669954596e-06, "loss": 0.6448, "step": 4827 }, { "epoch": 0.1479710677945323, "grad_norm": 1.3346005202428985, "learning_rate": 9.63948264675713e-06, "loss": 0.7909, "step": 4828 }, { "epoch": 0.14800171631727352, "grad_norm": 1.3773836437246576, "learning_rate": 9.639297577845654e-06, "loss": 0.7298, "step": 4829 }, { "epoch": 0.14803236484001472, "grad_norm": 1.3089607700409678, "learning_rate": 9.639112463221994e-06, "loss": 0.6196, "step": 4830 }, { "epoch": 0.1480630133627559, "grad_norm": 1.300579998633732, "learning_rate": 9.638927302887968e-06, "loss": 0.779, "step": 4831 }, { "epoch": 0.1480936618854971, "grad_norm": 1.39170834001148, "learning_rate": 9.638742096845408e-06, "loss": 0.7279, "step": 4832 }, { "epoch": 0.1481243104082383, "grad_norm": 1.3249157836090806, "learning_rate": 9.638556845096134e-06, "loss": 0.7047, "step": 4833 }, { "epoch": 0.14815495893097952, "grad_norm": 1.210844659238511, "learning_rate": 9.638371547641972e-06, "loss": 0.6853, "step": 4834 }, { "epoch": 0.14818560745372072, "grad_norm": 1.5117702410557694, "learning_rate": 9.63818620448475e-06, "loss": 0.8385, "step": 4835 }, { "epoch": 0.14821625597646193, "grad_norm": 1.409760688990243, "learning_rate": 9.638000815626292e-06, "loss": 0.7514, "step": 4836 }, { "epoch": 0.14824690449920314, "grad_norm": 1.4006893177749715, "learning_rate": 9.637815381068424e-06, "loss": 0.7322, "step": 4837 }, { "epoch": 0.14827755302194434, "grad_norm": 1.2673311410345929, "learning_rate": 9.637629900812975e-06, "loss": 0.6802, "step": 4838 }, { "epoch": 0.14830820154468555, "grad_norm": 1.2855532595754493, "learning_rate": 9.637444374861774e-06, "loss": 0.811, "step": 4839 }, { "epoch": 0.14833885006742675, "grad_norm": 1.373529690066405, "learning_rate": 9.637258803216643e-06, "loss": 0.7057, "step": 4840 }, { "epoch": 0.14836949859016796, "grad_norm": 0.6133262367058194, "learning_rate": 9.637073185879418e-06, "loss": 0.4934, "step": 4841 }, { "epoch": 0.14840014711290916, "grad_norm": 0.5334335890015133, "learning_rate": 9.636887522851924e-06, "loss": 0.5053, "step": 4842 }, { "epoch": 0.14843079563565037, "grad_norm": 1.4800274752042322, "learning_rate": 9.63670181413599e-06, "loss": 0.721, "step": 4843 }, { "epoch": 0.14846144415839158, "grad_norm": 1.3742331558129526, "learning_rate": 9.636516059733445e-06, "loss": 0.706, "step": 4844 }, { "epoch": 0.14849209268113278, "grad_norm": 1.4047674399770294, "learning_rate": 9.636330259646122e-06, "loss": 0.6802, "step": 4845 }, { "epoch": 0.148522741203874, "grad_norm": 1.440743869206528, "learning_rate": 9.636144413875852e-06, "loss": 0.6441, "step": 4846 }, { "epoch": 0.14855338972661516, "grad_norm": 1.2605358200267167, "learning_rate": 9.635958522424464e-06, "loss": 0.7255, "step": 4847 }, { "epoch": 0.14858403824935637, "grad_norm": 1.323938234864102, "learning_rate": 9.635772585293792e-06, "loss": 0.6564, "step": 4848 }, { "epoch": 0.14861468677209758, "grad_norm": 0.8749878207684127, "learning_rate": 9.635586602485665e-06, "loss": 0.4816, "step": 4849 }, { "epoch": 0.14864533529483878, "grad_norm": 0.6730197470427802, "learning_rate": 9.635400574001918e-06, "loss": 0.4896, "step": 4850 }, { "epoch": 0.14867598381758, "grad_norm": 0.48641381258487715, "learning_rate": 9.635214499844383e-06, "loss": 0.4562, "step": 4851 }, { "epoch": 0.1487066323403212, "grad_norm": 1.4185228708567845, "learning_rate": 9.635028380014893e-06, "loss": 0.7561, "step": 4852 }, { "epoch": 0.1487372808630624, "grad_norm": 1.441058536438303, "learning_rate": 9.634842214515283e-06, "loss": 0.7707, "step": 4853 }, { "epoch": 0.1487679293858036, "grad_norm": 1.3167728918670505, "learning_rate": 9.634656003347387e-06, "loss": 0.6394, "step": 4854 }, { "epoch": 0.1487985779085448, "grad_norm": 0.88634752859352, "learning_rate": 9.634469746513038e-06, "loss": 0.4844, "step": 4855 }, { "epoch": 0.14882922643128602, "grad_norm": 1.2383405871760353, "learning_rate": 9.634283444014075e-06, "loss": 0.6513, "step": 4856 }, { "epoch": 0.14885987495402722, "grad_norm": 1.2962860816498691, "learning_rate": 9.63409709585233e-06, "loss": 0.6965, "step": 4857 }, { "epoch": 0.14889052347676843, "grad_norm": 1.4993211319576936, "learning_rate": 9.633910702029641e-06, "loss": 0.8267, "step": 4858 }, { "epoch": 0.14892117199950963, "grad_norm": 1.4199254921256361, "learning_rate": 9.633724262547843e-06, "loss": 0.696, "step": 4859 }, { "epoch": 0.14895182052225084, "grad_norm": 1.2741135471213234, "learning_rate": 9.633537777408777e-06, "loss": 0.6881, "step": 4860 }, { "epoch": 0.14898246904499204, "grad_norm": 0.7026147092836934, "learning_rate": 9.633351246614275e-06, "loss": 0.479, "step": 4861 }, { "epoch": 0.14901311756773322, "grad_norm": 1.2931268089066428, "learning_rate": 9.633164670166179e-06, "loss": 0.7004, "step": 4862 }, { "epoch": 0.14904376609047443, "grad_norm": 1.312439692881919, "learning_rate": 9.632978048066325e-06, "loss": 0.6857, "step": 4863 }, { "epoch": 0.14907441461321563, "grad_norm": 1.477592791484093, "learning_rate": 9.632791380316556e-06, "loss": 0.7537, "step": 4864 }, { "epoch": 0.14910506313595684, "grad_norm": 1.418581264652309, "learning_rate": 9.632604666918705e-06, "loss": 0.7875, "step": 4865 }, { "epoch": 0.14913571165869804, "grad_norm": 1.2875866335440922, "learning_rate": 9.632417907874617e-06, "loss": 0.712, "step": 4866 }, { "epoch": 0.14916636018143925, "grad_norm": 1.3168165888313454, "learning_rate": 9.632231103186128e-06, "loss": 0.7687, "step": 4867 }, { "epoch": 0.14919700870418046, "grad_norm": 1.214930013385596, "learning_rate": 9.632044252855082e-06, "loss": 0.741, "step": 4868 }, { "epoch": 0.14922765722692166, "grad_norm": 1.350777077775812, "learning_rate": 9.631857356883318e-06, "loss": 0.6493, "step": 4869 }, { "epoch": 0.14925830574966287, "grad_norm": 1.329864328011396, "learning_rate": 9.631670415272679e-06, "loss": 0.8074, "step": 4870 }, { "epoch": 0.14928895427240407, "grad_norm": 1.2197655788247073, "learning_rate": 9.631483428025007e-06, "loss": 0.6866, "step": 4871 }, { "epoch": 0.14931960279514528, "grad_norm": 1.3806906258088294, "learning_rate": 9.631296395142142e-06, "loss": 0.7364, "step": 4872 }, { "epoch": 0.14935025131788648, "grad_norm": 1.251915126196527, "learning_rate": 9.631109316625931e-06, "loss": 0.749, "step": 4873 }, { "epoch": 0.1493808998406277, "grad_norm": 1.1323389637282686, "learning_rate": 9.630922192478213e-06, "loss": 0.6639, "step": 4874 }, { "epoch": 0.1494115483633689, "grad_norm": 1.5057955797050426, "learning_rate": 9.630735022700835e-06, "loss": 0.7548, "step": 4875 }, { "epoch": 0.1494421968861101, "grad_norm": 1.3056786933284354, "learning_rate": 9.630547807295639e-06, "loss": 0.6965, "step": 4876 }, { "epoch": 0.1494728454088513, "grad_norm": 1.3784212000999945, "learning_rate": 9.63036054626447e-06, "loss": 0.749, "step": 4877 }, { "epoch": 0.14950349393159248, "grad_norm": 1.6231785824589322, "learning_rate": 9.630173239609176e-06, "loss": 0.7738, "step": 4878 }, { "epoch": 0.1495341424543337, "grad_norm": 1.3963107871269487, "learning_rate": 9.629985887331598e-06, "loss": 0.693, "step": 4879 }, { "epoch": 0.1495647909770749, "grad_norm": 1.3115647314966152, "learning_rate": 9.629798489433586e-06, "loss": 0.7554, "step": 4880 }, { "epoch": 0.1495954394998161, "grad_norm": 1.5059523635527805, "learning_rate": 9.629611045916985e-06, "loss": 0.6887, "step": 4881 }, { "epoch": 0.1496260880225573, "grad_norm": 1.2972772185219918, "learning_rate": 9.629423556783641e-06, "loss": 0.7226, "step": 4882 }, { "epoch": 0.1496567365452985, "grad_norm": 1.3846551069956823, "learning_rate": 9.629236022035404e-06, "loss": 0.7842, "step": 4883 }, { "epoch": 0.14968738506803972, "grad_norm": 1.4291607518529006, "learning_rate": 9.629048441674117e-06, "loss": 0.7964, "step": 4884 }, { "epoch": 0.14971803359078092, "grad_norm": 1.2732180370349302, "learning_rate": 9.628860815701635e-06, "loss": 0.6968, "step": 4885 }, { "epoch": 0.14974868211352213, "grad_norm": 1.2608964798718876, "learning_rate": 9.6286731441198e-06, "loss": 0.6453, "step": 4886 }, { "epoch": 0.14977933063626334, "grad_norm": 1.4283222626362353, "learning_rate": 9.628485426930465e-06, "loss": 0.6654, "step": 4887 }, { "epoch": 0.14980997915900454, "grad_norm": 1.5172871030477457, "learning_rate": 9.62829766413548e-06, "loss": 0.732, "step": 4888 }, { "epoch": 0.14984062768174575, "grad_norm": 1.454563672217483, "learning_rate": 9.628109855736692e-06, "loss": 0.6915, "step": 4889 }, { "epoch": 0.14987127620448695, "grad_norm": 1.4249577201925663, "learning_rate": 9.627922001735955e-06, "loss": 0.7613, "step": 4890 }, { "epoch": 0.14990192472722816, "grad_norm": 1.3580427790080696, "learning_rate": 9.627734102135118e-06, "loss": 0.7474, "step": 4891 }, { "epoch": 0.14993257324996936, "grad_norm": 1.320328968897207, "learning_rate": 9.627546156936033e-06, "loss": 0.7622, "step": 4892 }, { "epoch": 0.14996322177271054, "grad_norm": 1.4715149711140116, "learning_rate": 9.627358166140551e-06, "loss": 0.7168, "step": 4893 }, { "epoch": 0.14999387029545175, "grad_norm": 1.461810582165026, "learning_rate": 9.627170129750526e-06, "loss": 0.7272, "step": 4894 }, { "epoch": 0.15002451881819295, "grad_norm": 1.3189387217444186, "learning_rate": 9.626982047767808e-06, "loss": 0.7697, "step": 4895 }, { "epoch": 0.15005516734093416, "grad_norm": 1.149166479807196, "learning_rate": 9.626793920194254e-06, "loss": 0.6164, "step": 4896 }, { "epoch": 0.15008581586367536, "grad_norm": 1.3407275789501394, "learning_rate": 9.626605747031715e-06, "loss": 0.7093, "step": 4897 }, { "epoch": 0.15011646438641657, "grad_norm": 1.2992289188813457, "learning_rate": 9.626417528282047e-06, "loss": 0.7251, "step": 4898 }, { "epoch": 0.15014711290915778, "grad_norm": 1.3246230010415234, "learning_rate": 9.6262292639471e-06, "loss": 0.6904, "step": 4899 }, { "epoch": 0.15017776143189898, "grad_norm": 1.3317215179225084, "learning_rate": 9.626040954028735e-06, "loss": 0.6341, "step": 4900 }, { "epoch": 0.1502084099546402, "grad_norm": 1.5122239055657407, "learning_rate": 9.625852598528804e-06, "loss": 0.7462, "step": 4901 }, { "epoch": 0.1502390584773814, "grad_norm": 0.8571276616452835, "learning_rate": 9.625664197449165e-06, "loss": 0.4741, "step": 4902 }, { "epoch": 0.1502697070001226, "grad_norm": 1.2208049617455652, "learning_rate": 9.625475750791672e-06, "loss": 0.6744, "step": 4903 }, { "epoch": 0.1503003555228638, "grad_norm": 0.6148213489117638, "learning_rate": 9.625287258558183e-06, "loss": 0.456, "step": 4904 }, { "epoch": 0.150331004045605, "grad_norm": 1.6224234757192773, "learning_rate": 9.625098720750557e-06, "loss": 0.6794, "step": 4905 }, { "epoch": 0.15036165256834622, "grad_norm": 1.4307538671785855, "learning_rate": 9.624910137370647e-06, "loss": 0.7559, "step": 4906 }, { "epoch": 0.15039230109108742, "grad_norm": 1.161966022077114, "learning_rate": 9.624721508420316e-06, "loss": 0.6809, "step": 4907 }, { "epoch": 0.15042294961382863, "grad_norm": 1.4399484025807452, "learning_rate": 9.624532833901419e-06, "loss": 0.7033, "step": 4908 }, { "epoch": 0.1504535981365698, "grad_norm": 1.4149462735445189, "learning_rate": 9.624344113815818e-06, "loss": 0.7371, "step": 4909 }, { "epoch": 0.150484246659311, "grad_norm": 1.4549834413401974, "learning_rate": 9.624155348165372e-06, "loss": 0.7027, "step": 4910 }, { "epoch": 0.15051489518205222, "grad_norm": 1.3605807295932535, "learning_rate": 9.623966536951939e-06, "loss": 0.7646, "step": 4911 }, { "epoch": 0.15054554370479342, "grad_norm": 1.449785315276508, "learning_rate": 9.62377768017738e-06, "loss": 0.7268, "step": 4912 }, { "epoch": 0.15057619222753463, "grad_norm": 1.3741635893872817, "learning_rate": 9.623588777843558e-06, "loss": 0.7393, "step": 4913 }, { "epoch": 0.15060684075027583, "grad_norm": 1.3277079260635887, "learning_rate": 9.623399829952332e-06, "loss": 0.6522, "step": 4914 }, { "epoch": 0.15063748927301704, "grad_norm": 1.4772825210868135, "learning_rate": 9.623210836505565e-06, "loss": 0.7521, "step": 4915 }, { "epoch": 0.15066813779575824, "grad_norm": 1.46506601329335, "learning_rate": 9.623021797505118e-06, "loss": 0.7889, "step": 4916 }, { "epoch": 0.15069878631849945, "grad_norm": 1.4428535294828162, "learning_rate": 9.622832712952856e-06, "loss": 0.6741, "step": 4917 }, { "epoch": 0.15072943484124066, "grad_norm": 1.3813560598297947, "learning_rate": 9.62264358285064e-06, "loss": 0.8798, "step": 4918 }, { "epoch": 0.15076008336398186, "grad_norm": 1.515415193595614, "learning_rate": 9.622454407200333e-06, "loss": 0.7395, "step": 4919 }, { "epoch": 0.15079073188672307, "grad_norm": 1.4354652562928825, "learning_rate": 9.622265186003799e-06, "loss": 0.7215, "step": 4920 }, { "epoch": 0.15082138040946427, "grad_norm": 1.6311866757443236, "learning_rate": 9.622075919262905e-06, "loss": 0.7249, "step": 4921 }, { "epoch": 0.15085202893220548, "grad_norm": 1.5854181119313904, "learning_rate": 9.621886606979514e-06, "loss": 0.7497, "step": 4922 }, { "epoch": 0.15088267745494668, "grad_norm": 1.1985274148033624, "learning_rate": 9.621697249155493e-06, "loss": 0.4728, "step": 4923 }, { "epoch": 0.15091332597768786, "grad_norm": 1.0074485681901477, "learning_rate": 9.621507845792705e-06, "loss": 0.4643, "step": 4924 }, { "epoch": 0.15094397450042907, "grad_norm": 1.419890500004119, "learning_rate": 9.621318396893018e-06, "loss": 0.7582, "step": 4925 }, { "epoch": 0.15097462302317027, "grad_norm": 1.4878696519564107, "learning_rate": 9.621128902458299e-06, "loss": 0.7922, "step": 4926 }, { "epoch": 0.15100527154591148, "grad_norm": 1.578248063913343, "learning_rate": 9.620939362490414e-06, "loss": 0.8062, "step": 4927 }, { "epoch": 0.15103592006865268, "grad_norm": 0.8355559707519707, "learning_rate": 9.62074977699123e-06, "loss": 0.4588, "step": 4928 }, { "epoch": 0.1510665685913939, "grad_norm": 0.9875764505431305, "learning_rate": 9.620560145962618e-06, "loss": 0.4825, "step": 4929 }, { "epoch": 0.1510972171141351, "grad_norm": 1.283804295258268, "learning_rate": 9.620370469406443e-06, "loss": 0.7008, "step": 4930 }, { "epoch": 0.1511278656368763, "grad_norm": 1.6424031575083355, "learning_rate": 9.620180747324577e-06, "loss": 0.632, "step": 4931 }, { "epoch": 0.1511585141596175, "grad_norm": 1.4328652846417627, "learning_rate": 9.619990979718889e-06, "loss": 0.7176, "step": 4932 }, { "epoch": 0.1511891626823587, "grad_norm": 1.4413395835062264, "learning_rate": 9.619801166591247e-06, "loss": 0.7985, "step": 4933 }, { "epoch": 0.15121981120509992, "grad_norm": 1.160275242638548, "learning_rate": 9.61961130794352e-06, "loss": 0.649, "step": 4934 }, { "epoch": 0.15125045972784112, "grad_norm": 1.439877483476097, "learning_rate": 9.619421403777583e-06, "loss": 0.8333, "step": 4935 }, { "epoch": 0.15128110825058233, "grad_norm": 1.5412270150839218, "learning_rate": 9.619231454095304e-06, "loss": 0.7318, "step": 4936 }, { "epoch": 0.15131175677332354, "grad_norm": 1.1629590765650364, "learning_rate": 9.619041458898557e-06, "loss": 0.6444, "step": 4937 }, { "epoch": 0.15134240529606474, "grad_norm": 0.7114988574801496, "learning_rate": 9.61885141818921e-06, "loss": 0.4656, "step": 4938 }, { "epoch": 0.15137305381880595, "grad_norm": 0.6668091351026713, "learning_rate": 9.61866133196914e-06, "loss": 0.461, "step": 4939 }, { "epoch": 0.15140370234154713, "grad_norm": 1.4010038396604039, "learning_rate": 9.618471200240219e-06, "loss": 0.6852, "step": 4940 }, { "epoch": 0.15143435086428833, "grad_norm": 1.2655109541583855, "learning_rate": 9.618281023004318e-06, "loss": 0.7731, "step": 4941 }, { "epoch": 0.15146499938702954, "grad_norm": 1.4232959649753354, "learning_rate": 9.618090800263313e-06, "loss": 0.7873, "step": 4942 }, { "epoch": 0.15149564790977074, "grad_norm": 1.4566648244008762, "learning_rate": 9.617900532019078e-06, "loss": 0.7343, "step": 4943 }, { "epoch": 0.15152629643251195, "grad_norm": 1.3969264293955057, "learning_rate": 9.617710218273486e-06, "loss": 0.7579, "step": 4944 }, { "epoch": 0.15155694495525315, "grad_norm": 1.4382467555807943, "learning_rate": 9.617519859028415e-06, "loss": 0.6589, "step": 4945 }, { "epoch": 0.15158759347799436, "grad_norm": 1.421465662939173, "learning_rate": 9.61732945428574e-06, "loss": 0.7361, "step": 4946 }, { "epoch": 0.15161824200073556, "grad_norm": 1.4234869414579758, "learning_rate": 9.617139004047335e-06, "loss": 0.7425, "step": 4947 }, { "epoch": 0.15164889052347677, "grad_norm": 1.44397507834829, "learning_rate": 9.61694850831508e-06, "loss": 0.8215, "step": 4948 }, { "epoch": 0.15167953904621798, "grad_norm": 1.550267546477254, "learning_rate": 9.616757967090848e-06, "loss": 0.7199, "step": 4949 }, { "epoch": 0.15171018756895918, "grad_norm": 1.2752919872677912, "learning_rate": 9.61656738037652e-06, "loss": 0.767, "step": 4950 }, { "epoch": 0.1517408360917004, "grad_norm": 0.9991013145864975, "learning_rate": 9.616376748173973e-06, "loss": 0.4664, "step": 4951 }, { "epoch": 0.1517714846144416, "grad_norm": 1.3475974829853605, "learning_rate": 9.616186070485082e-06, "loss": 0.7223, "step": 4952 }, { "epoch": 0.1518021331371828, "grad_norm": 1.4699686337428537, "learning_rate": 9.61599534731173e-06, "loss": 0.6989, "step": 4953 }, { "epoch": 0.151832781659924, "grad_norm": 0.5595541117356588, "learning_rate": 9.615804578655796e-06, "loss": 0.4701, "step": 4954 }, { "epoch": 0.15186343018266518, "grad_norm": 1.3321410592676897, "learning_rate": 9.615613764519155e-06, "loss": 0.7591, "step": 4955 }, { "epoch": 0.1518940787054064, "grad_norm": 1.3369979870997646, "learning_rate": 9.615422904903695e-06, "loss": 0.7556, "step": 4956 }, { "epoch": 0.1519247272281476, "grad_norm": 1.553102914426146, "learning_rate": 9.61523199981129e-06, "loss": 0.8108, "step": 4957 }, { "epoch": 0.1519553757508888, "grad_norm": 1.2399297997520164, "learning_rate": 9.615041049243825e-06, "loss": 0.6776, "step": 4958 }, { "epoch": 0.15198602427363, "grad_norm": 1.5059973381466516, "learning_rate": 9.614850053203178e-06, "loss": 0.7584, "step": 4959 }, { "epoch": 0.1520166727963712, "grad_norm": 1.395972979593006, "learning_rate": 9.614659011691232e-06, "loss": 0.7215, "step": 4960 }, { "epoch": 0.15204732131911242, "grad_norm": 1.382094160299106, "learning_rate": 9.61446792470987e-06, "loss": 0.7067, "step": 4961 }, { "epoch": 0.15207796984185362, "grad_norm": 1.4611913344464549, "learning_rate": 9.614276792260978e-06, "loss": 0.6598, "step": 4962 }, { "epoch": 0.15210861836459483, "grad_norm": 1.2411171979006659, "learning_rate": 9.614085614346433e-06, "loss": 0.7125, "step": 4963 }, { "epoch": 0.15213926688733603, "grad_norm": 0.8674120124126796, "learning_rate": 9.613894390968121e-06, "loss": 0.4825, "step": 4964 }, { "epoch": 0.15216991541007724, "grad_norm": 1.447518954813967, "learning_rate": 9.61370312212793e-06, "loss": 0.7764, "step": 4965 }, { "epoch": 0.15220056393281844, "grad_norm": 1.37731214657748, "learning_rate": 9.61351180782774e-06, "loss": 0.7954, "step": 4966 }, { "epoch": 0.15223121245555965, "grad_norm": 0.6189465225468955, "learning_rate": 9.61332044806944e-06, "loss": 0.4702, "step": 4967 }, { "epoch": 0.15226186097830086, "grad_norm": 1.408288420400866, "learning_rate": 9.61312904285491e-06, "loss": 0.7345, "step": 4968 }, { "epoch": 0.15229250950104206, "grad_norm": 1.2682790737332637, "learning_rate": 9.612937592186041e-06, "loss": 0.6135, "step": 4969 }, { "epoch": 0.15232315802378327, "grad_norm": 1.49709946142604, "learning_rate": 9.612746096064718e-06, "loss": 0.6598, "step": 4970 }, { "epoch": 0.15235380654652445, "grad_norm": 0.5174700607228456, "learning_rate": 9.612554554492825e-06, "loss": 0.4585, "step": 4971 }, { "epoch": 0.15238445506926565, "grad_norm": 1.352160370315152, "learning_rate": 9.612362967472254e-06, "loss": 0.7481, "step": 4972 }, { "epoch": 0.15241510359200686, "grad_norm": 0.5918071385311636, "learning_rate": 9.612171335004892e-06, "loss": 0.4485, "step": 4973 }, { "epoch": 0.15244575211474806, "grad_norm": 1.6454469194118087, "learning_rate": 9.611979657092622e-06, "loss": 0.8168, "step": 4974 }, { "epoch": 0.15247640063748927, "grad_norm": 1.301309714897432, "learning_rate": 9.61178793373734e-06, "loss": 0.7633, "step": 4975 }, { "epoch": 0.15250704916023047, "grad_norm": 0.5033571788760893, "learning_rate": 9.611596164940929e-06, "loss": 0.4817, "step": 4976 }, { "epoch": 0.15253769768297168, "grad_norm": 1.5065734612897426, "learning_rate": 9.611404350705283e-06, "loss": 0.6914, "step": 4977 }, { "epoch": 0.15256834620571288, "grad_norm": 1.3928053983949205, "learning_rate": 9.611212491032289e-06, "loss": 0.7068, "step": 4978 }, { "epoch": 0.1525989947284541, "grad_norm": 1.341362328115639, "learning_rate": 9.611020585923838e-06, "loss": 0.7547, "step": 4979 }, { "epoch": 0.1526296432511953, "grad_norm": 1.400326764596557, "learning_rate": 9.610828635381822e-06, "loss": 0.7716, "step": 4980 }, { "epoch": 0.1526602917739365, "grad_norm": 1.2818392054139585, "learning_rate": 9.610636639408132e-06, "loss": 0.7131, "step": 4981 }, { "epoch": 0.1526909402966777, "grad_norm": 1.325820722442725, "learning_rate": 9.610444598004658e-06, "loss": 0.7181, "step": 4982 }, { "epoch": 0.1527215888194189, "grad_norm": 0.5486140710686745, "learning_rate": 9.610252511173297e-06, "loss": 0.4845, "step": 4983 }, { "epoch": 0.15275223734216012, "grad_norm": 2.1820484729187655, "learning_rate": 9.610060378915935e-06, "loss": 0.634, "step": 4984 }, { "epoch": 0.15278288586490132, "grad_norm": 1.893392935990431, "learning_rate": 9.60986820123447e-06, "loss": 0.7217, "step": 4985 }, { "epoch": 0.1528135343876425, "grad_norm": 1.402321519121299, "learning_rate": 9.609675978130795e-06, "loss": 0.7218, "step": 4986 }, { "epoch": 0.1528441829103837, "grad_norm": 1.5055833695109992, "learning_rate": 9.609483709606802e-06, "loss": 0.7868, "step": 4987 }, { "epoch": 0.1528748314331249, "grad_norm": 1.3598202105228936, "learning_rate": 9.609291395664387e-06, "loss": 0.7069, "step": 4988 }, { "epoch": 0.15290547995586612, "grad_norm": 1.3867317843207683, "learning_rate": 9.609099036305443e-06, "loss": 0.7927, "step": 4989 }, { "epoch": 0.15293612847860732, "grad_norm": 1.4226860459064892, "learning_rate": 9.608906631531869e-06, "loss": 0.6396, "step": 4990 }, { "epoch": 0.15296677700134853, "grad_norm": 1.2709852687871466, "learning_rate": 9.608714181345558e-06, "loss": 0.7588, "step": 4991 }, { "epoch": 0.15299742552408974, "grad_norm": 1.508416830252208, "learning_rate": 9.608521685748406e-06, "loss": 0.9119, "step": 4992 }, { "epoch": 0.15302807404683094, "grad_norm": 1.2255898662605207, "learning_rate": 9.608329144742312e-06, "loss": 0.6881, "step": 4993 }, { "epoch": 0.15305872256957215, "grad_norm": 1.395699122970822, "learning_rate": 9.608136558329172e-06, "loss": 0.7892, "step": 4994 }, { "epoch": 0.15308937109231335, "grad_norm": 0.5178580924905071, "learning_rate": 9.607943926510883e-06, "loss": 0.4624, "step": 4995 }, { "epoch": 0.15312001961505456, "grad_norm": 5.679604277458043, "learning_rate": 9.607751249289342e-06, "loss": 0.8243, "step": 4996 }, { "epoch": 0.15315066813779576, "grad_norm": 1.3469047485368366, "learning_rate": 9.607558526666451e-06, "loss": 0.6997, "step": 4997 }, { "epoch": 0.15318131666053697, "grad_norm": 1.290643828934099, "learning_rate": 9.607365758644107e-06, "loss": 0.6529, "step": 4998 }, { "epoch": 0.15321196518327818, "grad_norm": 1.18282859315787, "learning_rate": 9.607172945224208e-06, "loss": 0.6334, "step": 4999 }, { "epoch": 0.15324261370601938, "grad_norm": 1.6762280157607026, "learning_rate": 9.606980086408657e-06, "loss": 0.7165, "step": 5000 }, { "epoch": 0.1532732622287606, "grad_norm": 1.4422240132227657, "learning_rate": 9.60678718219935e-06, "loss": 0.7468, "step": 5001 }, { "epoch": 0.15330391075150177, "grad_norm": 1.357200664966703, "learning_rate": 9.606594232598192e-06, "loss": 0.7846, "step": 5002 }, { "epoch": 0.15333455927424297, "grad_norm": 1.2727810982834518, "learning_rate": 9.606401237607081e-06, "loss": 0.7208, "step": 5003 }, { "epoch": 0.15336520779698418, "grad_norm": 1.5089284052056342, "learning_rate": 9.606208197227922e-06, "loss": 0.8356, "step": 5004 }, { "epoch": 0.15339585631972538, "grad_norm": 0.529966626881045, "learning_rate": 9.606015111462614e-06, "loss": 0.4774, "step": 5005 }, { "epoch": 0.1534265048424666, "grad_norm": 1.3678435280446, "learning_rate": 9.605821980313061e-06, "loss": 0.7703, "step": 5006 }, { "epoch": 0.1534571533652078, "grad_norm": 1.3471664131356862, "learning_rate": 9.605628803781165e-06, "loss": 0.7777, "step": 5007 }, { "epoch": 0.153487801887949, "grad_norm": 1.4634139309366099, "learning_rate": 9.60543558186883e-06, "loss": 0.6411, "step": 5008 }, { "epoch": 0.1535184504106902, "grad_norm": 1.8057699643816738, "learning_rate": 9.605242314577961e-06, "loss": 0.7225, "step": 5009 }, { "epoch": 0.1535490989334314, "grad_norm": 1.4658020004094632, "learning_rate": 9.605049001910458e-06, "loss": 0.8449, "step": 5010 }, { "epoch": 0.15357974745617262, "grad_norm": 1.4291775199152819, "learning_rate": 9.604855643868231e-06, "loss": 0.7804, "step": 5011 }, { "epoch": 0.15361039597891382, "grad_norm": 1.3823776744080454, "learning_rate": 9.604662240453185e-06, "loss": 0.7759, "step": 5012 }, { "epoch": 0.15364104450165503, "grad_norm": 1.4552503831120194, "learning_rate": 9.604468791667221e-06, "loss": 0.7152, "step": 5013 }, { "epoch": 0.15367169302439623, "grad_norm": 1.3986762409460496, "learning_rate": 9.60427529751225e-06, "loss": 0.6567, "step": 5014 }, { "epoch": 0.15370234154713744, "grad_norm": 1.212827057503624, "learning_rate": 9.604081757990175e-06, "loss": 0.7602, "step": 5015 }, { "epoch": 0.15373299006987864, "grad_norm": 1.4773924478914395, "learning_rate": 9.603888173102904e-06, "loss": 0.7273, "step": 5016 }, { "epoch": 0.15376363859261982, "grad_norm": 1.749772975272824, "learning_rate": 9.603694542852346e-06, "loss": 0.7923, "step": 5017 }, { "epoch": 0.15379428711536103, "grad_norm": 1.5722201111469913, "learning_rate": 9.603500867240408e-06, "loss": 0.7886, "step": 5018 }, { "epoch": 0.15382493563810223, "grad_norm": 1.3044292497324903, "learning_rate": 9.603307146268998e-06, "loss": 0.7187, "step": 5019 }, { "epoch": 0.15385558416084344, "grad_norm": 1.4017221605686778, "learning_rate": 9.603113379940024e-06, "loss": 0.7145, "step": 5020 }, { "epoch": 0.15388623268358465, "grad_norm": 1.32106096339812, "learning_rate": 9.602919568255396e-06, "loss": 0.651, "step": 5021 }, { "epoch": 0.15391688120632585, "grad_norm": 1.5460158695607493, "learning_rate": 9.602725711217025e-06, "loss": 0.716, "step": 5022 }, { "epoch": 0.15394752972906706, "grad_norm": 1.4521635028009603, "learning_rate": 9.60253180882682e-06, "loss": 0.8669, "step": 5023 }, { "epoch": 0.15397817825180826, "grad_norm": 1.3564414709361108, "learning_rate": 9.60233786108669e-06, "loss": 0.7239, "step": 5024 }, { "epoch": 0.15400882677454947, "grad_norm": 1.3893956719456402, "learning_rate": 9.602143867998548e-06, "loss": 0.7124, "step": 5025 }, { "epoch": 0.15403947529729067, "grad_norm": 1.6581226762820502, "learning_rate": 9.601949829564305e-06, "loss": 0.8318, "step": 5026 }, { "epoch": 0.15407012382003188, "grad_norm": 1.3764812755608884, "learning_rate": 9.601755745785873e-06, "loss": 0.7087, "step": 5027 }, { "epoch": 0.15410077234277308, "grad_norm": 1.3601315618321403, "learning_rate": 9.601561616665164e-06, "loss": 0.7557, "step": 5028 }, { "epoch": 0.1541314208655143, "grad_norm": 1.6731929211324956, "learning_rate": 9.601367442204093e-06, "loss": 0.7021, "step": 5029 }, { "epoch": 0.1541620693882555, "grad_norm": 1.397467246226019, "learning_rate": 9.601173222404568e-06, "loss": 0.6565, "step": 5030 }, { "epoch": 0.1541927179109967, "grad_norm": 0.5089530992730914, "learning_rate": 9.600978957268508e-06, "loss": 0.459, "step": 5031 }, { "epoch": 0.1542233664337379, "grad_norm": 1.3832361412215348, "learning_rate": 9.600784646797825e-06, "loss": 0.8044, "step": 5032 }, { "epoch": 0.15425401495647909, "grad_norm": 1.3811769798073243, "learning_rate": 9.600590290994434e-06, "loss": 0.7857, "step": 5033 }, { "epoch": 0.1542846634792203, "grad_norm": 1.439291462425552, "learning_rate": 9.600395889860248e-06, "loss": 0.6917, "step": 5034 }, { "epoch": 0.1543153120019615, "grad_norm": 1.4432315080258329, "learning_rate": 9.600201443397185e-06, "loss": 0.8004, "step": 5035 }, { "epoch": 0.1543459605247027, "grad_norm": 1.4697874307129983, "learning_rate": 9.600006951607162e-06, "loss": 0.7162, "step": 5036 }, { "epoch": 0.1543766090474439, "grad_norm": 1.4415498174707906, "learning_rate": 9.599812414492092e-06, "loss": 0.7145, "step": 5037 }, { "epoch": 0.1544072575701851, "grad_norm": 1.5854900740814017, "learning_rate": 9.599617832053893e-06, "loss": 0.6779, "step": 5038 }, { "epoch": 0.15443790609292632, "grad_norm": 1.2929417420704155, "learning_rate": 9.599423204294484e-06, "loss": 0.7717, "step": 5039 }, { "epoch": 0.15446855461566752, "grad_norm": 1.3195262969603752, "learning_rate": 9.599228531215779e-06, "loss": 0.7173, "step": 5040 }, { "epoch": 0.15449920313840873, "grad_norm": 1.4531800540289024, "learning_rate": 9.5990338128197e-06, "loss": 0.6694, "step": 5041 }, { "epoch": 0.15452985166114994, "grad_norm": 1.323335001286476, "learning_rate": 9.598839049108164e-06, "loss": 0.7531, "step": 5042 }, { "epoch": 0.15456050018389114, "grad_norm": 1.771148502279662, "learning_rate": 9.59864424008309e-06, "loss": 0.7787, "step": 5043 }, { "epoch": 0.15459114870663235, "grad_norm": 0.49976247629540704, "learning_rate": 9.598449385746399e-06, "loss": 0.4788, "step": 5044 }, { "epoch": 0.15462179722937355, "grad_norm": 1.3498739451016544, "learning_rate": 9.59825448610001e-06, "loss": 0.738, "step": 5045 }, { "epoch": 0.15465244575211476, "grad_norm": 1.3477521885605845, "learning_rate": 9.598059541145841e-06, "loss": 0.733, "step": 5046 }, { "epoch": 0.15468309427485596, "grad_norm": 1.26372134253343, "learning_rate": 9.597864550885816e-06, "loss": 0.7641, "step": 5047 }, { "epoch": 0.15471374279759714, "grad_norm": 1.3830639835450842, "learning_rate": 9.597669515321853e-06, "loss": 0.8381, "step": 5048 }, { "epoch": 0.15474439132033835, "grad_norm": 1.3976356892607216, "learning_rate": 9.597474434455878e-06, "loss": 0.7651, "step": 5049 }, { "epoch": 0.15477503984307955, "grad_norm": 0.4670985295787647, "learning_rate": 9.597279308289811e-06, "loss": 0.4876, "step": 5050 }, { "epoch": 0.15480568836582076, "grad_norm": 1.1669700635682103, "learning_rate": 9.597084136825573e-06, "loss": 0.6986, "step": 5051 }, { "epoch": 0.15483633688856197, "grad_norm": 1.3256404756746496, "learning_rate": 9.59688892006509e-06, "loss": 0.7727, "step": 5052 }, { "epoch": 0.15486698541130317, "grad_norm": 1.436859012126295, "learning_rate": 9.596693658010286e-06, "loss": 0.7197, "step": 5053 }, { "epoch": 0.15489763393404438, "grad_norm": 1.4316039085510788, "learning_rate": 9.596498350663082e-06, "loss": 0.7617, "step": 5054 }, { "epoch": 0.15492828245678558, "grad_norm": 1.3643855908795086, "learning_rate": 9.596302998025403e-06, "loss": 0.6627, "step": 5055 }, { "epoch": 0.1549589309795268, "grad_norm": 1.469330071387763, "learning_rate": 9.596107600099176e-06, "loss": 0.7308, "step": 5056 }, { "epoch": 0.154989579502268, "grad_norm": 1.395549580289019, "learning_rate": 9.595912156886323e-06, "loss": 0.7833, "step": 5057 }, { "epoch": 0.1550202280250092, "grad_norm": 1.4009377945750026, "learning_rate": 9.595716668388773e-06, "loss": 0.6855, "step": 5058 }, { "epoch": 0.1550508765477504, "grad_norm": 1.345731579923777, "learning_rate": 9.59552113460845e-06, "loss": 0.7737, "step": 5059 }, { "epoch": 0.1550815250704916, "grad_norm": 1.3417130291343646, "learning_rate": 9.595325555547281e-06, "loss": 0.6843, "step": 5060 }, { "epoch": 0.15511217359323282, "grad_norm": 0.494803176229646, "learning_rate": 9.595129931207194e-06, "loss": 0.4823, "step": 5061 }, { "epoch": 0.15514282211597402, "grad_norm": 1.2852071321970593, "learning_rate": 9.594934261590117e-06, "loss": 0.6801, "step": 5062 }, { "epoch": 0.15517347063871523, "grad_norm": 0.48757533813275705, "learning_rate": 9.594738546697977e-06, "loss": 0.4639, "step": 5063 }, { "epoch": 0.1552041191614564, "grad_norm": 0.46950702803577604, "learning_rate": 9.594542786532702e-06, "loss": 0.462, "step": 5064 }, { "epoch": 0.1552347676841976, "grad_norm": 1.3821422723155898, "learning_rate": 9.594346981096221e-06, "loss": 0.806, "step": 5065 }, { "epoch": 0.15526541620693882, "grad_norm": 1.377600489334748, "learning_rate": 9.594151130390463e-06, "loss": 0.6972, "step": 5066 }, { "epoch": 0.15529606472968002, "grad_norm": 0.4800133374794051, "learning_rate": 9.593955234417361e-06, "loss": 0.49, "step": 5067 }, { "epoch": 0.15532671325242123, "grad_norm": 1.3660965701205703, "learning_rate": 9.593759293178839e-06, "loss": 0.7328, "step": 5068 }, { "epoch": 0.15535736177516243, "grad_norm": 1.2694286689698178, "learning_rate": 9.593563306676835e-06, "loss": 0.6806, "step": 5069 }, { "epoch": 0.15538801029790364, "grad_norm": 1.6320660148944701, "learning_rate": 9.593367274913274e-06, "loss": 0.8269, "step": 5070 }, { "epoch": 0.15541865882064484, "grad_norm": 1.3168476259611026, "learning_rate": 9.59317119789009e-06, "loss": 0.734, "step": 5071 }, { "epoch": 0.15544930734338605, "grad_norm": 1.402357860205423, "learning_rate": 9.592975075609216e-06, "loss": 0.7158, "step": 5072 }, { "epoch": 0.15547995586612726, "grad_norm": 1.1322254165608552, "learning_rate": 9.592778908072583e-06, "loss": 0.6762, "step": 5073 }, { "epoch": 0.15551060438886846, "grad_norm": 1.3261119967151278, "learning_rate": 9.592582695282124e-06, "loss": 0.8088, "step": 5074 }, { "epoch": 0.15554125291160967, "grad_norm": 1.467438750035203, "learning_rate": 9.592386437239773e-06, "loss": 0.6186, "step": 5075 }, { "epoch": 0.15557190143435087, "grad_norm": 0.6275659620850175, "learning_rate": 9.592190133947465e-06, "loss": 0.4951, "step": 5076 }, { "epoch": 0.15560254995709208, "grad_norm": 1.565678105042639, "learning_rate": 9.59199378540713e-06, "loss": 0.7611, "step": 5077 }, { "epoch": 0.15563319847983328, "grad_norm": 1.246515489375651, "learning_rate": 9.591797391620708e-06, "loss": 0.6276, "step": 5078 }, { "epoch": 0.15566384700257446, "grad_norm": 1.2641014611838757, "learning_rate": 9.591600952590129e-06, "loss": 0.6389, "step": 5079 }, { "epoch": 0.15569449552531567, "grad_norm": 0.47577623709618927, "learning_rate": 9.59140446831733e-06, "loss": 0.4716, "step": 5080 }, { "epoch": 0.15572514404805687, "grad_norm": 1.4678231511480648, "learning_rate": 9.591207938804252e-06, "loss": 0.7883, "step": 5081 }, { "epoch": 0.15575579257079808, "grad_norm": 1.1993438636521905, "learning_rate": 9.591011364052825e-06, "loss": 0.7487, "step": 5082 }, { "epoch": 0.15578644109353929, "grad_norm": 1.3241001709388704, "learning_rate": 9.59081474406499e-06, "loss": 0.6834, "step": 5083 }, { "epoch": 0.1558170896162805, "grad_norm": 1.3498933602905598, "learning_rate": 9.590618078842679e-06, "loss": 0.7708, "step": 5084 }, { "epoch": 0.1558477381390217, "grad_norm": 1.3099856124645948, "learning_rate": 9.590421368387837e-06, "loss": 0.7143, "step": 5085 }, { "epoch": 0.1558783866617629, "grad_norm": 1.3753398796429526, "learning_rate": 9.590224612702398e-06, "loss": 0.7164, "step": 5086 }, { "epoch": 0.1559090351845041, "grad_norm": 1.4399037768969942, "learning_rate": 9.590027811788301e-06, "loss": 0.8552, "step": 5087 }, { "epoch": 0.1559396837072453, "grad_norm": 1.2932699605170166, "learning_rate": 9.589830965647487e-06, "loss": 0.751, "step": 5088 }, { "epoch": 0.15597033222998652, "grad_norm": 1.405302759655853, "learning_rate": 9.589634074281891e-06, "loss": 0.8443, "step": 5089 }, { "epoch": 0.15600098075272772, "grad_norm": 0.5776187358179267, "learning_rate": 9.589437137693459e-06, "loss": 0.479, "step": 5090 }, { "epoch": 0.15603162927546893, "grad_norm": 1.3569928431233869, "learning_rate": 9.589240155884128e-06, "loss": 0.6856, "step": 5091 }, { "epoch": 0.15606227779821014, "grad_norm": 1.249036697125073, "learning_rate": 9.589043128855838e-06, "loss": 0.7062, "step": 5092 }, { "epoch": 0.15609292632095134, "grad_norm": 1.3138745814666095, "learning_rate": 9.588846056610533e-06, "loss": 0.7754, "step": 5093 }, { "epoch": 0.15612357484369255, "grad_norm": 1.203299961683601, "learning_rate": 9.588648939150153e-06, "loss": 0.6954, "step": 5094 }, { "epoch": 0.15615422336643373, "grad_norm": 1.4312799973053232, "learning_rate": 9.588451776476643e-06, "loss": 0.7817, "step": 5095 }, { "epoch": 0.15618487188917493, "grad_norm": 0.46637659513876645, "learning_rate": 9.588254568591942e-06, "loss": 0.4744, "step": 5096 }, { "epoch": 0.15621552041191614, "grad_norm": 1.2686645850158387, "learning_rate": 9.588057315497995e-06, "loss": 0.7288, "step": 5097 }, { "epoch": 0.15624616893465734, "grad_norm": 1.3004965996970972, "learning_rate": 9.587860017196747e-06, "loss": 0.7093, "step": 5098 }, { "epoch": 0.15627681745739855, "grad_norm": 1.2202431396449254, "learning_rate": 9.587662673690137e-06, "loss": 0.5768, "step": 5099 }, { "epoch": 0.15630746598013975, "grad_norm": 1.4441090756566284, "learning_rate": 9.587465284980115e-06, "loss": 0.6602, "step": 5100 }, { "epoch": 0.15633811450288096, "grad_norm": 1.307613850046131, "learning_rate": 9.587267851068624e-06, "loss": 0.6342, "step": 5101 }, { "epoch": 0.15636876302562217, "grad_norm": 1.3992680947745264, "learning_rate": 9.587070371957608e-06, "loss": 0.6839, "step": 5102 }, { "epoch": 0.15639941154836337, "grad_norm": 1.480723154252511, "learning_rate": 9.586872847649016e-06, "loss": 0.7325, "step": 5103 }, { "epoch": 0.15643006007110458, "grad_norm": 1.328321364653855, "learning_rate": 9.58667527814479e-06, "loss": 0.8425, "step": 5104 }, { "epoch": 0.15646070859384578, "grad_norm": 1.3487102297549165, "learning_rate": 9.58647766344688e-06, "loss": 0.7098, "step": 5105 }, { "epoch": 0.156491357116587, "grad_norm": 1.3692596664767849, "learning_rate": 9.58628000355723e-06, "loss": 0.719, "step": 5106 }, { "epoch": 0.1565220056393282, "grad_norm": 1.4124933057891527, "learning_rate": 9.586082298477794e-06, "loss": 0.6018, "step": 5107 }, { "epoch": 0.1565526541620694, "grad_norm": 1.2890012683696621, "learning_rate": 9.585884548210513e-06, "loss": 0.6638, "step": 5108 }, { "epoch": 0.1565833026848106, "grad_norm": 0.5066058891249318, "learning_rate": 9.585686752757339e-06, "loss": 0.4683, "step": 5109 }, { "epoch": 0.15661395120755178, "grad_norm": 1.283292114297016, "learning_rate": 9.58548891212022e-06, "loss": 0.8968, "step": 5110 }, { "epoch": 0.156644599730293, "grad_norm": 1.401611806242774, "learning_rate": 9.585291026301105e-06, "loss": 0.8094, "step": 5111 }, { "epoch": 0.1566752482530342, "grad_norm": 1.347559739836886, "learning_rate": 9.585093095301944e-06, "loss": 0.7607, "step": 5112 }, { "epoch": 0.1567058967757754, "grad_norm": 1.1274381315096331, "learning_rate": 9.584895119124688e-06, "loss": 0.6, "step": 5113 }, { "epoch": 0.1567365452985166, "grad_norm": 1.2664148493145673, "learning_rate": 9.584697097771287e-06, "loss": 0.6337, "step": 5114 }, { "epoch": 0.1567671938212578, "grad_norm": 1.3326953457633846, "learning_rate": 9.584499031243693e-06, "loss": 0.7621, "step": 5115 }, { "epoch": 0.15679784234399902, "grad_norm": 1.3133356066253674, "learning_rate": 9.584300919543856e-06, "loss": 0.7016, "step": 5116 }, { "epoch": 0.15682849086674022, "grad_norm": 1.1365395942229215, "learning_rate": 9.58410276267373e-06, "loss": 0.6964, "step": 5117 }, { "epoch": 0.15685913938948143, "grad_norm": 1.3457012954743313, "learning_rate": 9.583904560635267e-06, "loss": 0.8503, "step": 5118 }, { "epoch": 0.15688978791222263, "grad_norm": 0.5037420309434292, "learning_rate": 9.583706313430418e-06, "loss": 0.4853, "step": 5119 }, { "epoch": 0.15692043643496384, "grad_norm": 1.4323744276364032, "learning_rate": 9.583508021061141e-06, "loss": 0.626, "step": 5120 }, { "epoch": 0.15695108495770504, "grad_norm": 1.4252259554015223, "learning_rate": 9.583309683529384e-06, "loss": 0.7701, "step": 5121 }, { "epoch": 0.15698173348044625, "grad_norm": 1.4560752920440287, "learning_rate": 9.583111300837105e-06, "loss": 0.7438, "step": 5122 }, { "epoch": 0.15701238200318746, "grad_norm": 1.409131677421054, "learning_rate": 9.582912872986256e-06, "loss": 0.7676, "step": 5123 }, { "epoch": 0.15704303052592866, "grad_norm": 1.3491113673809747, "learning_rate": 9.582714399978796e-06, "loss": 0.7712, "step": 5124 }, { "epoch": 0.15707367904866987, "grad_norm": 1.3119766905324932, "learning_rate": 9.582515881816678e-06, "loss": 0.7746, "step": 5125 }, { "epoch": 0.15710432757141105, "grad_norm": 0.47778902616376084, "learning_rate": 9.582317318501859e-06, "loss": 0.4875, "step": 5126 }, { "epoch": 0.15713497609415225, "grad_norm": 1.4890643067640403, "learning_rate": 9.582118710036293e-06, "loss": 0.6713, "step": 5127 }, { "epoch": 0.15716562461689346, "grad_norm": 1.3533068203468408, "learning_rate": 9.58192005642194e-06, "loss": 0.6695, "step": 5128 }, { "epoch": 0.15719627313963466, "grad_norm": 0.45072947911971906, "learning_rate": 9.581721357660757e-06, "loss": 0.4455, "step": 5129 }, { "epoch": 0.15722692166237587, "grad_norm": 1.1580941024503646, "learning_rate": 9.581522613754702e-06, "loss": 0.5004, "step": 5130 }, { "epoch": 0.15725757018511707, "grad_norm": 0.47890000188889476, "learning_rate": 9.58132382470573e-06, "loss": 0.4563, "step": 5131 }, { "epoch": 0.15728821870785828, "grad_norm": 1.2936759157682425, "learning_rate": 9.581124990515805e-06, "loss": 0.7121, "step": 5132 }, { "epoch": 0.15731886723059949, "grad_norm": 1.387849776619019, "learning_rate": 9.58092611118688e-06, "loss": 0.7347, "step": 5133 }, { "epoch": 0.1573495157533407, "grad_norm": 1.3699597767464542, "learning_rate": 9.580727186720919e-06, "loss": 0.7887, "step": 5134 }, { "epoch": 0.1573801642760819, "grad_norm": 1.380284298951223, "learning_rate": 9.580528217119882e-06, "loss": 0.7502, "step": 5135 }, { "epoch": 0.1574108127988231, "grad_norm": 1.2224363477015812, "learning_rate": 9.580329202385729e-06, "loss": 0.7187, "step": 5136 }, { "epoch": 0.1574414613215643, "grad_norm": 1.3601195898203249, "learning_rate": 9.580130142520419e-06, "loss": 0.7424, "step": 5137 }, { "epoch": 0.1574721098443055, "grad_norm": 1.2131938512911478, "learning_rate": 9.579931037525915e-06, "loss": 0.6547, "step": 5138 }, { "epoch": 0.15750275836704672, "grad_norm": 1.2130512387456172, "learning_rate": 9.57973188740418e-06, "loss": 0.7372, "step": 5139 }, { "epoch": 0.15753340688978792, "grad_norm": 1.488139587587637, "learning_rate": 9.579532692157174e-06, "loss": 0.7619, "step": 5140 }, { "epoch": 0.1575640554125291, "grad_norm": 1.3297257933112063, "learning_rate": 9.57933345178686e-06, "loss": 0.8313, "step": 5141 }, { "epoch": 0.1575947039352703, "grad_norm": 1.3427749966878615, "learning_rate": 9.579134166295203e-06, "loss": 0.7322, "step": 5142 }, { "epoch": 0.15762535245801151, "grad_norm": 1.425237862465516, "learning_rate": 9.578934835684166e-06, "loss": 0.6261, "step": 5143 }, { "epoch": 0.15765600098075272, "grad_norm": 1.1532660587865375, "learning_rate": 9.57873545995571e-06, "loss": 0.6908, "step": 5144 }, { "epoch": 0.15768664950349393, "grad_norm": 1.8830565836045134, "learning_rate": 9.578536039111806e-06, "loss": 0.6895, "step": 5145 }, { "epoch": 0.15771729802623513, "grad_norm": 1.3090943698249, "learning_rate": 9.578336573154411e-06, "loss": 0.7065, "step": 5146 }, { "epoch": 0.15774794654897634, "grad_norm": 1.4019480347897677, "learning_rate": 9.578137062085496e-06, "loss": 0.8485, "step": 5147 }, { "epoch": 0.15777859507171754, "grad_norm": 1.3246634289601737, "learning_rate": 9.577937505907024e-06, "loss": 0.6815, "step": 5148 }, { "epoch": 0.15780924359445875, "grad_norm": 1.1982713559351357, "learning_rate": 9.577737904620963e-06, "loss": 0.6844, "step": 5149 }, { "epoch": 0.15783989211719995, "grad_norm": 1.3382871894385853, "learning_rate": 9.57753825822928e-06, "loss": 0.7417, "step": 5150 }, { "epoch": 0.15787054063994116, "grad_norm": 1.404875311083307, "learning_rate": 9.57733856673394e-06, "loss": 0.703, "step": 5151 }, { "epoch": 0.15790118916268236, "grad_norm": 0.6059606044086078, "learning_rate": 9.577138830136914e-06, "loss": 0.465, "step": 5152 }, { "epoch": 0.15793183768542357, "grad_norm": 1.2148751119852212, "learning_rate": 9.576939048440167e-06, "loss": 0.7179, "step": 5153 }, { "epoch": 0.15796248620816478, "grad_norm": 1.4155988339666152, "learning_rate": 9.576739221645669e-06, "loss": 0.7754, "step": 5154 }, { "epoch": 0.15799313473090598, "grad_norm": 1.4379005683244073, "learning_rate": 9.576539349755387e-06, "loss": 0.7828, "step": 5155 }, { "epoch": 0.1580237832536472, "grad_norm": 1.3590452572400116, "learning_rate": 9.576339432771293e-06, "loss": 0.7819, "step": 5156 }, { "epoch": 0.15805443177638837, "grad_norm": 1.4215124534441714, "learning_rate": 9.576139470695353e-06, "loss": 0.7972, "step": 5157 }, { "epoch": 0.15808508029912957, "grad_norm": 1.327217516084382, "learning_rate": 9.575939463529544e-06, "loss": 0.7734, "step": 5158 }, { "epoch": 0.15811572882187078, "grad_norm": 1.3578984724561851, "learning_rate": 9.57573941127583e-06, "loss": 0.6922, "step": 5159 }, { "epoch": 0.15814637734461198, "grad_norm": 1.3306561421863128, "learning_rate": 9.575539313936186e-06, "loss": 0.6348, "step": 5160 }, { "epoch": 0.1581770258673532, "grad_norm": 1.4145513874025735, "learning_rate": 9.575339171512582e-06, "loss": 0.7681, "step": 5161 }, { "epoch": 0.1582076743900944, "grad_norm": 1.214027412379885, "learning_rate": 9.57513898400699e-06, "loss": 0.7293, "step": 5162 }, { "epoch": 0.1582383229128356, "grad_norm": 1.3326986149211328, "learning_rate": 9.574938751421384e-06, "loss": 0.7515, "step": 5163 }, { "epoch": 0.1582689714355768, "grad_norm": 1.334334441240776, "learning_rate": 9.574738473757737e-06, "loss": 0.7326, "step": 5164 }, { "epoch": 0.158299619958318, "grad_norm": 0.5556596376238071, "learning_rate": 9.574538151018018e-06, "loss": 0.4412, "step": 5165 }, { "epoch": 0.15833026848105922, "grad_norm": 1.468523859881494, "learning_rate": 9.574337783204206e-06, "loss": 0.808, "step": 5166 }, { "epoch": 0.15836091700380042, "grad_norm": 1.2459665203726127, "learning_rate": 9.574137370318275e-06, "loss": 0.7201, "step": 5167 }, { "epoch": 0.15839156552654163, "grad_norm": 1.2651451435457204, "learning_rate": 9.573936912362195e-06, "loss": 0.7658, "step": 5168 }, { "epoch": 0.15842221404928283, "grad_norm": 1.3163192673689943, "learning_rate": 9.573736409337946e-06, "loss": 0.6724, "step": 5169 }, { "epoch": 0.15845286257202404, "grad_norm": 1.3369477103647434, "learning_rate": 9.573535861247502e-06, "loss": 0.7417, "step": 5170 }, { "epoch": 0.15848351109476524, "grad_norm": 0.49010173699614784, "learning_rate": 9.573335268092839e-06, "loss": 0.4645, "step": 5171 }, { "epoch": 0.15851415961750642, "grad_norm": 1.5001497837962283, "learning_rate": 9.573134629875934e-06, "loss": 0.6172, "step": 5172 }, { "epoch": 0.15854480814024763, "grad_norm": 1.4533583404393369, "learning_rate": 9.572933946598761e-06, "loss": 0.7113, "step": 5173 }, { "epoch": 0.15857545666298883, "grad_norm": 1.219619807364243, "learning_rate": 9.572733218263302e-06, "loss": 0.7108, "step": 5174 }, { "epoch": 0.15860610518573004, "grad_norm": 1.3544591816966782, "learning_rate": 9.572532444871532e-06, "loss": 0.8292, "step": 5175 }, { "epoch": 0.15863675370847125, "grad_norm": 1.324421859830916, "learning_rate": 9.57233162642543e-06, "loss": 0.7778, "step": 5176 }, { "epoch": 0.15866740223121245, "grad_norm": 1.4481288029660386, "learning_rate": 9.572130762926975e-06, "loss": 0.7758, "step": 5177 }, { "epoch": 0.15869805075395366, "grad_norm": 1.3055502485902755, "learning_rate": 9.571929854378144e-06, "loss": 0.7549, "step": 5178 }, { "epoch": 0.15872869927669486, "grad_norm": 1.3429749756516423, "learning_rate": 9.57172890078092e-06, "loss": 0.7496, "step": 5179 }, { "epoch": 0.15875934779943607, "grad_norm": 1.3834294503629743, "learning_rate": 9.57152790213728e-06, "loss": 0.708, "step": 5180 }, { "epoch": 0.15878999632217727, "grad_norm": 1.3218112576182002, "learning_rate": 9.571326858449209e-06, "loss": 0.7184, "step": 5181 }, { "epoch": 0.15882064484491848, "grad_norm": 1.2822375702217554, "learning_rate": 9.57112576971868e-06, "loss": 0.781, "step": 5182 }, { "epoch": 0.15885129336765968, "grad_norm": 1.5612296323492594, "learning_rate": 9.570924635947682e-06, "loss": 0.8385, "step": 5183 }, { "epoch": 0.1588819418904009, "grad_norm": 1.2135580880057202, "learning_rate": 9.570723457138196e-06, "loss": 0.6337, "step": 5184 }, { "epoch": 0.1589125904131421, "grad_norm": 1.3223414930903161, "learning_rate": 9.5705222332922e-06, "loss": 0.7726, "step": 5185 }, { "epoch": 0.1589432389358833, "grad_norm": 1.4596403979738612, "learning_rate": 9.570320964411678e-06, "loss": 0.7481, "step": 5186 }, { "epoch": 0.1589738874586245, "grad_norm": 0.5266263452038891, "learning_rate": 9.570119650498617e-06, "loss": 0.4646, "step": 5187 }, { "epoch": 0.15900453598136569, "grad_norm": 1.2009405768120174, "learning_rate": 9.569918291554995e-06, "loss": 0.6424, "step": 5188 }, { "epoch": 0.1590351845041069, "grad_norm": 1.4549366879318455, "learning_rate": 9.569716887582801e-06, "loss": 0.7991, "step": 5189 }, { "epoch": 0.1590658330268481, "grad_norm": 1.2549644083834242, "learning_rate": 9.569515438584016e-06, "loss": 0.6983, "step": 5190 }, { "epoch": 0.1590964815495893, "grad_norm": 1.3062424008744704, "learning_rate": 9.569313944560628e-06, "loss": 0.8174, "step": 5191 }, { "epoch": 0.1591271300723305, "grad_norm": 0.448113840100462, "learning_rate": 9.569112405514619e-06, "loss": 0.4586, "step": 5192 }, { "epoch": 0.15915777859507171, "grad_norm": 1.2795583056972624, "learning_rate": 9.568910821447976e-06, "loss": 0.7915, "step": 5193 }, { "epoch": 0.15918842711781292, "grad_norm": 1.334349014152037, "learning_rate": 9.568709192362687e-06, "loss": 0.7306, "step": 5194 }, { "epoch": 0.15921907564055413, "grad_norm": 1.5959974259100442, "learning_rate": 9.568507518260737e-06, "loss": 0.731, "step": 5195 }, { "epoch": 0.15924972416329533, "grad_norm": 1.3560336052085973, "learning_rate": 9.568305799144112e-06, "loss": 0.7396, "step": 5196 }, { "epoch": 0.15928037268603654, "grad_norm": 1.2202686432199104, "learning_rate": 9.568104035014802e-06, "loss": 0.6423, "step": 5197 }, { "epoch": 0.15931102120877774, "grad_norm": 1.4504441354208062, "learning_rate": 9.567902225874794e-06, "loss": 0.7075, "step": 5198 }, { "epoch": 0.15934166973151895, "grad_norm": 1.3069204083526156, "learning_rate": 9.567700371726079e-06, "loss": 0.7523, "step": 5199 }, { "epoch": 0.15937231825426015, "grad_norm": 1.5411076645361814, "learning_rate": 9.56749847257064e-06, "loss": 0.7426, "step": 5200 }, { "epoch": 0.15940296677700136, "grad_norm": 1.162238504393686, "learning_rate": 9.567296528410472e-06, "loss": 0.6964, "step": 5201 }, { "epoch": 0.15943361529974256, "grad_norm": 1.3160336234938905, "learning_rate": 9.567094539247562e-06, "loss": 0.7232, "step": 5202 }, { "epoch": 0.15946426382248374, "grad_norm": 1.3220508015980306, "learning_rate": 9.566892505083903e-06, "loss": 0.6999, "step": 5203 }, { "epoch": 0.15949491234522495, "grad_norm": 1.3761333959254738, "learning_rate": 9.566690425921482e-06, "loss": 0.6728, "step": 5204 }, { "epoch": 0.15952556086796615, "grad_norm": 1.4149032344613046, "learning_rate": 9.56648830176229e-06, "loss": 0.7281, "step": 5205 }, { "epoch": 0.15955620939070736, "grad_norm": 0.5402237117535404, "learning_rate": 9.566286132608322e-06, "loss": 0.4723, "step": 5206 }, { "epoch": 0.15958685791344857, "grad_norm": 1.2813748561906098, "learning_rate": 9.566083918461569e-06, "loss": 0.7111, "step": 5207 }, { "epoch": 0.15961750643618977, "grad_norm": 1.7070977669648983, "learning_rate": 9.565881659324021e-06, "loss": 0.7002, "step": 5208 }, { "epoch": 0.15964815495893098, "grad_norm": 1.2569079360388802, "learning_rate": 9.565679355197674e-06, "loss": 0.8089, "step": 5209 }, { "epoch": 0.15967880348167218, "grad_norm": 1.193860502037441, "learning_rate": 9.565477006084521e-06, "loss": 0.6409, "step": 5210 }, { "epoch": 0.1597094520044134, "grad_norm": 1.1746172190108866, "learning_rate": 9.565274611986555e-06, "loss": 0.6113, "step": 5211 }, { "epoch": 0.1597401005271546, "grad_norm": 1.2369594454021535, "learning_rate": 9.565072172905768e-06, "loss": 0.6898, "step": 5212 }, { "epoch": 0.1597707490498958, "grad_norm": 1.2281136186929529, "learning_rate": 9.564869688844158e-06, "loss": 0.6583, "step": 5213 }, { "epoch": 0.159801397572637, "grad_norm": 1.0947007494079135, "learning_rate": 9.564667159803719e-06, "loss": 0.6065, "step": 5214 }, { "epoch": 0.1598320460953782, "grad_norm": 1.358922259695214, "learning_rate": 9.564464585786447e-06, "loss": 0.7483, "step": 5215 }, { "epoch": 0.15986269461811942, "grad_norm": 1.425424064973399, "learning_rate": 9.564261966794337e-06, "loss": 0.6979, "step": 5216 }, { "epoch": 0.15989334314086062, "grad_norm": 1.958248892405114, "learning_rate": 9.564059302829386e-06, "loss": 0.6583, "step": 5217 }, { "epoch": 0.15992399166360183, "grad_norm": 1.4033462233131548, "learning_rate": 9.563856593893593e-06, "loss": 0.7134, "step": 5218 }, { "epoch": 0.159954640186343, "grad_norm": 1.3726541632683062, "learning_rate": 9.563653839988951e-06, "loss": 0.7117, "step": 5219 }, { "epoch": 0.1599852887090842, "grad_norm": 1.2163944335931522, "learning_rate": 9.56345104111746e-06, "loss": 0.7085, "step": 5220 }, { "epoch": 0.16001593723182542, "grad_norm": 1.2529345027697556, "learning_rate": 9.563248197281119e-06, "loss": 0.7682, "step": 5221 }, { "epoch": 0.16004658575456662, "grad_norm": 1.3879857238247466, "learning_rate": 9.563045308481926e-06, "loss": 0.7747, "step": 5222 }, { "epoch": 0.16007723427730783, "grad_norm": 1.379292911379295, "learning_rate": 9.56284237472188e-06, "loss": 0.7587, "step": 5223 }, { "epoch": 0.16010788280004903, "grad_norm": 1.2970053757942817, "learning_rate": 9.562639396002979e-06, "loss": 0.762, "step": 5224 }, { "epoch": 0.16013853132279024, "grad_norm": 1.1632335523666202, "learning_rate": 9.562436372327227e-06, "loss": 0.6464, "step": 5225 }, { "epoch": 0.16016917984553145, "grad_norm": 1.15747362266541, "learning_rate": 9.562233303696623e-06, "loss": 0.744, "step": 5226 }, { "epoch": 0.16019982836827265, "grad_norm": 1.3391708799722029, "learning_rate": 9.562030190113163e-06, "loss": 0.6822, "step": 5227 }, { "epoch": 0.16023047689101386, "grad_norm": 1.3668037820400727, "learning_rate": 9.561827031578855e-06, "loss": 0.7398, "step": 5228 }, { "epoch": 0.16026112541375506, "grad_norm": 1.2614706770542488, "learning_rate": 9.561623828095697e-06, "loss": 0.6503, "step": 5229 }, { "epoch": 0.16029177393649627, "grad_norm": 1.3017435934281003, "learning_rate": 9.561420579665692e-06, "loss": 0.7497, "step": 5230 }, { "epoch": 0.16032242245923747, "grad_norm": 0.6311806783811903, "learning_rate": 9.561217286290845e-06, "loss": 0.4526, "step": 5231 }, { "epoch": 0.16035307098197868, "grad_norm": 0.584219251470869, "learning_rate": 9.561013947973155e-06, "loss": 0.4786, "step": 5232 }, { "epoch": 0.16038371950471988, "grad_norm": 1.5517036025466786, "learning_rate": 9.560810564714629e-06, "loss": 0.622, "step": 5233 }, { "epoch": 0.16041436802746106, "grad_norm": 1.2096294941470098, "learning_rate": 9.560607136517268e-06, "loss": 0.6271, "step": 5234 }, { "epoch": 0.16044501655020227, "grad_norm": 0.5420049623766122, "learning_rate": 9.56040366338308e-06, "loss": 0.4701, "step": 5235 }, { "epoch": 0.16047566507294347, "grad_norm": 1.5236916879624776, "learning_rate": 9.560200145314067e-06, "loss": 0.7414, "step": 5236 }, { "epoch": 0.16050631359568468, "grad_norm": 1.3750340236545495, "learning_rate": 9.559996582312235e-06, "loss": 0.7697, "step": 5237 }, { "epoch": 0.16053696211842589, "grad_norm": 1.4329890596759274, "learning_rate": 9.55979297437959e-06, "loss": 0.6081, "step": 5238 }, { "epoch": 0.1605676106411671, "grad_norm": 1.3997404321536566, "learning_rate": 9.559589321518137e-06, "loss": 0.7893, "step": 5239 }, { "epoch": 0.1605982591639083, "grad_norm": 0.7211562315370623, "learning_rate": 9.559385623729886e-06, "loss": 0.4582, "step": 5240 }, { "epoch": 0.1606289076866495, "grad_norm": 1.5471890380728328, "learning_rate": 9.55918188101684e-06, "loss": 0.8143, "step": 5241 }, { "epoch": 0.1606595562093907, "grad_norm": 1.327152279710845, "learning_rate": 9.558978093381008e-06, "loss": 0.7028, "step": 5242 }, { "epoch": 0.1606902047321319, "grad_norm": 1.309807107380194, "learning_rate": 9.5587742608244e-06, "loss": 0.692, "step": 5243 }, { "epoch": 0.16072085325487312, "grad_norm": 1.3957053270459965, "learning_rate": 9.558570383349023e-06, "loss": 0.7647, "step": 5244 }, { "epoch": 0.16075150177761433, "grad_norm": 1.252292403396431, "learning_rate": 9.558366460956885e-06, "loss": 0.762, "step": 5245 }, { "epoch": 0.16078215030035553, "grad_norm": 1.3515160068877008, "learning_rate": 9.558162493649996e-06, "loss": 0.7262, "step": 5246 }, { "epoch": 0.16081279882309674, "grad_norm": 1.1214325110319068, "learning_rate": 9.557958481430365e-06, "loss": 0.6398, "step": 5247 }, { "epoch": 0.16084344734583794, "grad_norm": 1.524175825203456, "learning_rate": 9.557754424300004e-06, "loss": 0.7597, "step": 5248 }, { "epoch": 0.16087409586857915, "grad_norm": 1.2851121403104735, "learning_rate": 9.557550322260921e-06, "loss": 0.6742, "step": 5249 }, { "epoch": 0.16090474439132033, "grad_norm": 1.4186269575034935, "learning_rate": 9.55734617531513e-06, "loss": 0.7254, "step": 5250 }, { "epoch": 0.16093539291406153, "grad_norm": 0.5907628276999426, "learning_rate": 9.557141983464641e-06, "loss": 0.4555, "step": 5251 }, { "epoch": 0.16096604143680274, "grad_norm": 1.256495356445065, "learning_rate": 9.556937746711466e-06, "loss": 0.5499, "step": 5252 }, { "epoch": 0.16099668995954394, "grad_norm": 1.2179466793431675, "learning_rate": 9.556733465057617e-06, "loss": 0.7626, "step": 5253 }, { "epoch": 0.16102733848228515, "grad_norm": 1.3328334850531012, "learning_rate": 9.556529138505108e-06, "loss": 0.7578, "step": 5254 }, { "epoch": 0.16105798700502635, "grad_norm": 1.283701501154442, "learning_rate": 9.556324767055952e-06, "loss": 0.6873, "step": 5255 }, { "epoch": 0.16108863552776756, "grad_norm": 1.4189648433957631, "learning_rate": 9.556120350712158e-06, "loss": 0.7284, "step": 5256 }, { "epoch": 0.16111928405050877, "grad_norm": 1.3082103728575205, "learning_rate": 9.55591588947575e-06, "loss": 0.8058, "step": 5257 }, { "epoch": 0.16114993257324997, "grad_norm": 1.326120572063338, "learning_rate": 9.555711383348734e-06, "loss": 0.6701, "step": 5258 }, { "epoch": 0.16118058109599118, "grad_norm": 1.3543761650795139, "learning_rate": 9.555506832333131e-06, "loss": 0.5976, "step": 5259 }, { "epoch": 0.16121122961873238, "grad_norm": 1.3266773425312584, "learning_rate": 9.55530223643095e-06, "loss": 0.777, "step": 5260 }, { "epoch": 0.1612418781414736, "grad_norm": 1.305606691774874, "learning_rate": 9.555097595644212e-06, "loss": 0.7731, "step": 5261 }, { "epoch": 0.1612725266642148, "grad_norm": 1.3509943748173117, "learning_rate": 9.554892909974933e-06, "loss": 0.6527, "step": 5262 }, { "epoch": 0.161303175186956, "grad_norm": 1.2544594449848403, "learning_rate": 9.554688179425126e-06, "loss": 0.8005, "step": 5263 }, { "epoch": 0.1613338237096972, "grad_norm": 1.201112421491776, "learning_rate": 9.554483403996813e-06, "loss": 0.7499, "step": 5264 }, { "epoch": 0.16136447223243838, "grad_norm": 1.4555901170977517, "learning_rate": 9.554278583692009e-06, "loss": 0.7817, "step": 5265 }, { "epoch": 0.1613951207551796, "grad_norm": 1.3643573561799929, "learning_rate": 9.554073718512735e-06, "loss": 0.7773, "step": 5266 }, { "epoch": 0.1614257692779208, "grad_norm": 1.3348447095721307, "learning_rate": 9.553868808461004e-06, "loss": 0.6803, "step": 5267 }, { "epoch": 0.161456417800662, "grad_norm": 1.4332877118225444, "learning_rate": 9.553663853538841e-06, "loss": 0.6732, "step": 5268 }, { "epoch": 0.1614870663234032, "grad_norm": 1.2856532652361512, "learning_rate": 9.553458853748263e-06, "loss": 0.7266, "step": 5269 }, { "epoch": 0.1615177148461444, "grad_norm": 1.473967988462643, "learning_rate": 9.553253809091287e-06, "loss": 0.7504, "step": 5270 }, { "epoch": 0.16154836336888562, "grad_norm": 1.3108691247599507, "learning_rate": 9.55304871956994e-06, "loss": 0.7369, "step": 5271 }, { "epoch": 0.16157901189162682, "grad_norm": 1.2775623100461677, "learning_rate": 9.552843585186237e-06, "loss": 0.6626, "step": 5272 }, { "epoch": 0.16160966041436803, "grad_norm": 1.3494474927237008, "learning_rate": 9.552638405942201e-06, "loss": 0.6979, "step": 5273 }, { "epoch": 0.16164030893710923, "grad_norm": 1.3797032229478827, "learning_rate": 9.552433181839855e-06, "loss": 0.7035, "step": 5274 }, { "epoch": 0.16167095745985044, "grad_norm": 1.2706814166797396, "learning_rate": 9.55222791288122e-06, "loss": 0.7072, "step": 5275 }, { "epoch": 0.16170160598259165, "grad_norm": 1.288800702741437, "learning_rate": 9.552022599068317e-06, "loss": 0.7305, "step": 5276 }, { "epoch": 0.16173225450533285, "grad_norm": 1.333524798732837, "learning_rate": 9.551817240403172e-06, "loss": 0.7104, "step": 5277 }, { "epoch": 0.16176290302807406, "grad_norm": 1.4783784677014968, "learning_rate": 9.551611836887807e-06, "loss": 0.6926, "step": 5278 }, { "epoch": 0.16179355155081526, "grad_norm": 1.4190346905082603, "learning_rate": 9.551406388524244e-06, "loss": 0.7441, "step": 5279 }, { "epoch": 0.16182420007355647, "grad_norm": 1.4683559157121768, "learning_rate": 9.551200895314512e-06, "loss": 0.7895, "step": 5280 }, { "epoch": 0.16185484859629765, "grad_norm": 0.47753737188916995, "learning_rate": 9.550995357260633e-06, "loss": 0.4555, "step": 5281 }, { "epoch": 0.16188549711903885, "grad_norm": 1.4876254036588343, "learning_rate": 9.550789774364632e-06, "loss": 0.6793, "step": 5282 }, { "epoch": 0.16191614564178006, "grad_norm": 1.2539940569124262, "learning_rate": 9.550584146628534e-06, "loss": 0.6486, "step": 5283 }, { "epoch": 0.16194679416452126, "grad_norm": 1.3583215650742828, "learning_rate": 9.550378474054367e-06, "loss": 0.7009, "step": 5284 }, { "epoch": 0.16197744268726247, "grad_norm": 1.3726927233276782, "learning_rate": 9.550172756644156e-06, "loss": 0.7477, "step": 5285 }, { "epoch": 0.16200809121000367, "grad_norm": 1.3014030117713418, "learning_rate": 9.549966994399928e-06, "loss": 0.654, "step": 5286 }, { "epoch": 0.16203873973274488, "grad_norm": 1.366552488511825, "learning_rate": 9.549761187323714e-06, "loss": 0.8204, "step": 5287 }, { "epoch": 0.16206938825548609, "grad_norm": 1.3833600145133207, "learning_rate": 9.549555335417535e-06, "loss": 0.6844, "step": 5288 }, { "epoch": 0.1621000367782273, "grad_norm": 1.27694875367655, "learning_rate": 9.549349438683426e-06, "loss": 0.7876, "step": 5289 }, { "epoch": 0.1621306853009685, "grad_norm": 1.4215352786438025, "learning_rate": 9.549143497123412e-06, "loss": 0.6823, "step": 5290 }, { "epoch": 0.1621613338237097, "grad_norm": 2.0799617123498693, "learning_rate": 9.548937510739524e-06, "loss": 0.7092, "step": 5291 }, { "epoch": 0.1621919823464509, "grad_norm": 1.2904274400634461, "learning_rate": 9.54873147953379e-06, "loss": 0.7264, "step": 5292 }, { "epoch": 0.1622226308691921, "grad_norm": 1.1974722101474504, "learning_rate": 9.548525403508241e-06, "loss": 0.679, "step": 5293 }, { "epoch": 0.16225327939193332, "grad_norm": 0.49672240778025895, "learning_rate": 9.548319282664906e-06, "loss": 0.4504, "step": 5294 }, { "epoch": 0.16228392791467453, "grad_norm": 1.6083120282029932, "learning_rate": 9.54811311700582e-06, "loss": 0.7423, "step": 5295 }, { "epoch": 0.1623145764374157, "grad_norm": 1.4332028680688012, "learning_rate": 9.54790690653301e-06, "loss": 0.766, "step": 5296 }, { "epoch": 0.1623452249601569, "grad_norm": 1.5247861069133553, "learning_rate": 9.54770065124851e-06, "loss": 0.715, "step": 5297 }, { "epoch": 0.16237587348289811, "grad_norm": 1.3425926336150131, "learning_rate": 9.547494351154352e-06, "loss": 0.8756, "step": 5298 }, { "epoch": 0.16240652200563932, "grad_norm": 1.1510402427621582, "learning_rate": 9.547288006252568e-06, "loss": 0.6115, "step": 5299 }, { "epoch": 0.16243717052838053, "grad_norm": 0.5003114432726815, "learning_rate": 9.547081616545193e-06, "loss": 0.4752, "step": 5300 }, { "epoch": 0.16246781905112173, "grad_norm": 1.26264874571724, "learning_rate": 9.54687518203426e-06, "loss": 0.6884, "step": 5301 }, { "epoch": 0.16249846757386294, "grad_norm": 1.5233052660810555, "learning_rate": 9.546668702721801e-06, "loss": 0.6589, "step": 5302 }, { "epoch": 0.16252911609660414, "grad_norm": 1.439244209369251, "learning_rate": 9.546462178609852e-06, "loss": 0.6322, "step": 5303 }, { "epoch": 0.16255976461934535, "grad_norm": 1.2744326811710125, "learning_rate": 9.546255609700447e-06, "loss": 0.7919, "step": 5304 }, { "epoch": 0.16259041314208655, "grad_norm": 1.435556477455769, "learning_rate": 9.546048995995625e-06, "loss": 0.7628, "step": 5305 }, { "epoch": 0.16262106166482776, "grad_norm": 0.4771927488412662, "learning_rate": 9.545842337497417e-06, "loss": 0.4741, "step": 5306 }, { "epoch": 0.16265171018756897, "grad_norm": 1.2269574244579475, "learning_rate": 9.545635634207862e-06, "loss": 0.7731, "step": 5307 }, { "epoch": 0.16268235871031017, "grad_norm": 1.2714843705570673, "learning_rate": 9.545428886128996e-06, "loss": 0.7504, "step": 5308 }, { "epoch": 0.16271300723305138, "grad_norm": 1.3999124768753357, "learning_rate": 9.545222093262856e-06, "loss": 0.6687, "step": 5309 }, { "epoch": 0.16274365575579258, "grad_norm": 1.2959151849707633, "learning_rate": 9.54501525561148e-06, "loss": 0.7991, "step": 5310 }, { "epoch": 0.1627743042785338, "grad_norm": 1.4275871680848535, "learning_rate": 9.544808373176906e-06, "loss": 0.7323, "step": 5311 }, { "epoch": 0.16280495280127497, "grad_norm": 0.46391016574683375, "learning_rate": 9.544601445961172e-06, "loss": 0.4765, "step": 5312 }, { "epoch": 0.16283560132401617, "grad_norm": 1.111412753938404, "learning_rate": 9.544394473966317e-06, "loss": 0.6036, "step": 5313 }, { "epoch": 0.16286624984675738, "grad_norm": 1.3476848286677472, "learning_rate": 9.54418745719438e-06, "loss": 0.7041, "step": 5314 }, { "epoch": 0.16289689836949858, "grad_norm": 1.5606403486354532, "learning_rate": 9.543980395647403e-06, "loss": 0.7649, "step": 5315 }, { "epoch": 0.1629275468922398, "grad_norm": 1.2823043534480816, "learning_rate": 9.543773289327423e-06, "loss": 0.6333, "step": 5316 }, { "epoch": 0.162958195414981, "grad_norm": 0.4678217056312096, "learning_rate": 9.543566138236483e-06, "loss": 0.4634, "step": 5317 }, { "epoch": 0.1629888439377222, "grad_norm": 1.468088750670632, "learning_rate": 9.543358942376623e-06, "loss": 0.7461, "step": 5318 }, { "epoch": 0.1630194924604634, "grad_norm": 0.4661487328753788, "learning_rate": 9.543151701749885e-06, "loss": 0.4723, "step": 5319 }, { "epoch": 0.1630501409832046, "grad_norm": 1.5396297127280332, "learning_rate": 9.54294441635831e-06, "loss": 0.7546, "step": 5320 }, { "epoch": 0.16308078950594582, "grad_norm": 1.2185089180427127, "learning_rate": 9.542737086203943e-06, "loss": 0.7004, "step": 5321 }, { "epoch": 0.16311143802868702, "grad_norm": 1.2568587550019321, "learning_rate": 9.542529711288824e-06, "loss": 0.6374, "step": 5322 }, { "epoch": 0.16314208655142823, "grad_norm": 1.3207372117102127, "learning_rate": 9.542322291614999e-06, "loss": 0.7488, "step": 5323 }, { "epoch": 0.16317273507416943, "grad_norm": 1.5270835662430804, "learning_rate": 9.542114827184507e-06, "loss": 0.7668, "step": 5324 }, { "epoch": 0.16320338359691064, "grad_norm": 1.4846893578363891, "learning_rate": 9.541907317999397e-06, "loss": 0.8375, "step": 5325 }, { "epoch": 0.16323403211965185, "grad_norm": 1.292926600021375, "learning_rate": 9.541699764061714e-06, "loss": 0.6838, "step": 5326 }, { "epoch": 0.16326468064239302, "grad_norm": 1.396173163173864, "learning_rate": 9.5414921653735e-06, "loss": 0.7523, "step": 5327 }, { "epoch": 0.16329532916513423, "grad_norm": 1.2609477722346563, "learning_rate": 9.5412845219368e-06, "loss": 0.7011, "step": 5328 }, { "epoch": 0.16332597768787543, "grad_norm": 1.3982780301228508, "learning_rate": 9.541076833753665e-06, "loss": 0.7527, "step": 5329 }, { "epoch": 0.16335662621061664, "grad_norm": 1.2441923770976906, "learning_rate": 9.540869100826136e-06, "loss": 0.7479, "step": 5330 }, { "epoch": 0.16338727473335785, "grad_norm": 1.6034237603721255, "learning_rate": 9.540661323156261e-06, "loss": 0.8052, "step": 5331 }, { "epoch": 0.16341792325609905, "grad_norm": 0.5407195265222715, "learning_rate": 9.54045350074609e-06, "loss": 0.4447, "step": 5332 }, { "epoch": 0.16344857177884026, "grad_norm": 1.5178984245734548, "learning_rate": 9.540245633597667e-06, "loss": 0.7352, "step": 5333 }, { "epoch": 0.16347922030158146, "grad_norm": 0.5215203261235908, "learning_rate": 9.540037721713045e-06, "loss": 0.4645, "step": 5334 }, { "epoch": 0.16350986882432267, "grad_norm": 1.5790622159094492, "learning_rate": 9.539829765094265e-06, "loss": 0.9087, "step": 5335 }, { "epoch": 0.16354051734706387, "grad_norm": 1.3149419449612203, "learning_rate": 9.539621763743384e-06, "loss": 0.6929, "step": 5336 }, { "epoch": 0.16357116586980508, "grad_norm": 1.2316794048476571, "learning_rate": 9.539413717662449e-06, "loss": 0.6804, "step": 5337 }, { "epoch": 0.16360181439254629, "grad_norm": 1.45063740094742, "learning_rate": 9.53920562685351e-06, "loss": 0.7407, "step": 5338 }, { "epoch": 0.1636324629152875, "grad_norm": 1.2436091725798575, "learning_rate": 9.538997491318613e-06, "loss": 0.7946, "step": 5339 }, { "epoch": 0.1636631114380287, "grad_norm": 1.441576291190182, "learning_rate": 9.538789311059815e-06, "loss": 0.7272, "step": 5340 }, { "epoch": 0.1636937599607699, "grad_norm": 1.3304439491051192, "learning_rate": 9.538581086079164e-06, "loss": 0.6766, "step": 5341 }, { "epoch": 0.1637244084835111, "grad_norm": 1.2992338419843863, "learning_rate": 9.538372816378711e-06, "loss": 0.7728, "step": 5342 }, { "epoch": 0.16375505700625229, "grad_norm": 1.4131429249346665, "learning_rate": 9.538164501960511e-06, "loss": 0.736, "step": 5343 }, { "epoch": 0.1637857055289935, "grad_norm": 0.5657803885602447, "learning_rate": 9.537956142826615e-06, "loss": 0.4647, "step": 5344 }, { "epoch": 0.1638163540517347, "grad_norm": 1.1663357871020932, "learning_rate": 9.537747738979076e-06, "loss": 0.6121, "step": 5345 }, { "epoch": 0.1638470025744759, "grad_norm": 0.5079030742662776, "learning_rate": 9.537539290419945e-06, "loss": 0.4465, "step": 5346 }, { "epoch": 0.1638776510972171, "grad_norm": 1.3425773914480774, "learning_rate": 9.537330797151282e-06, "loss": 0.6471, "step": 5347 }, { "epoch": 0.16390829961995831, "grad_norm": 1.436894537536817, "learning_rate": 9.537122259175135e-06, "loss": 0.7885, "step": 5348 }, { "epoch": 0.16393894814269952, "grad_norm": 1.3049749227776057, "learning_rate": 9.536913676493564e-06, "loss": 0.6699, "step": 5349 }, { "epoch": 0.16396959666544073, "grad_norm": 0.5091783499924125, "learning_rate": 9.53670504910862e-06, "loss": 0.4815, "step": 5350 }, { "epoch": 0.16400024518818193, "grad_norm": 1.302554727836848, "learning_rate": 9.536496377022362e-06, "loss": 0.7794, "step": 5351 }, { "epoch": 0.16403089371092314, "grad_norm": 1.3526045961019062, "learning_rate": 9.536287660236842e-06, "loss": 0.7164, "step": 5352 }, { "epoch": 0.16406154223366434, "grad_norm": 1.3151818770249788, "learning_rate": 9.53607889875412e-06, "loss": 0.7124, "step": 5353 }, { "epoch": 0.16409219075640555, "grad_norm": 1.278334585556638, "learning_rate": 9.535870092576253e-06, "loss": 0.6573, "step": 5354 }, { "epoch": 0.16412283927914675, "grad_norm": 1.3814187611968873, "learning_rate": 9.535661241705296e-06, "loss": 0.7928, "step": 5355 }, { "epoch": 0.16415348780188796, "grad_norm": 1.303420103154487, "learning_rate": 9.53545234614331e-06, "loss": 0.6406, "step": 5356 }, { "epoch": 0.16418413632462917, "grad_norm": 1.0942390409177258, "learning_rate": 9.53524340589235e-06, "loss": 0.6423, "step": 5357 }, { "epoch": 0.16421478484737034, "grad_norm": 1.301752158140554, "learning_rate": 9.535034420954476e-06, "loss": 0.7627, "step": 5358 }, { "epoch": 0.16424543337011155, "grad_norm": 1.4071114195580432, "learning_rate": 9.53482539133175e-06, "loss": 0.721, "step": 5359 }, { "epoch": 0.16427608189285275, "grad_norm": 1.3203973770620367, "learning_rate": 9.534616317026227e-06, "loss": 0.694, "step": 5360 }, { "epoch": 0.16430673041559396, "grad_norm": 1.3575816903592866, "learning_rate": 9.53440719803997e-06, "loss": 0.6599, "step": 5361 }, { "epoch": 0.16433737893833517, "grad_norm": 1.2646277679035727, "learning_rate": 9.534198034375039e-06, "loss": 0.6759, "step": 5362 }, { "epoch": 0.16436802746107637, "grad_norm": 1.3938667222450174, "learning_rate": 9.533988826033494e-06, "loss": 0.6836, "step": 5363 }, { "epoch": 0.16439867598381758, "grad_norm": 1.4037974394137365, "learning_rate": 9.533779573017397e-06, "loss": 0.6941, "step": 5364 }, { "epoch": 0.16442932450655878, "grad_norm": 1.3569432544519624, "learning_rate": 9.53357027532881e-06, "loss": 0.6784, "step": 5365 }, { "epoch": 0.1644599730293, "grad_norm": 1.3695509441766174, "learning_rate": 9.533360932969795e-06, "loss": 0.7759, "step": 5366 }, { "epoch": 0.1644906215520412, "grad_norm": 1.4524026120901359, "learning_rate": 9.533151545942414e-06, "loss": 0.7509, "step": 5367 }, { "epoch": 0.1645212700747824, "grad_norm": 1.7054571530076506, "learning_rate": 9.532942114248734e-06, "loss": 0.6921, "step": 5368 }, { "epoch": 0.1645519185975236, "grad_norm": 0.6076160986470368, "learning_rate": 9.532732637890813e-06, "loss": 0.4641, "step": 5369 }, { "epoch": 0.1645825671202648, "grad_norm": 1.2661262141658551, "learning_rate": 9.532523116870718e-06, "loss": 0.7219, "step": 5370 }, { "epoch": 0.16461321564300602, "grad_norm": 1.2530126210801538, "learning_rate": 9.532313551190513e-06, "loss": 0.6345, "step": 5371 }, { "epoch": 0.16464386416574722, "grad_norm": 1.2917071940061446, "learning_rate": 9.532103940852263e-06, "loss": 0.782, "step": 5372 }, { "epoch": 0.16467451268848843, "grad_norm": 1.3268244411300558, "learning_rate": 9.531894285858032e-06, "loss": 0.6854, "step": 5373 }, { "epoch": 0.1647051612112296, "grad_norm": 1.4776819745105068, "learning_rate": 9.53168458620989e-06, "loss": 0.7521, "step": 5374 }, { "epoch": 0.1647358097339708, "grad_norm": 1.3555368459174038, "learning_rate": 9.531474841909898e-06, "loss": 0.6742, "step": 5375 }, { "epoch": 0.16476645825671202, "grad_norm": 0.46296750291844513, "learning_rate": 9.531265052960126e-06, "loss": 0.4376, "step": 5376 }, { "epoch": 0.16479710677945322, "grad_norm": 1.3211653536147479, "learning_rate": 9.531055219362639e-06, "loss": 0.7744, "step": 5377 }, { "epoch": 0.16482775530219443, "grad_norm": 1.5411089987967448, "learning_rate": 9.530845341119506e-06, "loss": 0.8694, "step": 5378 }, { "epoch": 0.16485840382493563, "grad_norm": 0.48221363191386524, "learning_rate": 9.530635418232795e-06, "loss": 0.4707, "step": 5379 }, { "epoch": 0.16488905234767684, "grad_norm": 1.393193177834104, "learning_rate": 9.530425450704574e-06, "loss": 0.7424, "step": 5380 }, { "epoch": 0.16491970087041805, "grad_norm": 1.3443290826277838, "learning_rate": 9.530215438536912e-06, "loss": 0.7218, "step": 5381 }, { "epoch": 0.16495034939315925, "grad_norm": 1.3739540952987712, "learning_rate": 9.530005381731876e-06, "loss": 0.7356, "step": 5382 }, { "epoch": 0.16498099791590046, "grad_norm": 0.4674603604195584, "learning_rate": 9.529795280291542e-06, "loss": 0.4619, "step": 5383 }, { "epoch": 0.16501164643864166, "grad_norm": 1.0882021053605422, "learning_rate": 9.529585134217973e-06, "loss": 0.6987, "step": 5384 }, { "epoch": 0.16504229496138287, "grad_norm": 1.356886165644676, "learning_rate": 9.529374943513244e-06, "loss": 0.7429, "step": 5385 }, { "epoch": 0.16507294348412407, "grad_norm": 1.346784854086394, "learning_rate": 9.529164708179424e-06, "loss": 0.6953, "step": 5386 }, { "epoch": 0.16510359200686528, "grad_norm": 1.3551751387590065, "learning_rate": 9.528954428218586e-06, "loss": 0.7502, "step": 5387 }, { "epoch": 0.16513424052960649, "grad_norm": 1.2643143608369913, "learning_rate": 9.528744103632802e-06, "loss": 0.5257, "step": 5388 }, { "epoch": 0.16516488905234766, "grad_norm": 1.3109782089671032, "learning_rate": 9.52853373442414e-06, "loss": 0.6417, "step": 5389 }, { "epoch": 0.16519553757508887, "grad_norm": 1.4209768853204228, "learning_rate": 9.52832332059468e-06, "loss": 0.7149, "step": 5390 }, { "epoch": 0.16522618609783007, "grad_norm": 1.3392857312344075, "learning_rate": 9.528112862146492e-06, "loss": 0.6764, "step": 5391 }, { "epoch": 0.16525683462057128, "grad_norm": 1.352894149030972, "learning_rate": 9.527902359081649e-06, "loss": 0.8071, "step": 5392 }, { "epoch": 0.16528748314331249, "grad_norm": 1.3374252735777916, "learning_rate": 9.527691811402224e-06, "loss": 0.697, "step": 5393 }, { "epoch": 0.1653181316660537, "grad_norm": 1.4581257087486512, "learning_rate": 9.527481219110293e-06, "loss": 0.6667, "step": 5394 }, { "epoch": 0.1653487801887949, "grad_norm": 1.282047306342577, "learning_rate": 9.527270582207933e-06, "loss": 0.7029, "step": 5395 }, { "epoch": 0.1653794287115361, "grad_norm": 1.3428373630035473, "learning_rate": 9.527059900697216e-06, "loss": 0.6993, "step": 5396 }, { "epoch": 0.1654100772342773, "grad_norm": 1.3090382972398562, "learning_rate": 9.52684917458022e-06, "loss": 0.712, "step": 5397 }, { "epoch": 0.16544072575701851, "grad_norm": 1.271651423255241, "learning_rate": 9.526638403859021e-06, "loss": 0.7482, "step": 5398 }, { "epoch": 0.16547137427975972, "grad_norm": 1.5161944431749796, "learning_rate": 9.526427588535696e-06, "loss": 0.7554, "step": 5399 }, { "epoch": 0.16550202280250093, "grad_norm": 1.3990134442937765, "learning_rate": 9.526216728612321e-06, "loss": 0.6748, "step": 5400 }, { "epoch": 0.16553267132524213, "grad_norm": 1.4029243215327503, "learning_rate": 9.526005824090975e-06, "loss": 0.8346, "step": 5401 }, { "epoch": 0.16556331984798334, "grad_norm": 1.3725454489201923, "learning_rate": 9.525794874973735e-06, "loss": 0.6857, "step": 5402 }, { "epoch": 0.16559396837072454, "grad_norm": 1.3164474624557714, "learning_rate": 9.525583881262681e-06, "loss": 0.8159, "step": 5403 }, { "epoch": 0.16562461689346575, "grad_norm": 1.277201752053776, "learning_rate": 9.52537284295989e-06, "loss": 0.7447, "step": 5404 }, { "epoch": 0.16565526541620693, "grad_norm": 1.3473106267453774, "learning_rate": 9.525161760067443e-06, "loss": 0.743, "step": 5405 }, { "epoch": 0.16568591393894813, "grad_norm": 0.6483967373412437, "learning_rate": 9.52495063258742e-06, "loss": 0.4853, "step": 5406 }, { "epoch": 0.16571656246168934, "grad_norm": 1.4542954406087747, "learning_rate": 9.5247394605219e-06, "loss": 0.7737, "step": 5407 }, { "epoch": 0.16574721098443054, "grad_norm": 1.2035450352135912, "learning_rate": 9.524528243872964e-06, "loss": 0.6121, "step": 5408 }, { "epoch": 0.16577785950717175, "grad_norm": 1.269050245888178, "learning_rate": 9.524316982642693e-06, "loss": 0.7318, "step": 5409 }, { "epoch": 0.16580850802991295, "grad_norm": 0.4907855554271819, "learning_rate": 9.524105676833172e-06, "loss": 0.4491, "step": 5410 }, { "epoch": 0.16583915655265416, "grad_norm": 1.1806676865419587, "learning_rate": 9.523894326446478e-06, "loss": 0.6725, "step": 5411 }, { "epoch": 0.16586980507539537, "grad_norm": 1.29797687170423, "learning_rate": 9.523682931484696e-06, "loss": 0.6012, "step": 5412 }, { "epoch": 0.16590045359813657, "grad_norm": 1.3320460331640218, "learning_rate": 9.523471491949909e-06, "loss": 0.6529, "step": 5413 }, { "epoch": 0.16593110212087778, "grad_norm": 1.4278652486085253, "learning_rate": 9.5232600078442e-06, "loss": 0.7271, "step": 5414 }, { "epoch": 0.16596175064361898, "grad_norm": 1.1728603372271997, "learning_rate": 9.523048479169653e-06, "loss": 0.7583, "step": 5415 }, { "epoch": 0.1659923991663602, "grad_norm": 1.2140815891313639, "learning_rate": 9.522836905928352e-06, "loss": 0.6656, "step": 5416 }, { "epoch": 0.1660230476891014, "grad_norm": 1.4107877812164067, "learning_rate": 9.522625288122381e-06, "loss": 0.713, "step": 5417 }, { "epoch": 0.1660536962118426, "grad_norm": 1.161799468231437, "learning_rate": 9.522413625753827e-06, "loss": 0.7357, "step": 5418 }, { "epoch": 0.1660843447345838, "grad_norm": 1.3488534699559085, "learning_rate": 9.522201918824774e-06, "loss": 0.7744, "step": 5419 }, { "epoch": 0.16611499325732498, "grad_norm": 1.3423750674711505, "learning_rate": 9.521990167337309e-06, "loss": 0.7631, "step": 5420 }, { "epoch": 0.1661456417800662, "grad_norm": 1.2781166149453196, "learning_rate": 9.521778371293517e-06, "loss": 0.7645, "step": 5421 }, { "epoch": 0.1661762903028074, "grad_norm": 1.2518221224256088, "learning_rate": 9.521566530695485e-06, "loss": 0.7375, "step": 5422 }, { "epoch": 0.1662069388255486, "grad_norm": 1.2159205182987562, "learning_rate": 9.521354645545303e-06, "loss": 0.7538, "step": 5423 }, { "epoch": 0.1662375873482898, "grad_norm": 1.2596495568400257, "learning_rate": 9.521142715845055e-06, "loss": 0.6326, "step": 5424 }, { "epoch": 0.166268235871031, "grad_norm": 1.2418187480883285, "learning_rate": 9.520930741596831e-06, "loss": 0.6907, "step": 5425 }, { "epoch": 0.16629888439377222, "grad_norm": 0.7351172507484042, "learning_rate": 9.520718722802722e-06, "loss": 0.4638, "step": 5426 }, { "epoch": 0.16632953291651342, "grad_norm": 1.3239985515076296, "learning_rate": 9.520506659464812e-06, "loss": 0.6954, "step": 5427 }, { "epoch": 0.16636018143925463, "grad_norm": 1.3062050638556493, "learning_rate": 9.520294551585195e-06, "loss": 0.7955, "step": 5428 }, { "epoch": 0.16639082996199583, "grad_norm": 1.3652698958763583, "learning_rate": 9.520082399165958e-06, "loss": 0.648, "step": 5429 }, { "epoch": 0.16642147848473704, "grad_norm": 1.369382750296715, "learning_rate": 9.519870202209194e-06, "loss": 0.7001, "step": 5430 }, { "epoch": 0.16645212700747825, "grad_norm": 1.2698768368733484, "learning_rate": 9.519657960716992e-06, "loss": 0.7299, "step": 5431 }, { "epoch": 0.16648277553021945, "grad_norm": 1.3771831330890718, "learning_rate": 9.519445674691443e-06, "loss": 0.7082, "step": 5432 }, { "epoch": 0.16651342405296066, "grad_norm": 1.2176070408822948, "learning_rate": 9.51923334413464e-06, "loss": 0.7122, "step": 5433 }, { "epoch": 0.16654407257570186, "grad_norm": 1.2679719116491461, "learning_rate": 9.519020969048676e-06, "loss": 0.6858, "step": 5434 }, { "epoch": 0.16657472109844307, "grad_norm": 1.2459520913896796, "learning_rate": 9.518808549435639e-06, "loss": 0.7348, "step": 5435 }, { "epoch": 0.16660536962118425, "grad_norm": 1.2533941466333023, "learning_rate": 9.518596085297627e-06, "loss": 0.687, "step": 5436 }, { "epoch": 0.16663601814392545, "grad_norm": 1.1206096547094797, "learning_rate": 9.518383576636732e-06, "loss": 0.6671, "step": 5437 }, { "epoch": 0.16666666666666666, "grad_norm": 1.3959292075324414, "learning_rate": 9.518171023455047e-06, "loss": 0.7128, "step": 5438 }, { "epoch": 0.16669731518940786, "grad_norm": 0.6677399042561527, "learning_rate": 9.517958425754668e-06, "loss": 0.4796, "step": 5439 }, { "epoch": 0.16672796371214907, "grad_norm": 1.3420888403452917, "learning_rate": 9.517745783537686e-06, "loss": 0.7624, "step": 5440 }, { "epoch": 0.16675861223489027, "grad_norm": 1.4151653001577944, "learning_rate": 9.517533096806201e-06, "loss": 0.76, "step": 5441 }, { "epoch": 0.16678926075763148, "grad_norm": 1.2971055418706252, "learning_rate": 9.517320365562306e-06, "loss": 0.7311, "step": 5442 }, { "epoch": 0.16681990928037269, "grad_norm": 1.3787151434618683, "learning_rate": 9.517107589808098e-06, "loss": 0.7332, "step": 5443 }, { "epoch": 0.1668505578031139, "grad_norm": 1.2708810241285224, "learning_rate": 9.516894769545672e-06, "loss": 0.7307, "step": 5444 }, { "epoch": 0.1668812063258551, "grad_norm": 1.5426464506732596, "learning_rate": 9.516681904777128e-06, "loss": 0.7402, "step": 5445 }, { "epoch": 0.1669118548485963, "grad_norm": 1.4842963276834784, "learning_rate": 9.51646899550456e-06, "loss": 0.7613, "step": 5446 }, { "epoch": 0.1669425033713375, "grad_norm": 1.4558177287570524, "learning_rate": 9.516256041730068e-06, "loss": 0.7286, "step": 5447 }, { "epoch": 0.16697315189407871, "grad_norm": 1.4712599105798736, "learning_rate": 9.516043043455749e-06, "loss": 0.835, "step": 5448 }, { "epoch": 0.16700380041681992, "grad_norm": 1.2970956691343558, "learning_rate": 9.515830000683703e-06, "loss": 0.7204, "step": 5449 }, { "epoch": 0.16703444893956113, "grad_norm": 1.267236542007035, "learning_rate": 9.515616913416029e-06, "loss": 0.6639, "step": 5450 }, { "epoch": 0.1670650974623023, "grad_norm": 1.4358055811936232, "learning_rate": 9.515403781654825e-06, "loss": 0.7317, "step": 5451 }, { "epoch": 0.1670957459850435, "grad_norm": 1.361678467545047, "learning_rate": 9.515190605402194e-06, "loss": 0.7184, "step": 5452 }, { "epoch": 0.16712639450778471, "grad_norm": 1.4188923212417297, "learning_rate": 9.514977384660233e-06, "loss": 0.7862, "step": 5453 }, { "epoch": 0.16715704303052592, "grad_norm": 1.2769190804003119, "learning_rate": 9.514764119431047e-06, "loss": 0.8025, "step": 5454 }, { "epoch": 0.16718769155326713, "grad_norm": 0.6657281813383333, "learning_rate": 9.514550809716731e-06, "loss": 0.4341, "step": 5455 }, { "epoch": 0.16721834007600833, "grad_norm": 1.4365490716898657, "learning_rate": 9.514337455519394e-06, "loss": 0.7477, "step": 5456 }, { "epoch": 0.16724898859874954, "grad_norm": 1.4436282422254765, "learning_rate": 9.514124056841133e-06, "loss": 0.7444, "step": 5457 }, { "epoch": 0.16727963712149074, "grad_norm": 1.1669935947207564, "learning_rate": 9.513910613684054e-06, "loss": 0.7046, "step": 5458 }, { "epoch": 0.16731028564423195, "grad_norm": 1.5540864063431314, "learning_rate": 9.513697126050258e-06, "loss": 0.7398, "step": 5459 }, { "epoch": 0.16734093416697315, "grad_norm": 1.3214760467975937, "learning_rate": 9.51348359394185e-06, "loss": 0.6899, "step": 5460 }, { "epoch": 0.16737158268971436, "grad_norm": 1.3518970615409578, "learning_rate": 9.513270017360933e-06, "loss": 0.6059, "step": 5461 }, { "epoch": 0.16740223121245557, "grad_norm": 1.4828929656561376, "learning_rate": 9.513056396309613e-06, "loss": 0.7004, "step": 5462 }, { "epoch": 0.16743287973519677, "grad_norm": 1.2539582844663952, "learning_rate": 9.512842730789992e-06, "loss": 0.6881, "step": 5463 }, { "epoch": 0.16746352825793798, "grad_norm": 1.247992681689965, "learning_rate": 9.512629020804176e-06, "loss": 0.737, "step": 5464 }, { "epoch": 0.16749417678067918, "grad_norm": 1.4946581628777873, "learning_rate": 9.512415266354274e-06, "loss": 0.7838, "step": 5465 }, { "epoch": 0.1675248253034204, "grad_norm": 1.3778299318211098, "learning_rate": 9.512201467442389e-06, "loss": 0.6738, "step": 5466 }, { "epoch": 0.16755547382616157, "grad_norm": 1.257357170914551, "learning_rate": 9.511987624070629e-06, "loss": 0.7619, "step": 5467 }, { "epoch": 0.16758612234890277, "grad_norm": 1.492644982874719, "learning_rate": 9.5117737362411e-06, "loss": 0.8147, "step": 5468 }, { "epoch": 0.16761677087164398, "grad_norm": 0.6917840656297874, "learning_rate": 9.51155980395591e-06, "loss": 0.4725, "step": 5469 }, { "epoch": 0.16764741939438518, "grad_norm": 1.3947834409302426, "learning_rate": 9.511345827217167e-06, "loss": 0.7463, "step": 5470 }, { "epoch": 0.1676780679171264, "grad_norm": 1.5613718503449385, "learning_rate": 9.511131806026979e-06, "loss": 0.711, "step": 5471 }, { "epoch": 0.1677087164398676, "grad_norm": 1.475637295741976, "learning_rate": 9.510917740387456e-06, "loss": 0.7209, "step": 5472 }, { "epoch": 0.1677393649626088, "grad_norm": 1.4234966846501182, "learning_rate": 9.510703630300704e-06, "loss": 0.8019, "step": 5473 }, { "epoch": 0.16777001348535, "grad_norm": 1.6503836818095121, "learning_rate": 9.510489475768836e-06, "loss": 0.7484, "step": 5474 }, { "epoch": 0.1678006620080912, "grad_norm": 1.3471531608574696, "learning_rate": 9.510275276793963e-06, "loss": 0.7328, "step": 5475 }, { "epoch": 0.16783131053083242, "grad_norm": 1.3132335361635241, "learning_rate": 9.510061033378191e-06, "loss": 0.6285, "step": 5476 }, { "epoch": 0.16786195905357362, "grad_norm": 1.6696103562546891, "learning_rate": 9.509846745523635e-06, "loss": 0.7817, "step": 5477 }, { "epoch": 0.16789260757631483, "grad_norm": 1.262171882576912, "learning_rate": 9.509632413232406e-06, "loss": 0.6735, "step": 5478 }, { "epoch": 0.16792325609905603, "grad_norm": 1.4425488268596294, "learning_rate": 9.509418036506614e-06, "loss": 0.698, "step": 5479 }, { "epoch": 0.16795390462179724, "grad_norm": 1.2952751966357832, "learning_rate": 9.509203615348372e-06, "loss": 0.6342, "step": 5480 }, { "epoch": 0.16798455314453845, "grad_norm": 1.3938699125022054, "learning_rate": 9.508989149759792e-06, "loss": 0.7438, "step": 5481 }, { "epoch": 0.16801520166727962, "grad_norm": 1.31626374358005, "learning_rate": 9.508774639742992e-06, "loss": 0.7025, "step": 5482 }, { "epoch": 0.16804585019002083, "grad_norm": 1.2861378240817856, "learning_rate": 9.508560085300078e-06, "loss": 0.7204, "step": 5483 }, { "epoch": 0.16807649871276203, "grad_norm": 1.2769702313034632, "learning_rate": 9.508345486433171e-06, "loss": 0.7136, "step": 5484 }, { "epoch": 0.16810714723550324, "grad_norm": 1.5161480381775738, "learning_rate": 9.508130843144382e-06, "loss": 0.7342, "step": 5485 }, { "epoch": 0.16813779575824445, "grad_norm": 1.3539384962874066, "learning_rate": 9.507916155435824e-06, "loss": 0.6938, "step": 5486 }, { "epoch": 0.16816844428098565, "grad_norm": 1.3750962548732966, "learning_rate": 9.507701423309616e-06, "loss": 0.6821, "step": 5487 }, { "epoch": 0.16819909280372686, "grad_norm": 1.3737856134003417, "learning_rate": 9.507486646767872e-06, "loss": 0.7699, "step": 5488 }, { "epoch": 0.16822974132646806, "grad_norm": 1.444679095076587, "learning_rate": 9.507271825812709e-06, "loss": 0.769, "step": 5489 }, { "epoch": 0.16826038984920927, "grad_norm": 1.263129961409721, "learning_rate": 9.507056960446243e-06, "loss": 0.6826, "step": 5490 }, { "epoch": 0.16829103837195047, "grad_norm": 1.3392201907509822, "learning_rate": 9.506842050670593e-06, "loss": 0.7431, "step": 5491 }, { "epoch": 0.16832168689469168, "grad_norm": 1.3651810283937842, "learning_rate": 9.506627096487875e-06, "loss": 0.7188, "step": 5492 }, { "epoch": 0.16835233541743289, "grad_norm": 1.5010291833416307, "learning_rate": 9.506412097900206e-06, "loss": 0.785, "step": 5493 }, { "epoch": 0.1683829839401741, "grad_norm": 1.4427867457293064, "learning_rate": 9.506197054909708e-06, "loss": 0.7204, "step": 5494 }, { "epoch": 0.1684136324629153, "grad_norm": 1.30189315449641, "learning_rate": 9.505981967518493e-06, "loss": 0.6677, "step": 5495 }, { "epoch": 0.1684442809856565, "grad_norm": 1.3778726431549202, "learning_rate": 9.50576683572869e-06, "loss": 0.698, "step": 5496 }, { "epoch": 0.1684749295083977, "grad_norm": 1.3005882157382291, "learning_rate": 9.50555165954241e-06, "loss": 0.722, "step": 5497 }, { "epoch": 0.1685055780311389, "grad_norm": 1.593672367100719, "learning_rate": 9.505336438961778e-06, "loss": 0.7797, "step": 5498 }, { "epoch": 0.1685362265538801, "grad_norm": 1.3280347444665892, "learning_rate": 9.505121173988913e-06, "loss": 0.7579, "step": 5499 }, { "epoch": 0.1685668750766213, "grad_norm": 1.4059647950670584, "learning_rate": 9.504905864625935e-06, "loss": 0.848, "step": 5500 }, { "epoch": 0.1685975235993625, "grad_norm": 1.460774011154665, "learning_rate": 9.50469051087497e-06, "loss": 0.7451, "step": 5501 }, { "epoch": 0.1686281721221037, "grad_norm": 1.4144613160116521, "learning_rate": 9.504475112738134e-06, "loss": 0.7237, "step": 5502 }, { "epoch": 0.16865882064484491, "grad_norm": 1.4626603253480175, "learning_rate": 9.504259670217553e-06, "loss": 0.6868, "step": 5503 }, { "epoch": 0.16868946916758612, "grad_norm": 0.7526069362064435, "learning_rate": 9.50404418331535e-06, "loss": 0.4762, "step": 5504 }, { "epoch": 0.16872011769032733, "grad_norm": 0.6660491609651826, "learning_rate": 9.503828652033647e-06, "loss": 0.4754, "step": 5505 }, { "epoch": 0.16875076621306853, "grad_norm": 1.6676490953066174, "learning_rate": 9.503613076374568e-06, "loss": 0.7674, "step": 5506 }, { "epoch": 0.16878141473580974, "grad_norm": 0.4842177956027913, "learning_rate": 9.503397456340235e-06, "loss": 0.4626, "step": 5507 }, { "epoch": 0.16881206325855094, "grad_norm": 1.4484761294622286, "learning_rate": 9.503181791932777e-06, "loss": 0.678, "step": 5508 }, { "epoch": 0.16884271178129215, "grad_norm": 0.670405230437031, "learning_rate": 9.502966083154314e-06, "loss": 0.4799, "step": 5509 }, { "epoch": 0.16887336030403335, "grad_norm": 1.3414541887624876, "learning_rate": 9.502750330006977e-06, "loss": 0.6812, "step": 5510 }, { "epoch": 0.16890400882677456, "grad_norm": 1.3750710729229858, "learning_rate": 9.502534532492889e-06, "loss": 0.7257, "step": 5511 }, { "epoch": 0.16893465734951577, "grad_norm": 1.4050801056155977, "learning_rate": 9.502318690614175e-06, "loss": 0.6766, "step": 5512 }, { "epoch": 0.16896530587225694, "grad_norm": 0.6612090898735612, "learning_rate": 9.502102804372962e-06, "loss": 0.4769, "step": 5513 }, { "epoch": 0.16899595439499815, "grad_norm": 1.423651958829177, "learning_rate": 9.501886873771378e-06, "loss": 0.7015, "step": 5514 }, { "epoch": 0.16902660291773935, "grad_norm": 1.404839072209547, "learning_rate": 9.501670898811552e-06, "loss": 0.6862, "step": 5515 }, { "epoch": 0.16905725144048056, "grad_norm": 1.375585563567544, "learning_rate": 9.50145487949561e-06, "loss": 0.7148, "step": 5516 }, { "epoch": 0.16908789996322177, "grad_norm": 1.386001539013768, "learning_rate": 9.501238815825684e-06, "loss": 0.7438, "step": 5517 }, { "epoch": 0.16911854848596297, "grad_norm": 0.5071635321617335, "learning_rate": 9.501022707803898e-06, "loss": 0.469, "step": 5518 }, { "epoch": 0.16914919700870418, "grad_norm": 1.2294340857550528, "learning_rate": 9.500806555432384e-06, "loss": 0.6973, "step": 5519 }, { "epoch": 0.16917984553144538, "grad_norm": 1.3421728434540021, "learning_rate": 9.50059035871327e-06, "loss": 0.6742, "step": 5520 }, { "epoch": 0.1692104940541866, "grad_norm": 1.315897356210601, "learning_rate": 9.500374117648689e-06, "loss": 0.6783, "step": 5521 }, { "epoch": 0.1692411425769278, "grad_norm": 1.352710152378275, "learning_rate": 9.500157832240772e-06, "loss": 0.6405, "step": 5522 }, { "epoch": 0.169271791099669, "grad_norm": 1.266171771883531, "learning_rate": 9.499941502491646e-06, "loss": 0.6972, "step": 5523 }, { "epoch": 0.1693024396224102, "grad_norm": 1.4026518513779225, "learning_rate": 9.499725128403446e-06, "loss": 0.6586, "step": 5524 }, { "epoch": 0.1693330881451514, "grad_norm": 1.384826912084575, "learning_rate": 9.499508709978303e-06, "loss": 0.7134, "step": 5525 }, { "epoch": 0.16936373666789262, "grad_norm": 1.284562593708939, "learning_rate": 9.499292247218348e-06, "loss": 0.738, "step": 5526 }, { "epoch": 0.16939438519063382, "grad_norm": 1.3449317824783977, "learning_rate": 9.499075740125719e-06, "loss": 0.7727, "step": 5527 }, { "epoch": 0.16942503371337503, "grad_norm": 1.3054318881403837, "learning_rate": 9.498859188702541e-06, "loss": 0.7136, "step": 5528 }, { "epoch": 0.1694556822361162, "grad_norm": 1.2692533344453787, "learning_rate": 9.498642592950955e-06, "loss": 0.656, "step": 5529 }, { "epoch": 0.1694863307588574, "grad_norm": 1.3998979868913979, "learning_rate": 9.498425952873092e-06, "loss": 0.6426, "step": 5530 }, { "epoch": 0.16951697928159862, "grad_norm": 1.5604975987707324, "learning_rate": 9.498209268471089e-06, "loss": 0.7276, "step": 5531 }, { "epoch": 0.16954762780433982, "grad_norm": 1.3526537129726128, "learning_rate": 9.497992539747076e-06, "loss": 0.7107, "step": 5532 }, { "epoch": 0.16957827632708103, "grad_norm": 1.2935994531104151, "learning_rate": 9.497775766703193e-06, "loss": 0.727, "step": 5533 }, { "epoch": 0.16960892484982223, "grad_norm": 0.7050376276255015, "learning_rate": 9.497558949341575e-06, "loss": 0.4756, "step": 5534 }, { "epoch": 0.16963957337256344, "grad_norm": 1.440797622091407, "learning_rate": 9.497342087664355e-06, "loss": 0.7643, "step": 5535 }, { "epoch": 0.16967022189530465, "grad_norm": 1.4461841350140132, "learning_rate": 9.497125181673676e-06, "loss": 0.8219, "step": 5536 }, { "epoch": 0.16970087041804585, "grad_norm": 1.2872902206732473, "learning_rate": 9.496908231371672e-06, "loss": 0.7572, "step": 5537 }, { "epoch": 0.16973151894078706, "grad_norm": 0.488121764526583, "learning_rate": 9.49669123676048e-06, "loss": 0.498, "step": 5538 }, { "epoch": 0.16976216746352826, "grad_norm": 1.267540180057368, "learning_rate": 9.496474197842238e-06, "loss": 0.6702, "step": 5539 }, { "epoch": 0.16979281598626947, "grad_norm": 1.3063406918372777, "learning_rate": 9.496257114619085e-06, "loss": 0.7354, "step": 5540 }, { "epoch": 0.16982346450901067, "grad_norm": 0.5328612383539092, "learning_rate": 9.496039987093162e-06, "loss": 0.4505, "step": 5541 }, { "epoch": 0.16985411303175188, "grad_norm": 1.425945993835667, "learning_rate": 9.495822815266605e-06, "loss": 0.6421, "step": 5542 }, { "epoch": 0.16988476155449309, "grad_norm": 1.2872453344235433, "learning_rate": 9.495605599141555e-06, "loss": 0.7794, "step": 5543 }, { "epoch": 0.16991541007723426, "grad_norm": 1.3970523547351716, "learning_rate": 9.495388338720155e-06, "loss": 0.658, "step": 5544 }, { "epoch": 0.16994605859997547, "grad_norm": 1.3752653035865885, "learning_rate": 9.495171034004542e-06, "loss": 0.7322, "step": 5545 }, { "epoch": 0.16997670712271667, "grad_norm": 0.48313593663574267, "learning_rate": 9.494953684996859e-06, "loss": 0.4486, "step": 5546 }, { "epoch": 0.17000735564545788, "grad_norm": 1.5100887108260557, "learning_rate": 9.494736291699247e-06, "loss": 0.7102, "step": 5547 }, { "epoch": 0.1700380041681991, "grad_norm": 1.331213217165913, "learning_rate": 9.49451885411385e-06, "loss": 0.7504, "step": 5548 }, { "epoch": 0.1700686526909403, "grad_norm": 1.278712321204489, "learning_rate": 9.494301372242807e-06, "loss": 0.7575, "step": 5549 }, { "epoch": 0.1700993012136815, "grad_norm": 1.245302451441725, "learning_rate": 9.494083846088263e-06, "loss": 0.7488, "step": 5550 }, { "epoch": 0.1701299497364227, "grad_norm": 1.5061945597145903, "learning_rate": 9.493866275652359e-06, "loss": 0.6706, "step": 5551 }, { "epoch": 0.1701605982591639, "grad_norm": 1.6461924347267765, "learning_rate": 9.493648660937244e-06, "loss": 0.7664, "step": 5552 }, { "epoch": 0.17019124678190511, "grad_norm": 1.3173423629614172, "learning_rate": 9.493431001945056e-06, "loss": 0.7116, "step": 5553 }, { "epoch": 0.17022189530464632, "grad_norm": 0.5449694074980025, "learning_rate": 9.493213298677945e-06, "loss": 0.4569, "step": 5554 }, { "epoch": 0.17025254382738753, "grad_norm": 1.478135322807675, "learning_rate": 9.492995551138054e-06, "loss": 0.7342, "step": 5555 }, { "epoch": 0.17028319235012873, "grad_norm": 1.5070079996824288, "learning_rate": 9.492777759327528e-06, "loss": 0.7594, "step": 5556 }, { "epoch": 0.17031384087286994, "grad_norm": 0.48539429577147497, "learning_rate": 9.492559923248512e-06, "loss": 0.4701, "step": 5557 }, { "epoch": 0.17034448939561114, "grad_norm": 1.3685303434301945, "learning_rate": 9.492342042903153e-06, "loss": 0.7702, "step": 5558 }, { "epoch": 0.17037513791835235, "grad_norm": 1.2817549937167945, "learning_rate": 9.4921241182936e-06, "loss": 0.785, "step": 5559 }, { "epoch": 0.17040578644109353, "grad_norm": 1.4172464509180966, "learning_rate": 9.491906149421998e-06, "loss": 0.8135, "step": 5560 }, { "epoch": 0.17043643496383473, "grad_norm": 1.31232645498959, "learning_rate": 9.491688136290496e-06, "loss": 0.6137, "step": 5561 }, { "epoch": 0.17046708348657594, "grad_norm": 0.5175099374288072, "learning_rate": 9.491470078901241e-06, "loss": 0.4453, "step": 5562 }, { "epoch": 0.17049773200931714, "grad_norm": 0.4861529063971239, "learning_rate": 9.491251977256383e-06, "loss": 0.4644, "step": 5563 }, { "epoch": 0.17052838053205835, "grad_norm": 1.313184292267549, "learning_rate": 9.49103383135807e-06, "loss": 0.7649, "step": 5564 }, { "epoch": 0.17055902905479955, "grad_norm": 1.260445108387929, "learning_rate": 9.49081564120845e-06, "loss": 0.6729, "step": 5565 }, { "epoch": 0.17058967757754076, "grad_norm": 0.46023767954622435, "learning_rate": 9.490597406809676e-06, "loss": 0.4578, "step": 5566 }, { "epoch": 0.17062032610028197, "grad_norm": 1.31485892711154, "learning_rate": 9.490379128163897e-06, "loss": 0.6091, "step": 5567 }, { "epoch": 0.17065097462302317, "grad_norm": 1.8067064363605965, "learning_rate": 9.490160805273262e-06, "loss": 0.7408, "step": 5568 }, { "epoch": 0.17068162314576438, "grad_norm": 1.34275355718046, "learning_rate": 9.489942438139925e-06, "loss": 0.743, "step": 5569 }, { "epoch": 0.17071227166850558, "grad_norm": 1.3749272957620318, "learning_rate": 9.489724026766037e-06, "loss": 0.8036, "step": 5570 }, { "epoch": 0.1707429201912468, "grad_norm": 1.9987407533962243, "learning_rate": 9.489505571153747e-06, "loss": 0.8167, "step": 5571 }, { "epoch": 0.170773568713988, "grad_norm": 0.5487044798612338, "learning_rate": 9.489287071305212e-06, "loss": 0.4732, "step": 5572 }, { "epoch": 0.1708042172367292, "grad_norm": 1.3107332981724023, "learning_rate": 9.489068527222583e-06, "loss": 0.7754, "step": 5573 }, { "epoch": 0.1708348657594704, "grad_norm": 1.3045178227425875, "learning_rate": 9.488849938908011e-06, "loss": 0.638, "step": 5574 }, { "epoch": 0.17086551428221158, "grad_norm": 0.46683261857123387, "learning_rate": 9.488631306363654e-06, "loss": 0.4617, "step": 5575 }, { "epoch": 0.1708961628049528, "grad_norm": 1.2165838274984841, "learning_rate": 9.488412629591663e-06, "loss": 0.7712, "step": 5576 }, { "epoch": 0.170926811327694, "grad_norm": 1.302590553580393, "learning_rate": 9.488193908594195e-06, "loss": 0.6407, "step": 5577 }, { "epoch": 0.1709574598504352, "grad_norm": 1.5224498461409712, "learning_rate": 9.487975143373404e-06, "loss": 0.7273, "step": 5578 }, { "epoch": 0.1709881083731764, "grad_norm": 1.2457901423793591, "learning_rate": 9.487756333931446e-06, "loss": 0.7134, "step": 5579 }, { "epoch": 0.1710187568959176, "grad_norm": 1.3132000989742962, "learning_rate": 9.487537480270474e-06, "loss": 0.6165, "step": 5580 }, { "epoch": 0.17104940541865882, "grad_norm": 1.2025478829273608, "learning_rate": 9.48731858239265e-06, "loss": 0.7174, "step": 5581 }, { "epoch": 0.17108005394140002, "grad_norm": 1.3680039813531744, "learning_rate": 9.487099640300126e-06, "loss": 0.7327, "step": 5582 }, { "epoch": 0.17111070246414123, "grad_norm": 1.4942550310459168, "learning_rate": 9.486880653995063e-06, "loss": 0.8328, "step": 5583 }, { "epoch": 0.17114135098688243, "grad_norm": 0.4965630703346118, "learning_rate": 9.486661623479616e-06, "loss": 0.4554, "step": 5584 }, { "epoch": 0.17117199950962364, "grad_norm": 1.5222365249928589, "learning_rate": 9.486442548755942e-06, "loss": 0.7992, "step": 5585 }, { "epoch": 0.17120264803236485, "grad_norm": 1.363823248805395, "learning_rate": 9.486223429826205e-06, "loss": 0.6473, "step": 5586 }, { "epoch": 0.17123329655510605, "grad_norm": 1.3541041938575247, "learning_rate": 9.486004266692558e-06, "loss": 0.752, "step": 5587 }, { "epoch": 0.17126394507784726, "grad_norm": 1.4867127228192076, "learning_rate": 9.485785059357166e-06, "loss": 0.7357, "step": 5588 }, { "epoch": 0.17129459360058846, "grad_norm": 1.218860134757822, "learning_rate": 9.485565807822183e-06, "loss": 0.6317, "step": 5589 }, { "epoch": 0.17132524212332967, "grad_norm": 1.2433790199132595, "learning_rate": 9.485346512089775e-06, "loss": 0.8004, "step": 5590 }, { "epoch": 0.17135589064607085, "grad_norm": 1.1328458325903212, "learning_rate": 9.485127172162098e-06, "loss": 0.5664, "step": 5591 }, { "epoch": 0.17138653916881205, "grad_norm": 1.271541408414005, "learning_rate": 9.484907788041318e-06, "loss": 0.8373, "step": 5592 }, { "epoch": 0.17141718769155326, "grad_norm": 1.3752874665478827, "learning_rate": 9.484688359729592e-06, "loss": 0.6872, "step": 5593 }, { "epoch": 0.17144783621429446, "grad_norm": 1.2868356563267092, "learning_rate": 9.484468887229085e-06, "loss": 0.6797, "step": 5594 }, { "epoch": 0.17147848473703567, "grad_norm": 1.2481580397692016, "learning_rate": 9.484249370541958e-06, "loss": 0.717, "step": 5595 }, { "epoch": 0.17150913325977687, "grad_norm": 1.3516195485470974, "learning_rate": 9.484029809670377e-06, "loss": 0.7185, "step": 5596 }, { "epoch": 0.17153978178251808, "grad_norm": 1.1431599375783643, "learning_rate": 9.483810204616498e-06, "loss": 0.6221, "step": 5597 }, { "epoch": 0.17157043030525929, "grad_norm": 1.2383022539936317, "learning_rate": 9.483590555382493e-06, "loss": 0.7015, "step": 5598 }, { "epoch": 0.1716010788280005, "grad_norm": 1.3800869876391308, "learning_rate": 9.483370861970525e-06, "loss": 0.7485, "step": 5599 }, { "epoch": 0.1716317273507417, "grad_norm": 1.1907291234305741, "learning_rate": 9.483151124382755e-06, "loss": 0.7453, "step": 5600 }, { "epoch": 0.1716623758734829, "grad_norm": 1.3164003255250163, "learning_rate": 9.48293134262135e-06, "loss": 0.6509, "step": 5601 }, { "epoch": 0.1716930243962241, "grad_norm": 1.2983756873443772, "learning_rate": 9.482711516688475e-06, "loss": 0.7922, "step": 5602 }, { "epoch": 0.17172367291896531, "grad_norm": 1.2473189784125043, "learning_rate": 9.482491646586297e-06, "loss": 0.7789, "step": 5603 }, { "epoch": 0.17175432144170652, "grad_norm": 1.524580971064935, "learning_rate": 9.48227173231698e-06, "loss": 0.7261, "step": 5604 }, { "epoch": 0.17178496996444773, "grad_norm": 1.217124322500425, "learning_rate": 9.482051773882695e-06, "loss": 0.6695, "step": 5605 }, { "epoch": 0.1718156184871889, "grad_norm": 1.3516508723289682, "learning_rate": 9.481831771285606e-06, "loss": 0.7001, "step": 5606 }, { "epoch": 0.1718462670099301, "grad_norm": 0.5571718188589034, "learning_rate": 9.48161172452788e-06, "loss": 0.4906, "step": 5607 }, { "epoch": 0.17187691553267131, "grad_norm": 0.47957027956034065, "learning_rate": 9.481391633611689e-06, "loss": 0.4579, "step": 5608 }, { "epoch": 0.17190756405541252, "grad_norm": 1.2939019051734841, "learning_rate": 9.4811714985392e-06, "loss": 0.6304, "step": 5609 }, { "epoch": 0.17193821257815373, "grad_norm": 1.4616501293730912, "learning_rate": 9.480951319312582e-06, "loss": 0.7205, "step": 5610 }, { "epoch": 0.17196886110089493, "grad_norm": 1.383187085256904, "learning_rate": 9.480731095934003e-06, "loss": 0.7017, "step": 5611 }, { "epoch": 0.17199950962363614, "grad_norm": 1.370478682035714, "learning_rate": 9.480510828405636e-06, "loss": 0.6867, "step": 5612 }, { "epoch": 0.17203015814637734, "grad_norm": 1.2685369981953032, "learning_rate": 9.480290516729648e-06, "loss": 0.7258, "step": 5613 }, { "epoch": 0.17206080666911855, "grad_norm": 1.3985159099501563, "learning_rate": 9.480070160908212e-06, "loss": 0.7151, "step": 5614 }, { "epoch": 0.17209145519185975, "grad_norm": 1.3300136128166893, "learning_rate": 9.479849760943498e-06, "loss": 0.7197, "step": 5615 }, { "epoch": 0.17212210371460096, "grad_norm": 1.2638078362623162, "learning_rate": 9.479629316837676e-06, "loss": 0.7161, "step": 5616 }, { "epoch": 0.17215275223734217, "grad_norm": 1.2219537771959372, "learning_rate": 9.479408828592923e-06, "loss": 0.7486, "step": 5617 }, { "epoch": 0.17218340076008337, "grad_norm": 1.3928025252137448, "learning_rate": 9.479188296211407e-06, "loss": 0.7705, "step": 5618 }, { "epoch": 0.17221404928282458, "grad_norm": 1.4105316151771472, "learning_rate": 9.478967719695303e-06, "loss": 0.6948, "step": 5619 }, { "epoch": 0.17224469780556578, "grad_norm": 1.2438584389090404, "learning_rate": 9.478747099046786e-06, "loss": 0.6479, "step": 5620 }, { "epoch": 0.172275346328307, "grad_norm": 0.578464551215894, "learning_rate": 9.478526434268026e-06, "loss": 0.4717, "step": 5621 }, { "epoch": 0.17230599485104817, "grad_norm": 1.3013781700794589, "learning_rate": 9.478305725361198e-06, "loss": 0.667, "step": 5622 }, { "epoch": 0.17233664337378937, "grad_norm": 0.516140371180163, "learning_rate": 9.478084972328481e-06, "loss": 0.4633, "step": 5623 }, { "epoch": 0.17236729189653058, "grad_norm": 1.3808308429649545, "learning_rate": 9.477864175172044e-06, "loss": 0.7453, "step": 5624 }, { "epoch": 0.17239794041927178, "grad_norm": 1.146717832395344, "learning_rate": 9.477643333894067e-06, "loss": 0.7466, "step": 5625 }, { "epoch": 0.172428588942013, "grad_norm": 1.3473660194991601, "learning_rate": 9.477422448496724e-06, "loss": 0.7615, "step": 5626 }, { "epoch": 0.1724592374647542, "grad_norm": 0.4607949518499756, "learning_rate": 9.477201518982193e-06, "loss": 0.4762, "step": 5627 }, { "epoch": 0.1724898859874954, "grad_norm": 1.4068115251940738, "learning_rate": 9.47698054535265e-06, "loss": 0.7155, "step": 5628 }, { "epoch": 0.1725205345102366, "grad_norm": 1.2585074991717187, "learning_rate": 9.47675952761027e-06, "loss": 0.703, "step": 5629 }, { "epoch": 0.1725511830329778, "grad_norm": 1.2143066366329738, "learning_rate": 9.476538465757236e-06, "loss": 0.6602, "step": 5630 }, { "epoch": 0.17258183155571902, "grad_norm": 1.3368434752402207, "learning_rate": 9.47631735979572e-06, "loss": 0.7686, "step": 5631 }, { "epoch": 0.17261248007846022, "grad_norm": 1.3250565583443727, "learning_rate": 9.476096209727907e-06, "loss": 0.6936, "step": 5632 }, { "epoch": 0.17264312860120143, "grad_norm": 0.5883176363193691, "learning_rate": 9.47587501555597e-06, "loss": 0.4862, "step": 5633 }, { "epoch": 0.17267377712394263, "grad_norm": 0.5136052610036456, "learning_rate": 9.475653777282093e-06, "loss": 0.4598, "step": 5634 }, { "epoch": 0.17270442564668384, "grad_norm": 1.2903007166162441, "learning_rate": 9.475432494908454e-06, "loss": 0.6562, "step": 5635 }, { "epoch": 0.17273507416942505, "grad_norm": 1.6210062109399908, "learning_rate": 9.475211168437234e-06, "loss": 0.7672, "step": 5636 }, { "epoch": 0.17276572269216625, "grad_norm": 1.2898008438586948, "learning_rate": 9.474989797870611e-06, "loss": 0.5472, "step": 5637 }, { "epoch": 0.17279637121490743, "grad_norm": 1.2363038452241308, "learning_rate": 9.47476838321077e-06, "loss": 0.7118, "step": 5638 }, { "epoch": 0.17282701973764864, "grad_norm": 1.3658982306615297, "learning_rate": 9.474546924459892e-06, "loss": 0.7495, "step": 5639 }, { "epoch": 0.17285766826038984, "grad_norm": 1.3751438803400893, "learning_rate": 9.474325421620158e-06, "loss": 0.7761, "step": 5640 }, { "epoch": 0.17288831678313105, "grad_norm": 1.9637725041964156, "learning_rate": 9.47410387469375e-06, "loss": 0.6919, "step": 5641 }, { "epoch": 0.17291896530587225, "grad_norm": 1.2776972347121078, "learning_rate": 9.473882283682852e-06, "loss": 0.8785, "step": 5642 }, { "epoch": 0.17294961382861346, "grad_norm": 1.230139401502492, "learning_rate": 9.473660648589648e-06, "loss": 0.6943, "step": 5643 }, { "epoch": 0.17298026235135466, "grad_norm": 1.2768370659191588, "learning_rate": 9.47343896941632e-06, "loss": 0.7617, "step": 5644 }, { "epoch": 0.17301091087409587, "grad_norm": 1.3209634333730575, "learning_rate": 9.473217246165055e-06, "loss": 0.7504, "step": 5645 }, { "epoch": 0.17304155939683707, "grad_norm": 1.3281831653876561, "learning_rate": 9.472995478838034e-06, "loss": 0.7556, "step": 5646 }, { "epoch": 0.17307220791957828, "grad_norm": 1.2964622214729957, "learning_rate": 9.472773667437444e-06, "loss": 0.7243, "step": 5647 }, { "epoch": 0.17310285644231949, "grad_norm": 1.2957234933502288, "learning_rate": 9.47255181196547e-06, "loss": 0.7192, "step": 5648 }, { "epoch": 0.1731335049650607, "grad_norm": 1.3949506636538116, "learning_rate": 9.4723299124243e-06, "loss": 0.7868, "step": 5649 }, { "epoch": 0.1731641534878019, "grad_norm": 1.4543131013821846, "learning_rate": 9.47210796881612e-06, "loss": 0.7479, "step": 5650 }, { "epoch": 0.1731948020105431, "grad_norm": 1.3247667255399138, "learning_rate": 9.471885981143114e-06, "loss": 0.7398, "step": 5651 }, { "epoch": 0.1732254505332843, "grad_norm": 1.0089460701706632, "learning_rate": 9.471663949407472e-06, "loss": 0.4817, "step": 5652 }, { "epoch": 0.1732560990560255, "grad_norm": 1.2954766937419708, "learning_rate": 9.471441873611382e-06, "loss": 0.6836, "step": 5653 }, { "epoch": 0.1732867475787667, "grad_norm": 1.4307831361060592, "learning_rate": 9.47121975375703e-06, "loss": 0.7354, "step": 5654 }, { "epoch": 0.1733173961015079, "grad_norm": 1.2612795477152081, "learning_rate": 9.470997589846607e-06, "loss": 0.8109, "step": 5655 }, { "epoch": 0.1733480446242491, "grad_norm": 1.349210973342521, "learning_rate": 9.4707753818823e-06, "loss": 0.6991, "step": 5656 }, { "epoch": 0.1733786931469903, "grad_norm": 1.4366114068987903, "learning_rate": 9.470553129866297e-06, "loss": 0.7635, "step": 5657 }, { "epoch": 0.17340934166973151, "grad_norm": 1.179462292017407, "learning_rate": 9.470330833800794e-06, "loss": 0.703, "step": 5658 }, { "epoch": 0.17343999019247272, "grad_norm": 0.63518481939105, "learning_rate": 9.470108493687976e-06, "loss": 0.4799, "step": 5659 }, { "epoch": 0.17347063871521393, "grad_norm": 1.256003137571788, "learning_rate": 9.469886109530034e-06, "loss": 0.6814, "step": 5660 }, { "epoch": 0.17350128723795513, "grad_norm": 1.4311158291833748, "learning_rate": 9.469663681329161e-06, "loss": 0.7081, "step": 5661 }, { "epoch": 0.17353193576069634, "grad_norm": 1.3129092204728143, "learning_rate": 9.469441209087549e-06, "loss": 0.7356, "step": 5662 }, { "epoch": 0.17356258428343754, "grad_norm": 1.428434146459372, "learning_rate": 9.469218692807389e-06, "loss": 0.773, "step": 5663 }, { "epoch": 0.17359323280617875, "grad_norm": 1.2879018588196585, "learning_rate": 9.468996132490874e-06, "loss": 0.7759, "step": 5664 }, { "epoch": 0.17362388132891995, "grad_norm": 1.1406263604774731, "learning_rate": 9.468773528140195e-06, "loss": 0.688, "step": 5665 }, { "epoch": 0.17365452985166116, "grad_norm": 0.5849892823445699, "learning_rate": 9.46855087975755e-06, "loss": 0.4675, "step": 5666 }, { "epoch": 0.17368517837440237, "grad_norm": 0.4846604413365972, "learning_rate": 9.468328187345128e-06, "loss": 0.4807, "step": 5667 }, { "epoch": 0.17371582689714357, "grad_norm": 1.3409620090698955, "learning_rate": 9.468105450905125e-06, "loss": 0.7192, "step": 5668 }, { "epoch": 0.17374647541988475, "grad_norm": 1.43920673708852, "learning_rate": 9.467882670439736e-06, "loss": 0.7562, "step": 5669 }, { "epoch": 0.17377712394262596, "grad_norm": 1.2294408717391443, "learning_rate": 9.467659845951156e-06, "loss": 0.6272, "step": 5670 }, { "epoch": 0.17380777246536716, "grad_norm": 1.430067290632522, "learning_rate": 9.46743697744158e-06, "loss": 0.7947, "step": 5671 }, { "epoch": 0.17383842098810837, "grad_norm": 1.23646745666193, "learning_rate": 9.467214064913205e-06, "loss": 0.671, "step": 5672 }, { "epoch": 0.17386906951084957, "grad_norm": 1.2402919954890015, "learning_rate": 9.466991108368226e-06, "loss": 0.7483, "step": 5673 }, { "epoch": 0.17389971803359078, "grad_norm": 1.3795687061400856, "learning_rate": 9.466768107808842e-06, "loss": 0.7121, "step": 5674 }, { "epoch": 0.17393036655633198, "grad_norm": 1.4536568535872438, "learning_rate": 9.466545063237248e-06, "loss": 0.7325, "step": 5675 }, { "epoch": 0.1739610150790732, "grad_norm": 1.4390709786403728, "learning_rate": 9.466321974655644e-06, "loss": 0.7296, "step": 5676 }, { "epoch": 0.1739916636018144, "grad_norm": 1.2881093292423431, "learning_rate": 9.466098842066224e-06, "loss": 0.7545, "step": 5677 }, { "epoch": 0.1740223121245556, "grad_norm": 1.3763504068042696, "learning_rate": 9.465875665471193e-06, "loss": 0.7352, "step": 5678 }, { "epoch": 0.1740529606472968, "grad_norm": 1.4369661003606276, "learning_rate": 9.465652444872744e-06, "loss": 0.6756, "step": 5679 }, { "epoch": 0.174083609170038, "grad_norm": 1.4249555880760256, "learning_rate": 9.46542918027308e-06, "loss": 0.6768, "step": 5680 }, { "epoch": 0.17411425769277922, "grad_norm": 1.3234431663775388, "learning_rate": 9.465205871674399e-06, "loss": 0.6471, "step": 5681 }, { "epoch": 0.17414490621552042, "grad_norm": 1.160411378000852, "learning_rate": 9.464982519078903e-06, "loss": 0.6978, "step": 5682 }, { "epoch": 0.17417555473826163, "grad_norm": 0.9098227031205262, "learning_rate": 9.46475912248879e-06, "loss": 0.4649, "step": 5683 }, { "epoch": 0.1742062032610028, "grad_norm": 1.4346328083781341, "learning_rate": 9.464535681906264e-06, "loss": 0.7377, "step": 5684 }, { "epoch": 0.174236851783744, "grad_norm": 1.3887498674496257, "learning_rate": 9.464312197333526e-06, "loss": 0.6972, "step": 5685 }, { "epoch": 0.17426750030648522, "grad_norm": 0.5380655473321074, "learning_rate": 9.464088668772777e-06, "loss": 0.4534, "step": 5686 }, { "epoch": 0.17429814882922642, "grad_norm": 1.3502982256463716, "learning_rate": 9.463865096226221e-06, "loss": 0.7367, "step": 5687 }, { "epoch": 0.17432879735196763, "grad_norm": 1.3113489202711008, "learning_rate": 9.46364147969606e-06, "loss": 0.6956, "step": 5688 }, { "epoch": 0.17435944587470883, "grad_norm": 1.4002408499215564, "learning_rate": 9.463417819184498e-06, "loss": 0.7353, "step": 5689 }, { "epoch": 0.17439009439745004, "grad_norm": 1.215214815840239, "learning_rate": 9.463194114693736e-06, "loss": 0.6794, "step": 5690 }, { "epoch": 0.17442074292019125, "grad_norm": 0.6593866079948408, "learning_rate": 9.462970366225983e-06, "loss": 0.4808, "step": 5691 }, { "epoch": 0.17445139144293245, "grad_norm": 1.3812079709247371, "learning_rate": 9.46274657378344e-06, "loss": 0.7315, "step": 5692 }, { "epoch": 0.17448203996567366, "grad_norm": 1.4363981775979657, "learning_rate": 9.462522737368311e-06, "loss": 0.6755, "step": 5693 }, { "epoch": 0.17451268848841486, "grad_norm": 1.3909885350714943, "learning_rate": 9.462298856982808e-06, "loss": 0.6331, "step": 5694 }, { "epoch": 0.17454333701115607, "grad_norm": 1.3486315951419245, "learning_rate": 9.46207493262913e-06, "loss": 0.6623, "step": 5695 }, { "epoch": 0.17457398553389727, "grad_norm": 1.3679744220370689, "learning_rate": 9.461850964309485e-06, "loss": 0.8024, "step": 5696 }, { "epoch": 0.17460463405663848, "grad_norm": 1.3957154193334051, "learning_rate": 9.461626952026083e-06, "loss": 0.6771, "step": 5697 }, { "epoch": 0.17463528257937969, "grad_norm": 1.2859549934280652, "learning_rate": 9.46140289578113e-06, "loss": 0.7495, "step": 5698 }, { "epoch": 0.1746659311021209, "grad_norm": 1.514141388569359, "learning_rate": 9.461178795576829e-06, "loss": 0.7353, "step": 5699 }, { "epoch": 0.17469657962486207, "grad_norm": 1.4486771684838733, "learning_rate": 9.460954651415395e-06, "loss": 0.6279, "step": 5700 }, { "epoch": 0.17472722814760328, "grad_norm": 1.1939329095382338, "learning_rate": 9.460730463299032e-06, "loss": 0.7386, "step": 5701 }, { "epoch": 0.17475787667034448, "grad_norm": 1.2875903203854102, "learning_rate": 9.46050623122995e-06, "loss": 0.7065, "step": 5702 }, { "epoch": 0.1747885251930857, "grad_norm": 1.437954070069678, "learning_rate": 9.46028195521036e-06, "loss": 0.8177, "step": 5703 }, { "epoch": 0.1748191737158269, "grad_norm": 1.4182022106374421, "learning_rate": 9.46005763524247e-06, "loss": 0.6607, "step": 5704 }, { "epoch": 0.1748498222385681, "grad_norm": 1.35004173932661, "learning_rate": 9.459833271328491e-06, "loss": 0.7427, "step": 5705 }, { "epoch": 0.1748804707613093, "grad_norm": 1.5616205208087108, "learning_rate": 9.459608863470635e-06, "loss": 0.7904, "step": 5706 }, { "epoch": 0.1749111192840505, "grad_norm": 0.6603320115532495, "learning_rate": 9.45938441167111e-06, "loss": 0.4759, "step": 5707 }, { "epoch": 0.17494176780679171, "grad_norm": 1.346633781246226, "learning_rate": 9.45915991593213e-06, "loss": 0.7645, "step": 5708 }, { "epoch": 0.17497241632953292, "grad_norm": 1.3029609504642732, "learning_rate": 9.458935376255907e-06, "loss": 0.7554, "step": 5709 }, { "epoch": 0.17500306485227413, "grad_norm": 1.2418210999199035, "learning_rate": 9.458710792644652e-06, "loss": 0.7241, "step": 5710 }, { "epoch": 0.17503371337501533, "grad_norm": 1.3029094893647681, "learning_rate": 9.45848616510058e-06, "loss": 0.555, "step": 5711 }, { "epoch": 0.17506436189775654, "grad_norm": 1.3808359280118438, "learning_rate": 9.458261493625903e-06, "loss": 0.6929, "step": 5712 }, { "epoch": 0.17509501042049774, "grad_norm": 0.49572308944848864, "learning_rate": 9.458036778222833e-06, "loss": 0.4751, "step": 5713 }, { "epoch": 0.17512565894323895, "grad_norm": 1.4267617301808952, "learning_rate": 9.457812018893587e-06, "loss": 0.6487, "step": 5714 }, { "epoch": 0.17515630746598013, "grad_norm": 1.3156763527108097, "learning_rate": 9.457587215640379e-06, "loss": 0.7574, "step": 5715 }, { "epoch": 0.17518695598872133, "grad_norm": 1.283702018018665, "learning_rate": 9.457362368465424e-06, "loss": 0.7321, "step": 5716 }, { "epoch": 0.17521760451146254, "grad_norm": 0.4818703978788811, "learning_rate": 9.457137477370936e-06, "loss": 0.4861, "step": 5717 }, { "epoch": 0.17524825303420374, "grad_norm": 1.2655708602083486, "learning_rate": 9.456912542359132e-06, "loss": 0.7837, "step": 5718 }, { "epoch": 0.17527890155694495, "grad_norm": 1.3427740884722053, "learning_rate": 9.45668756343223e-06, "loss": 0.6824, "step": 5719 }, { "epoch": 0.17530955007968615, "grad_norm": 1.146550110519724, "learning_rate": 9.456462540592442e-06, "loss": 0.6019, "step": 5720 }, { "epoch": 0.17534019860242736, "grad_norm": 1.4097531846570392, "learning_rate": 9.456237473841991e-06, "loss": 0.7434, "step": 5721 }, { "epoch": 0.17537084712516857, "grad_norm": 1.5241815236549767, "learning_rate": 9.456012363183091e-06, "loss": 0.7731, "step": 5722 }, { "epoch": 0.17540149564790977, "grad_norm": 1.306241087785422, "learning_rate": 9.455787208617962e-06, "loss": 0.7369, "step": 5723 }, { "epoch": 0.17543214417065098, "grad_norm": 1.3501674999321573, "learning_rate": 9.455562010148821e-06, "loss": 0.7701, "step": 5724 }, { "epoch": 0.17546279269339218, "grad_norm": 1.2035270978337138, "learning_rate": 9.455336767777888e-06, "loss": 0.6541, "step": 5725 }, { "epoch": 0.1754934412161334, "grad_norm": 1.248080941147853, "learning_rate": 9.45511148150738e-06, "loss": 0.671, "step": 5726 }, { "epoch": 0.1755240897388746, "grad_norm": 0.6161703261245342, "learning_rate": 9.45488615133952e-06, "loss": 0.4649, "step": 5727 }, { "epoch": 0.1755547382616158, "grad_norm": 1.3633545732986438, "learning_rate": 9.454660777276528e-06, "loss": 0.7115, "step": 5728 }, { "epoch": 0.175585386784357, "grad_norm": 1.2820425637533208, "learning_rate": 9.454435359320622e-06, "loss": 0.702, "step": 5729 }, { "epoch": 0.1756160353070982, "grad_norm": 0.46656366049449793, "learning_rate": 9.454209897474025e-06, "loss": 0.4753, "step": 5730 }, { "epoch": 0.1756466838298394, "grad_norm": 0.4676285588423909, "learning_rate": 9.45398439173896e-06, "loss": 0.465, "step": 5731 }, { "epoch": 0.1756773323525806, "grad_norm": 1.2123148928449252, "learning_rate": 9.453758842117645e-06, "loss": 0.7193, "step": 5732 }, { "epoch": 0.1757079808753218, "grad_norm": 0.46782648908793323, "learning_rate": 9.453533248612305e-06, "loss": 0.4499, "step": 5733 }, { "epoch": 0.175738629398063, "grad_norm": 1.2854386890683758, "learning_rate": 9.453307611225162e-06, "loss": 0.7242, "step": 5734 }, { "epoch": 0.1757692779208042, "grad_norm": 1.6432098791205738, "learning_rate": 9.45308192995844e-06, "loss": 0.7214, "step": 5735 }, { "epoch": 0.17579992644354542, "grad_norm": 1.4353826337677227, "learning_rate": 9.452856204814364e-06, "loss": 0.6999, "step": 5736 }, { "epoch": 0.17583057496628662, "grad_norm": 1.2523817484355941, "learning_rate": 9.452630435795155e-06, "loss": 0.7002, "step": 5737 }, { "epoch": 0.17586122348902783, "grad_norm": 1.2217363117506588, "learning_rate": 9.452404622903039e-06, "loss": 0.692, "step": 5738 }, { "epoch": 0.17589187201176903, "grad_norm": 0.5215354536447909, "learning_rate": 9.452178766140241e-06, "loss": 0.4758, "step": 5739 }, { "epoch": 0.17592252053451024, "grad_norm": 1.365258424589086, "learning_rate": 9.451952865508986e-06, "loss": 0.7367, "step": 5740 }, { "epoch": 0.17595316905725145, "grad_norm": 1.317488546219128, "learning_rate": 9.451726921011501e-06, "loss": 0.7318, "step": 5741 }, { "epoch": 0.17598381757999265, "grad_norm": 1.5312669757534003, "learning_rate": 9.451500932650014e-06, "loss": 0.7972, "step": 5742 }, { "epoch": 0.17601446610273386, "grad_norm": 1.3832804178020113, "learning_rate": 9.451274900426746e-06, "loss": 0.6945, "step": 5743 }, { "epoch": 0.17604511462547506, "grad_norm": 1.2798928020114002, "learning_rate": 9.451048824343929e-06, "loss": 0.6546, "step": 5744 }, { "epoch": 0.17607576314821627, "grad_norm": 1.2664845659267092, "learning_rate": 9.450822704403788e-06, "loss": 0.7584, "step": 5745 }, { "epoch": 0.17610641167095745, "grad_norm": 1.2261640904017024, "learning_rate": 9.450596540608553e-06, "loss": 0.6388, "step": 5746 }, { "epoch": 0.17613706019369865, "grad_norm": 1.2132451493489431, "learning_rate": 9.450370332960452e-06, "loss": 0.6992, "step": 5747 }, { "epoch": 0.17616770871643986, "grad_norm": 0.5372178526832345, "learning_rate": 9.450144081461711e-06, "loss": 0.4709, "step": 5748 }, { "epoch": 0.17619835723918106, "grad_norm": 1.282735225068405, "learning_rate": 9.449917786114564e-06, "loss": 0.6445, "step": 5749 }, { "epoch": 0.17622900576192227, "grad_norm": 1.367639468303448, "learning_rate": 9.449691446921238e-06, "loss": 0.69, "step": 5750 }, { "epoch": 0.17625965428466348, "grad_norm": 0.49723674513989236, "learning_rate": 9.449465063883964e-06, "loss": 0.4545, "step": 5751 }, { "epoch": 0.17629030280740468, "grad_norm": 1.3979421311031404, "learning_rate": 9.449238637004973e-06, "loss": 0.7501, "step": 5752 }, { "epoch": 0.1763209513301459, "grad_norm": 1.3070797018916738, "learning_rate": 9.449012166286493e-06, "loss": 0.6641, "step": 5753 }, { "epoch": 0.1763515998528871, "grad_norm": 1.404868685073891, "learning_rate": 9.44878565173076e-06, "loss": 0.7125, "step": 5754 }, { "epoch": 0.1763822483756283, "grad_norm": 1.2538401436987021, "learning_rate": 9.448559093340003e-06, "loss": 0.6657, "step": 5755 }, { "epoch": 0.1764128968983695, "grad_norm": 1.5643314541619258, "learning_rate": 9.448332491116454e-06, "loss": 0.8119, "step": 5756 }, { "epoch": 0.1764435454211107, "grad_norm": 1.4868502731739524, "learning_rate": 9.448105845062348e-06, "loss": 0.6956, "step": 5757 }, { "epoch": 0.17647419394385191, "grad_norm": 1.2242742975291048, "learning_rate": 9.447879155179916e-06, "loss": 0.7568, "step": 5758 }, { "epoch": 0.17650484246659312, "grad_norm": 1.5066782115265485, "learning_rate": 9.447652421471394e-06, "loss": 0.6557, "step": 5759 }, { "epoch": 0.17653549098933433, "grad_norm": 1.3119407882535445, "learning_rate": 9.447425643939014e-06, "loss": 0.7694, "step": 5760 }, { "epoch": 0.17656613951207553, "grad_norm": 1.4634633204031706, "learning_rate": 9.447198822585011e-06, "loss": 0.7095, "step": 5761 }, { "epoch": 0.1765967880348167, "grad_norm": 1.3647546392240648, "learning_rate": 9.44697195741162e-06, "loss": 0.748, "step": 5762 }, { "epoch": 0.17662743655755792, "grad_norm": 0.5961821194146698, "learning_rate": 9.446745048421077e-06, "loss": 0.4467, "step": 5763 }, { "epoch": 0.17665808508029912, "grad_norm": 1.3294956374155475, "learning_rate": 9.446518095615618e-06, "loss": 0.7524, "step": 5764 }, { "epoch": 0.17668873360304033, "grad_norm": 1.440756835703963, "learning_rate": 9.446291098997477e-06, "loss": 0.7669, "step": 5765 }, { "epoch": 0.17671938212578153, "grad_norm": 0.48469853155441717, "learning_rate": 9.446064058568894e-06, "loss": 0.4545, "step": 5766 }, { "epoch": 0.17675003064852274, "grad_norm": 1.1511636440495108, "learning_rate": 9.445836974332103e-06, "loss": 0.724, "step": 5767 }, { "epoch": 0.17678067917126394, "grad_norm": 1.386492882112317, "learning_rate": 9.445609846289342e-06, "loss": 0.8423, "step": 5768 }, { "epoch": 0.17681132769400515, "grad_norm": 1.604932869177489, "learning_rate": 9.44538267444285e-06, "loss": 0.7798, "step": 5769 }, { "epoch": 0.17684197621674635, "grad_norm": 1.2743585526523447, "learning_rate": 9.445155458794867e-06, "loss": 0.7409, "step": 5770 }, { "epoch": 0.17687262473948756, "grad_norm": 1.1968835412697305, "learning_rate": 9.444928199347627e-06, "loss": 0.7925, "step": 5771 }, { "epoch": 0.17690327326222877, "grad_norm": 1.2353659991845567, "learning_rate": 9.444700896103373e-06, "loss": 0.7678, "step": 5772 }, { "epoch": 0.17693392178496997, "grad_norm": 1.469001326606356, "learning_rate": 9.444473549064346e-06, "loss": 0.6832, "step": 5773 }, { "epoch": 0.17696457030771118, "grad_norm": 1.2661031331426695, "learning_rate": 9.444246158232783e-06, "loss": 0.6625, "step": 5774 }, { "epoch": 0.17699521883045238, "grad_norm": 1.2906046305403884, "learning_rate": 9.444018723610925e-06, "loss": 0.744, "step": 5775 }, { "epoch": 0.1770258673531936, "grad_norm": 1.323210992573251, "learning_rate": 9.443791245201013e-06, "loss": 0.7048, "step": 5776 }, { "epoch": 0.17705651587593477, "grad_norm": 1.301735494553793, "learning_rate": 9.443563723005288e-06, "loss": 0.7337, "step": 5777 }, { "epoch": 0.17708716439867597, "grad_norm": 1.4831019757646076, "learning_rate": 9.443336157025995e-06, "loss": 0.6079, "step": 5778 }, { "epoch": 0.17711781292141718, "grad_norm": 1.4173486091469187, "learning_rate": 9.443108547265375e-06, "loss": 0.7167, "step": 5779 }, { "epoch": 0.17714846144415838, "grad_norm": 1.4894654523363933, "learning_rate": 9.442880893725667e-06, "loss": 0.722, "step": 5780 }, { "epoch": 0.1771791099668996, "grad_norm": 1.5294717195687295, "learning_rate": 9.442653196409117e-06, "loss": 0.6203, "step": 5781 }, { "epoch": 0.1772097584896408, "grad_norm": 1.3075438036178193, "learning_rate": 9.44242545531797e-06, "loss": 0.8001, "step": 5782 }, { "epoch": 0.177240407012382, "grad_norm": 1.4027833654455928, "learning_rate": 9.442197670454466e-06, "loss": 0.7401, "step": 5783 }, { "epoch": 0.1772710555351232, "grad_norm": 1.3077881239507048, "learning_rate": 9.441969841820853e-06, "loss": 0.6869, "step": 5784 }, { "epoch": 0.1773017040578644, "grad_norm": 1.2672954731893085, "learning_rate": 9.441741969419374e-06, "loss": 0.7304, "step": 5785 }, { "epoch": 0.17733235258060562, "grad_norm": 1.2378802260663726, "learning_rate": 9.441514053252276e-06, "loss": 0.7217, "step": 5786 }, { "epoch": 0.17736300110334682, "grad_norm": 1.378784177532091, "learning_rate": 9.441286093321803e-06, "loss": 0.7162, "step": 5787 }, { "epoch": 0.17739364962608803, "grad_norm": 1.3144814381298022, "learning_rate": 9.441058089630201e-06, "loss": 0.7317, "step": 5788 }, { "epoch": 0.17742429814882923, "grad_norm": 1.2631641229988648, "learning_rate": 9.44083004217972e-06, "loss": 0.6974, "step": 5789 }, { "epoch": 0.17745494667157044, "grad_norm": 1.2938320619146055, "learning_rate": 9.440601950972603e-06, "loss": 0.68, "step": 5790 }, { "epoch": 0.17748559519431165, "grad_norm": 1.2324749210058215, "learning_rate": 9.440373816011097e-06, "loss": 0.7011, "step": 5791 }, { "epoch": 0.17751624371705285, "grad_norm": 1.4807201556718688, "learning_rate": 9.440145637297453e-06, "loss": 0.744, "step": 5792 }, { "epoch": 0.17754689223979403, "grad_norm": 1.2541612556137094, "learning_rate": 9.439917414833919e-06, "loss": 0.7347, "step": 5793 }, { "epoch": 0.17757754076253524, "grad_norm": 1.4578561247206074, "learning_rate": 9.43968914862274e-06, "loss": 0.7464, "step": 5794 }, { "epoch": 0.17760818928527644, "grad_norm": 1.2281577726991666, "learning_rate": 9.439460838666172e-06, "loss": 0.7207, "step": 5795 }, { "epoch": 0.17763883780801765, "grad_norm": 1.3292077422038038, "learning_rate": 9.439232484966458e-06, "loss": 0.7233, "step": 5796 }, { "epoch": 0.17766948633075885, "grad_norm": 1.2947920676577171, "learning_rate": 9.439004087525849e-06, "loss": 0.6809, "step": 5797 }, { "epoch": 0.17770013485350006, "grad_norm": 0.7220274900020562, "learning_rate": 9.4387756463466e-06, "loss": 0.4567, "step": 5798 }, { "epoch": 0.17773078337624126, "grad_norm": 0.5785907627651738, "learning_rate": 9.438547161430957e-06, "loss": 0.4613, "step": 5799 }, { "epoch": 0.17776143189898247, "grad_norm": 0.4725663414713345, "learning_rate": 9.438318632781174e-06, "loss": 0.4717, "step": 5800 }, { "epoch": 0.17779208042172367, "grad_norm": 1.1897783202392627, "learning_rate": 9.4380900603995e-06, "loss": 0.719, "step": 5801 }, { "epoch": 0.17782272894446488, "grad_norm": 1.524962915115922, "learning_rate": 9.437861444288193e-06, "loss": 0.7817, "step": 5802 }, { "epoch": 0.1778533774672061, "grad_norm": 1.313115266974079, "learning_rate": 9.437632784449498e-06, "loss": 0.7072, "step": 5803 }, { "epoch": 0.1778840259899473, "grad_norm": 1.510474836278439, "learning_rate": 9.437404080885673e-06, "loss": 0.7099, "step": 5804 }, { "epoch": 0.1779146745126885, "grad_norm": 1.4507669914179788, "learning_rate": 9.437175333598971e-06, "loss": 0.7226, "step": 5805 }, { "epoch": 0.1779453230354297, "grad_norm": 1.2604776823652455, "learning_rate": 9.436946542591644e-06, "loss": 0.7126, "step": 5806 }, { "epoch": 0.1779759715581709, "grad_norm": 1.364451540259535, "learning_rate": 9.436717707865948e-06, "loss": 0.8322, "step": 5807 }, { "epoch": 0.1780066200809121, "grad_norm": 1.532861948793549, "learning_rate": 9.436488829424138e-06, "loss": 0.6566, "step": 5808 }, { "epoch": 0.1780372686036533, "grad_norm": 1.3193921143469178, "learning_rate": 9.436259907268466e-06, "loss": 0.7154, "step": 5809 }, { "epoch": 0.1780679171263945, "grad_norm": 1.474929734524354, "learning_rate": 9.436030941401192e-06, "loss": 0.6737, "step": 5810 }, { "epoch": 0.1780985656491357, "grad_norm": 1.1666867241224466, "learning_rate": 9.43580193182457e-06, "loss": 0.6088, "step": 5811 }, { "epoch": 0.1781292141718769, "grad_norm": 1.4465232829027947, "learning_rate": 9.435572878540857e-06, "loss": 0.6522, "step": 5812 }, { "epoch": 0.17815986269461812, "grad_norm": 1.380098156511964, "learning_rate": 9.435343781552308e-06, "loss": 0.6261, "step": 5813 }, { "epoch": 0.17819051121735932, "grad_norm": 1.3565893398122806, "learning_rate": 9.435114640861183e-06, "loss": 0.7653, "step": 5814 }, { "epoch": 0.17822115974010053, "grad_norm": 1.4325407714137446, "learning_rate": 9.434885456469739e-06, "loss": 0.6837, "step": 5815 }, { "epoch": 0.17825180826284173, "grad_norm": 1.3468899987806227, "learning_rate": 9.434656228380233e-06, "loss": 0.7692, "step": 5816 }, { "epoch": 0.17828245678558294, "grad_norm": 1.3008920879612826, "learning_rate": 9.434426956594926e-06, "loss": 0.7209, "step": 5817 }, { "epoch": 0.17831310530832414, "grad_norm": 1.3963567106880201, "learning_rate": 9.434197641116074e-06, "loss": 0.8031, "step": 5818 }, { "epoch": 0.17834375383106535, "grad_norm": 1.2766668110823023, "learning_rate": 9.433968281945939e-06, "loss": 0.8299, "step": 5819 }, { "epoch": 0.17837440235380655, "grad_norm": 1.3934453416718382, "learning_rate": 9.43373887908678e-06, "loss": 0.766, "step": 5820 }, { "epoch": 0.17840505087654776, "grad_norm": 1.3626865388220664, "learning_rate": 9.433509432540856e-06, "loss": 0.852, "step": 5821 }, { "epoch": 0.17843569939928897, "grad_norm": 1.3778085888989633, "learning_rate": 9.433279942310431e-06, "loss": 0.7725, "step": 5822 }, { "epoch": 0.17846634792203017, "grad_norm": 1.2080213830700126, "learning_rate": 9.433050408397763e-06, "loss": 0.5826, "step": 5823 }, { "epoch": 0.17849699644477135, "grad_norm": 1.2768691664565188, "learning_rate": 9.432820830805116e-06, "loss": 0.7877, "step": 5824 }, { "epoch": 0.17852764496751256, "grad_norm": 1.364185041766725, "learning_rate": 9.432591209534752e-06, "loss": 0.4881, "step": 5825 }, { "epoch": 0.17855829349025376, "grad_norm": 1.2741416009744813, "learning_rate": 9.43236154458893e-06, "loss": 0.7233, "step": 5826 }, { "epoch": 0.17858894201299497, "grad_norm": 1.2940625610253846, "learning_rate": 9.432131835969918e-06, "loss": 0.7525, "step": 5827 }, { "epoch": 0.17861959053573617, "grad_norm": 1.4005305410707534, "learning_rate": 9.431902083679976e-06, "loss": 0.7591, "step": 5828 }, { "epoch": 0.17865023905847738, "grad_norm": 1.0558397860933348, "learning_rate": 9.43167228772137e-06, "loss": 0.5653, "step": 5829 }, { "epoch": 0.17868088758121858, "grad_norm": 0.5723619109392079, "learning_rate": 9.431442448096363e-06, "loss": 0.4624, "step": 5830 }, { "epoch": 0.1787115361039598, "grad_norm": 0.6149163785956601, "learning_rate": 9.431212564807217e-06, "loss": 0.4621, "step": 5831 }, { "epoch": 0.178742184626701, "grad_norm": 1.420845408784358, "learning_rate": 9.430982637856202e-06, "loss": 0.7998, "step": 5832 }, { "epoch": 0.1787728331494422, "grad_norm": 1.241048642181417, "learning_rate": 9.430752667245581e-06, "loss": 0.6109, "step": 5833 }, { "epoch": 0.1788034816721834, "grad_norm": 1.35221529567778, "learning_rate": 9.43052265297762e-06, "loss": 0.6839, "step": 5834 }, { "epoch": 0.1788341301949246, "grad_norm": 1.3207480746136564, "learning_rate": 9.430292595054586e-06, "loss": 0.7446, "step": 5835 }, { "epoch": 0.17886477871766582, "grad_norm": 1.1990967987247172, "learning_rate": 9.430062493478746e-06, "loss": 0.6959, "step": 5836 }, { "epoch": 0.17889542724040702, "grad_norm": 1.2267946435812738, "learning_rate": 9.429832348252365e-06, "loss": 0.7271, "step": 5837 }, { "epoch": 0.17892607576314823, "grad_norm": 1.3438471938251069, "learning_rate": 9.429602159377715e-06, "loss": 0.7674, "step": 5838 }, { "epoch": 0.1789567242858894, "grad_norm": 1.3440962437165638, "learning_rate": 9.42937192685706e-06, "loss": 0.6649, "step": 5839 }, { "epoch": 0.1789873728086306, "grad_norm": 0.8235062481391893, "learning_rate": 9.42914165069267e-06, "loss": 0.4835, "step": 5840 }, { "epoch": 0.17901802133137182, "grad_norm": 1.4036792539970395, "learning_rate": 9.428911330886816e-06, "loss": 0.7478, "step": 5841 }, { "epoch": 0.17904866985411302, "grad_norm": 1.354689539333165, "learning_rate": 9.428680967441764e-06, "loss": 0.7311, "step": 5842 }, { "epoch": 0.17907931837685423, "grad_norm": 1.630158016063618, "learning_rate": 9.428450560359786e-06, "loss": 0.711, "step": 5843 }, { "epoch": 0.17910996689959544, "grad_norm": 0.5178998709288577, "learning_rate": 9.428220109643149e-06, "loss": 0.4447, "step": 5844 }, { "epoch": 0.17914061542233664, "grad_norm": 1.143054018714964, "learning_rate": 9.427989615294128e-06, "loss": 0.677, "step": 5845 }, { "epoch": 0.17917126394507785, "grad_norm": 1.5155536262959748, "learning_rate": 9.427759077314993e-06, "loss": 0.7009, "step": 5846 }, { "epoch": 0.17920191246781905, "grad_norm": 1.2289001886263535, "learning_rate": 9.427528495708015e-06, "loss": 0.6564, "step": 5847 }, { "epoch": 0.17923256099056026, "grad_norm": 1.4671859659542692, "learning_rate": 9.427297870475465e-06, "loss": 0.7666, "step": 5848 }, { "epoch": 0.17926320951330146, "grad_norm": 1.325699444486741, "learning_rate": 9.427067201619618e-06, "loss": 0.6667, "step": 5849 }, { "epoch": 0.17929385803604267, "grad_norm": 1.3177662718732226, "learning_rate": 9.426836489142743e-06, "loss": 0.7997, "step": 5850 }, { "epoch": 0.17932450655878387, "grad_norm": 1.5217982538977493, "learning_rate": 9.426605733047116e-06, "loss": 0.6759, "step": 5851 }, { "epoch": 0.17935515508152508, "grad_norm": 1.2427316746361428, "learning_rate": 9.426374933335009e-06, "loss": 0.6245, "step": 5852 }, { "epoch": 0.1793858036042663, "grad_norm": 1.285001710998159, "learning_rate": 9.4261440900087e-06, "loss": 0.5751, "step": 5853 }, { "epoch": 0.1794164521270075, "grad_norm": 1.1086975490444082, "learning_rate": 9.42591320307046e-06, "loss": 0.6176, "step": 5854 }, { "epoch": 0.17944710064974867, "grad_norm": 1.37846734404675, "learning_rate": 9.425682272522562e-06, "loss": 0.7706, "step": 5855 }, { "epoch": 0.17947774917248988, "grad_norm": 1.3449027064114132, "learning_rate": 9.425451298367287e-06, "loss": 0.7034, "step": 5856 }, { "epoch": 0.17950839769523108, "grad_norm": 0.7479893803676559, "learning_rate": 9.425220280606908e-06, "loss": 0.4763, "step": 5857 }, { "epoch": 0.1795390462179723, "grad_norm": 1.2750357288478764, "learning_rate": 9.424989219243701e-06, "loss": 0.6758, "step": 5858 }, { "epoch": 0.1795696947407135, "grad_norm": 1.2049767683410177, "learning_rate": 9.424758114279942e-06, "loss": 0.6873, "step": 5859 }, { "epoch": 0.1796003432634547, "grad_norm": 1.3045552143865984, "learning_rate": 9.42452696571791e-06, "loss": 0.6586, "step": 5860 }, { "epoch": 0.1796309917861959, "grad_norm": 0.5361059218738257, "learning_rate": 9.424295773559882e-06, "loss": 0.4524, "step": 5861 }, { "epoch": 0.1796616403089371, "grad_norm": 1.3890305898293838, "learning_rate": 9.424064537808135e-06, "loss": 0.7923, "step": 5862 }, { "epoch": 0.17969228883167832, "grad_norm": 1.390215796272282, "learning_rate": 9.42383325846495e-06, "loss": 0.7599, "step": 5863 }, { "epoch": 0.17972293735441952, "grad_norm": 1.4279865792248494, "learning_rate": 9.423601935532603e-06, "loss": 0.7636, "step": 5864 }, { "epoch": 0.17975358587716073, "grad_norm": 1.3710668387414497, "learning_rate": 9.423370569013373e-06, "loss": 0.7661, "step": 5865 }, { "epoch": 0.17978423439990193, "grad_norm": 0.5354115196713644, "learning_rate": 9.423139158909542e-06, "loss": 0.4657, "step": 5866 }, { "epoch": 0.17981488292264314, "grad_norm": 1.297335966131172, "learning_rate": 9.42290770522339e-06, "loss": 0.715, "step": 5867 }, { "epoch": 0.17984553144538434, "grad_norm": 1.326162329427426, "learning_rate": 9.422676207957195e-06, "loss": 0.7541, "step": 5868 }, { "epoch": 0.17987617996812555, "grad_norm": 1.2505478488944812, "learning_rate": 9.422444667113239e-06, "loss": 0.7142, "step": 5869 }, { "epoch": 0.17990682849086673, "grad_norm": 1.3169607734639552, "learning_rate": 9.422213082693807e-06, "loss": 0.7484, "step": 5870 }, { "epoch": 0.17993747701360793, "grad_norm": 0.5208849576804815, "learning_rate": 9.421981454701176e-06, "loss": 0.4756, "step": 5871 }, { "epoch": 0.17996812553634914, "grad_norm": 1.3055898443238112, "learning_rate": 9.421749783137632e-06, "loss": 0.7844, "step": 5872 }, { "epoch": 0.17999877405909034, "grad_norm": 1.1529421427528788, "learning_rate": 9.421518068005455e-06, "loss": 0.6366, "step": 5873 }, { "epoch": 0.18002942258183155, "grad_norm": 0.4642780732750513, "learning_rate": 9.42128630930693e-06, "loss": 0.4513, "step": 5874 }, { "epoch": 0.18006007110457276, "grad_norm": 1.221683043769109, "learning_rate": 9.421054507044339e-06, "loss": 0.6012, "step": 5875 }, { "epoch": 0.18009071962731396, "grad_norm": 1.3054484792086376, "learning_rate": 9.420822661219966e-06, "loss": 0.7729, "step": 5876 }, { "epoch": 0.18012136815005517, "grad_norm": 1.4468621266331203, "learning_rate": 9.420590771836098e-06, "loss": 0.8069, "step": 5877 }, { "epoch": 0.18015201667279637, "grad_norm": 1.3103262886136549, "learning_rate": 9.420358838895016e-06, "loss": 0.7493, "step": 5878 }, { "epoch": 0.18018266519553758, "grad_norm": 1.355876722942505, "learning_rate": 9.420126862399008e-06, "loss": 0.637, "step": 5879 }, { "epoch": 0.18021331371827878, "grad_norm": 1.3412395798440702, "learning_rate": 9.41989484235036e-06, "loss": 0.7397, "step": 5880 }, { "epoch": 0.18024396224102, "grad_norm": 1.2379908449691734, "learning_rate": 9.419662778751356e-06, "loss": 0.642, "step": 5881 }, { "epoch": 0.1802746107637612, "grad_norm": 1.2960723221556743, "learning_rate": 9.419430671604286e-06, "loss": 0.7285, "step": 5882 }, { "epoch": 0.1803052592865024, "grad_norm": 1.4239205449903556, "learning_rate": 9.419198520911433e-06, "loss": 0.7511, "step": 5883 }, { "epoch": 0.1803359078092436, "grad_norm": 1.452258055450661, "learning_rate": 9.418966326675088e-06, "loss": 0.7545, "step": 5884 }, { "epoch": 0.1803665563319848, "grad_norm": 1.3779902371740635, "learning_rate": 9.418734088897534e-06, "loss": 0.7616, "step": 5885 }, { "epoch": 0.180397204854726, "grad_norm": 1.2591655969277273, "learning_rate": 9.418501807581065e-06, "loss": 0.7323, "step": 5886 }, { "epoch": 0.1804278533774672, "grad_norm": 0.5867665170047176, "learning_rate": 9.418269482727966e-06, "loss": 0.4686, "step": 5887 }, { "epoch": 0.1804585019002084, "grad_norm": 0.5351691551013295, "learning_rate": 9.418037114340528e-06, "loss": 0.4259, "step": 5888 }, { "epoch": 0.1804891504229496, "grad_norm": 1.3522180104701524, "learning_rate": 9.41780470242104e-06, "loss": 0.7361, "step": 5889 }, { "epoch": 0.1805197989456908, "grad_norm": 1.2316386035626643, "learning_rate": 9.417572246971791e-06, "loss": 0.7561, "step": 5890 }, { "epoch": 0.18055044746843202, "grad_norm": 1.3728531900039138, "learning_rate": 9.417339747995074e-06, "loss": 0.6921, "step": 5891 }, { "epoch": 0.18058109599117322, "grad_norm": 1.2783225080033647, "learning_rate": 9.417107205493177e-06, "loss": 0.7895, "step": 5892 }, { "epoch": 0.18061174451391443, "grad_norm": 1.2189841127947556, "learning_rate": 9.416874619468393e-06, "loss": 0.623, "step": 5893 }, { "epoch": 0.18064239303665564, "grad_norm": 0.5614487650174627, "learning_rate": 9.416641989923012e-06, "loss": 0.4592, "step": 5894 }, { "epoch": 0.18067304155939684, "grad_norm": 1.2653665743369853, "learning_rate": 9.41640931685933e-06, "loss": 0.705, "step": 5895 }, { "epoch": 0.18070369008213805, "grad_norm": 1.3777478185876797, "learning_rate": 9.416176600279635e-06, "loss": 0.6747, "step": 5896 }, { "epoch": 0.18073433860487925, "grad_norm": 0.5678613878927709, "learning_rate": 9.41594384018622e-06, "loss": 0.445, "step": 5897 }, { "epoch": 0.18076498712762046, "grad_norm": 0.48953446299273573, "learning_rate": 9.415711036581385e-06, "loss": 0.4266, "step": 5898 }, { "epoch": 0.18079563565036166, "grad_norm": 1.2755143273825285, "learning_rate": 9.415478189467418e-06, "loss": 0.6869, "step": 5899 }, { "epoch": 0.18082628417310287, "grad_norm": 1.4429830566923199, "learning_rate": 9.415245298846614e-06, "loss": 0.7051, "step": 5900 }, { "epoch": 0.18085693269584405, "grad_norm": 1.404526518677746, "learning_rate": 9.415012364721267e-06, "loss": 0.7894, "step": 5901 }, { "epoch": 0.18088758121858525, "grad_norm": 0.5737586514408025, "learning_rate": 9.414779387093675e-06, "loss": 0.4645, "step": 5902 }, { "epoch": 0.18091822974132646, "grad_norm": 1.2296944642967582, "learning_rate": 9.414546365966133e-06, "loss": 0.6846, "step": 5903 }, { "epoch": 0.18094887826406766, "grad_norm": 1.444315023295182, "learning_rate": 9.414313301340936e-06, "loss": 0.792, "step": 5904 }, { "epoch": 0.18097952678680887, "grad_norm": 0.5476729596140437, "learning_rate": 9.41408019322038e-06, "loss": 0.4555, "step": 5905 }, { "epoch": 0.18101017530955008, "grad_norm": 1.4153091033356275, "learning_rate": 9.413847041606761e-06, "loss": 0.7661, "step": 5906 }, { "epoch": 0.18104082383229128, "grad_norm": 1.2517114770736386, "learning_rate": 9.413613846502379e-06, "loss": 0.714, "step": 5907 }, { "epoch": 0.1810714723550325, "grad_norm": 1.427244642958983, "learning_rate": 9.41338060790953e-06, "loss": 0.6811, "step": 5908 }, { "epoch": 0.1811021208777737, "grad_norm": 1.1043573175069772, "learning_rate": 9.413147325830513e-06, "loss": 0.5636, "step": 5909 }, { "epoch": 0.1811327694005149, "grad_norm": 1.2347345603827686, "learning_rate": 9.412914000267626e-06, "loss": 0.7206, "step": 5910 }, { "epoch": 0.1811634179232561, "grad_norm": 1.4676979210553844, "learning_rate": 9.41268063122317e-06, "loss": 0.8501, "step": 5911 }, { "epoch": 0.1811940664459973, "grad_norm": 0.5304100419104119, "learning_rate": 9.412447218699442e-06, "loss": 0.4863, "step": 5912 }, { "epoch": 0.18122471496873852, "grad_norm": 1.2716849540913693, "learning_rate": 9.41221376269874e-06, "loss": 0.8488, "step": 5913 }, { "epoch": 0.18125536349147972, "grad_norm": 1.2447094409909223, "learning_rate": 9.41198026322337e-06, "loss": 0.6852, "step": 5914 }, { "epoch": 0.18128601201422093, "grad_norm": 0.45004820805583295, "learning_rate": 9.411746720275628e-06, "loss": 0.4753, "step": 5915 }, { "epoch": 0.18131666053696213, "grad_norm": 1.3571873468917208, "learning_rate": 9.411513133857816e-06, "loss": 0.6566, "step": 5916 }, { "epoch": 0.1813473090597033, "grad_norm": 1.4071683883359918, "learning_rate": 9.411279503972239e-06, "loss": 0.7551, "step": 5917 }, { "epoch": 0.18137795758244452, "grad_norm": 1.3224985590398783, "learning_rate": 9.411045830621194e-06, "loss": 0.7388, "step": 5918 }, { "epoch": 0.18140860610518572, "grad_norm": 1.273884673732513, "learning_rate": 9.410812113806987e-06, "loss": 0.7405, "step": 5919 }, { "epoch": 0.18143925462792693, "grad_norm": 1.351620330526178, "learning_rate": 9.41057835353192e-06, "loss": 0.6546, "step": 5920 }, { "epoch": 0.18146990315066813, "grad_norm": 1.457372132476881, "learning_rate": 9.410344549798296e-06, "loss": 0.7583, "step": 5921 }, { "epoch": 0.18150055167340934, "grad_norm": 1.259860158872118, "learning_rate": 9.410110702608418e-06, "loss": 0.772, "step": 5922 }, { "epoch": 0.18153120019615054, "grad_norm": 1.395898916348052, "learning_rate": 9.409876811964591e-06, "loss": 0.7344, "step": 5923 }, { "epoch": 0.18156184871889175, "grad_norm": 1.4211625037704343, "learning_rate": 9.409642877869118e-06, "loss": 0.7229, "step": 5924 }, { "epoch": 0.18159249724163296, "grad_norm": 1.5066082262149878, "learning_rate": 9.409408900324308e-06, "loss": 0.6821, "step": 5925 }, { "epoch": 0.18162314576437416, "grad_norm": 1.542177460109409, "learning_rate": 9.409174879332463e-06, "loss": 0.7361, "step": 5926 }, { "epoch": 0.18165379428711537, "grad_norm": 1.615837898034409, "learning_rate": 9.408940814895889e-06, "loss": 0.6996, "step": 5927 }, { "epoch": 0.18168444280985657, "grad_norm": 1.1855581285953853, "learning_rate": 9.408706707016895e-06, "loss": 0.7259, "step": 5928 }, { "epoch": 0.18171509133259778, "grad_norm": 1.2124124539484444, "learning_rate": 9.408472555697783e-06, "loss": 0.7317, "step": 5929 }, { "epoch": 0.18174573985533898, "grad_norm": 1.165689424182793, "learning_rate": 9.408238360940864e-06, "loss": 0.7516, "step": 5930 }, { "epoch": 0.1817763883780802, "grad_norm": 1.1899722026028645, "learning_rate": 9.408004122748447e-06, "loss": 0.7182, "step": 5931 }, { "epoch": 0.18180703690082137, "grad_norm": 0.5251069018566287, "learning_rate": 9.407769841122834e-06, "loss": 0.4582, "step": 5932 }, { "epoch": 0.18183768542356257, "grad_norm": 0.5500362516638735, "learning_rate": 9.40753551606634e-06, "loss": 0.4687, "step": 5933 }, { "epoch": 0.18186833394630378, "grad_norm": 0.48409184675112904, "learning_rate": 9.40730114758127e-06, "loss": 0.4709, "step": 5934 }, { "epoch": 0.18189898246904498, "grad_norm": 1.3751759150142946, "learning_rate": 9.407066735669931e-06, "loss": 0.7476, "step": 5935 }, { "epoch": 0.1819296309917862, "grad_norm": 1.3580766956898023, "learning_rate": 9.40683228033464e-06, "loss": 0.7632, "step": 5936 }, { "epoch": 0.1819602795145274, "grad_norm": 1.408860990383604, "learning_rate": 9.4065977815777e-06, "loss": 0.6859, "step": 5937 }, { "epoch": 0.1819909280372686, "grad_norm": 1.4993303160920206, "learning_rate": 9.406363239401427e-06, "loss": 0.7566, "step": 5938 }, { "epoch": 0.1820215765600098, "grad_norm": 1.499768548681632, "learning_rate": 9.406128653808128e-06, "loss": 0.725, "step": 5939 }, { "epoch": 0.182052225082751, "grad_norm": 1.4111977809481433, "learning_rate": 9.405894024800118e-06, "loss": 0.7212, "step": 5940 }, { "epoch": 0.18208287360549222, "grad_norm": 1.2416552033164376, "learning_rate": 9.405659352379704e-06, "loss": 0.6452, "step": 5941 }, { "epoch": 0.18211352212823342, "grad_norm": 1.3772797208594172, "learning_rate": 9.405424636549202e-06, "loss": 0.614, "step": 5942 }, { "epoch": 0.18214417065097463, "grad_norm": 0.7220767960621745, "learning_rate": 9.405189877310925e-06, "loss": 0.475, "step": 5943 }, { "epoch": 0.18217481917371584, "grad_norm": 1.448846020546112, "learning_rate": 9.404955074667185e-06, "loss": 0.8006, "step": 5944 }, { "epoch": 0.18220546769645704, "grad_norm": 0.6189519406125212, "learning_rate": 9.404720228620294e-06, "loss": 0.4501, "step": 5945 }, { "epoch": 0.18223611621919825, "grad_norm": 0.4911904758527905, "learning_rate": 9.404485339172568e-06, "loss": 0.4642, "step": 5946 }, { "epoch": 0.18226676474193945, "grad_norm": 1.4705853041039987, "learning_rate": 9.404250406326323e-06, "loss": 0.6329, "step": 5947 }, { "epoch": 0.18229741326468063, "grad_norm": 1.4458674104272669, "learning_rate": 9.40401543008387e-06, "loss": 0.782, "step": 5948 }, { "epoch": 0.18232806178742184, "grad_norm": 1.4656142427712613, "learning_rate": 9.403780410447528e-06, "loss": 0.6893, "step": 5949 }, { "epoch": 0.18235871031016304, "grad_norm": 1.3583417504542517, "learning_rate": 9.40354534741961e-06, "loss": 0.7421, "step": 5950 }, { "epoch": 0.18238935883290425, "grad_norm": 1.5473465336116456, "learning_rate": 9.403310241002433e-06, "loss": 0.7372, "step": 5951 }, { "epoch": 0.18242000735564545, "grad_norm": 1.297399455415503, "learning_rate": 9.403075091198311e-06, "loss": 0.6892, "step": 5952 }, { "epoch": 0.18245065587838666, "grad_norm": 1.4126230528350965, "learning_rate": 9.402839898009566e-06, "loss": 0.7058, "step": 5953 }, { "epoch": 0.18248130440112786, "grad_norm": 1.2418208776235777, "learning_rate": 9.402604661438513e-06, "loss": 0.6588, "step": 5954 }, { "epoch": 0.18251195292386907, "grad_norm": 1.300404267509589, "learning_rate": 9.40236938148747e-06, "loss": 0.7012, "step": 5955 }, { "epoch": 0.18254260144661028, "grad_norm": 1.4332547696355884, "learning_rate": 9.402134058158753e-06, "loss": 0.7319, "step": 5956 }, { "epoch": 0.18257324996935148, "grad_norm": 1.1574644521579933, "learning_rate": 9.401898691454686e-06, "loss": 0.6992, "step": 5957 }, { "epoch": 0.1826038984920927, "grad_norm": 1.349114001402754, "learning_rate": 9.401663281377583e-06, "loss": 0.6822, "step": 5958 }, { "epoch": 0.1826345470148339, "grad_norm": 1.3489377230932629, "learning_rate": 9.401427827929766e-06, "loss": 0.7068, "step": 5959 }, { "epoch": 0.1826651955375751, "grad_norm": 1.6530764115164818, "learning_rate": 9.401192331113553e-06, "loss": 0.8465, "step": 5960 }, { "epoch": 0.1826958440603163, "grad_norm": 1.4618750038273087, "learning_rate": 9.400956790931268e-06, "loss": 0.7344, "step": 5961 }, { "epoch": 0.1827264925830575, "grad_norm": 1.3935480687330277, "learning_rate": 9.400721207385228e-06, "loss": 0.7517, "step": 5962 }, { "epoch": 0.1827571411057987, "grad_norm": 1.3933899910765262, "learning_rate": 9.400485580477757e-06, "loss": 0.7673, "step": 5963 }, { "epoch": 0.1827877896285399, "grad_norm": 1.34895737570009, "learning_rate": 9.400249910211176e-06, "loss": 0.6928, "step": 5964 }, { "epoch": 0.1828184381512811, "grad_norm": 1.3573333963173828, "learning_rate": 9.400014196587805e-06, "loss": 0.7582, "step": 5965 }, { "epoch": 0.1828490866740223, "grad_norm": 1.3035365520056197, "learning_rate": 9.39977843960997e-06, "loss": 0.7935, "step": 5966 }, { "epoch": 0.1828797351967635, "grad_norm": 1.217679048117287, "learning_rate": 9.399542639279992e-06, "loss": 0.5659, "step": 5967 }, { "epoch": 0.18291038371950472, "grad_norm": 1.2859436898068928, "learning_rate": 9.399306795600193e-06, "loss": 0.7608, "step": 5968 }, { "epoch": 0.18294103224224592, "grad_norm": 1.48260326213861, "learning_rate": 9.399070908572902e-06, "loss": 0.7258, "step": 5969 }, { "epoch": 0.18297168076498713, "grad_norm": 1.4954908006802805, "learning_rate": 9.398834978200438e-06, "loss": 0.7502, "step": 5970 }, { "epoch": 0.18300232928772833, "grad_norm": 1.2897327176724493, "learning_rate": 9.398599004485127e-06, "loss": 0.4904, "step": 5971 }, { "epoch": 0.18303297781046954, "grad_norm": 1.5602546224175984, "learning_rate": 9.398362987429294e-06, "loss": 0.8225, "step": 5972 }, { "epoch": 0.18306362633321074, "grad_norm": 1.2800515302137354, "learning_rate": 9.398126927035267e-06, "loss": 0.6144, "step": 5973 }, { "epoch": 0.18309427485595195, "grad_norm": 1.218939799180822, "learning_rate": 9.397890823305369e-06, "loss": 0.7542, "step": 5974 }, { "epoch": 0.18312492337869316, "grad_norm": 1.1431827934965348, "learning_rate": 9.397654676241927e-06, "loss": 0.6781, "step": 5975 }, { "epoch": 0.18315557190143436, "grad_norm": 1.3067640247069294, "learning_rate": 9.39741848584727e-06, "loss": 0.7464, "step": 5976 }, { "epoch": 0.18318622042417557, "grad_norm": 1.3396281135800256, "learning_rate": 9.397182252123722e-06, "loss": 0.7981, "step": 5977 }, { "epoch": 0.18321686894691677, "grad_norm": 1.2033704802665997, "learning_rate": 9.396945975073613e-06, "loss": 0.6946, "step": 5978 }, { "epoch": 0.18324751746965795, "grad_norm": 1.2239982541560428, "learning_rate": 9.39670965469927e-06, "loss": 0.659, "step": 5979 }, { "epoch": 0.18327816599239916, "grad_norm": 1.263375777828935, "learning_rate": 9.396473291003021e-06, "loss": 0.7407, "step": 5980 }, { "epoch": 0.18330881451514036, "grad_norm": 1.4501200125058193, "learning_rate": 9.396236883987196e-06, "loss": 0.8741, "step": 5981 }, { "epoch": 0.18333946303788157, "grad_norm": 1.321447361549967, "learning_rate": 9.396000433654124e-06, "loss": 0.6897, "step": 5982 }, { "epoch": 0.18337011156062277, "grad_norm": 1.1779590912318538, "learning_rate": 9.395763940006136e-06, "loss": 0.6928, "step": 5983 }, { "epoch": 0.18340076008336398, "grad_norm": 0.896077218090852, "learning_rate": 9.395527403045562e-06, "loss": 0.4811, "step": 5984 }, { "epoch": 0.18343140860610518, "grad_norm": 1.496715275905292, "learning_rate": 9.395290822774729e-06, "loss": 0.815, "step": 5985 }, { "epoch": 0.1834620571288464, "grad_norm": 1.3660030643754992, "learning_rate": 9.395054199195974e-06, "loss": 0.7533, "step": 5986 }, { "epoch": 0.1834927056515876, "grad_norm": 0.681656157693378, "learning_rate": 9.394817532311625e-06, "loss": 0.4628, "step": 5987 }, { "epoch": 0.1835233541743288, "grad_norm": 1.3402652818394207, "learning_rate": 9.394580822124012e-06, "loss": 0.7912, "step": 5988 }, { "epoch": 0.18355400269707, "grad_norm": 0.48950148719565184, "learning_rate": 9.39434406863547e-06, "loss": 0.4683, "step": 5989 }, { "epoch": 0.1835846512198112, "grad_norm": 1.3317238679459331, "learning_rate": 9.394107271848334e-06, "loss": 0.6537, "step": 5990 }, { "epoch": 0.18361529974255242, "grad_norm": 1.3749549307313973, "learning_rate": 9.393870431764933e-06, "loss": 0.6958, "step": 5991 }, { "epoch": 0.18364594826529362, "grad_norm": 0.600200529343332, "learning_rate": 9.393633548387603e-06, "loss": 0.4588, "step": 5992 }, { "epoch": 0.18367659678803483, "grad_norm": 0.5875180636228206, "learning_rate": 9.393396621718678e-06, "loss": 0.4453, "step": 5993 }, { "epoch": 0.183707245310776, "grad_norm": 1.4294949230484175, "learning_rate": 9.39315965176049e-06, "loss": 0.845, "step": 5994 }, { "epoch": 0.1837378938335172, "grad_norm": 1.3398810951373967, "learning_rate": 9.392922638515379e-06, "loss": 0.7448, "step": 5995 }, { "epoch": 0.18376854235625842, "grad_norm": 1.2944671701681365, "learning_rate": 9.392685581985674e-06, "loss": 0.694, "step": 5996 }, { "epoch": 0.18379919087899962, "grad_norm": 1.4679716859315133, "learning_rate": 9.392448482173717e-06, "loss": 0.7629, "step": 5997 }, { "epoch": 0.18382983940174083, "grad_norm": 1.3512181570268444, "learning_rate": 9.392211339081839e-06, "loss": 0.6122, "step": 5998 }, { "epoch": 0.18386048792448204, "grad_norm": 1.4326457400511252, "learning_rate": 9.39197415271238e-06, "loss": 0.715, "step": 5999 }, { "epoch": 0.18389113644722324, "grad_norm": 1.4349663195296698, "learning_rate": 9.391736923067675e-06, "loss": 0.6526, "step": 6000 }, { "epoch": 0.18392178496996445, "grad_norm": 1.3739156053040293, "learning_rate": 9.391499650150065e-06, "loss": 0.7216, "step": 6001 }, { "epoch": 0.18395243349270565, "grad_norm": 0.7180522335698883, "learning_rate": 9.391262333961883e-06, "loss": 0.4831, "step": 6002 }, { "epoch": 0.18398308201544686, "grad_norm": 1.5185508913905466, "learning_rate": 9.39102497450547e-06, "loss": 0.5973, "step": 6003 }, { "epoch": 0.18401373053818806, "grad_norm": 0.5607159778200057, "learning_rate": 9.390787571783165e-06, "loss": 0.4633, "step": 6004 }, { "epoch": 0.18404437906092927, "grad_norm": 1.3171849643307714, "learning_rate": 9.390550125797306e-06, "loss": 0.6776, "step": 6005 }, { "epoch": 0.18407502758367048, "grad_norm": 1.4420054155113187, "learning_rate": 9.390312636550232e-06, "loss": 0.7499, "step": 6006 }, { "epoch": 0.18410567610641168, "grad_norm": 0.5006032043978556, "learning_rate": 9.390075104044286e-06, "loss": 0.453, "step": 6007 }, { "epoch": 0.1841363246291529, "grad_norm": 1.3530616017968413, "learning_rate": 9.389837528281807e-06, "loss": 0.6898, "step": 6008 }, { "epoch": 0.1841669731518941, "grad_norm": 1.3791666137943308, "learning_rate": 9.389599909265135e-06, "loss": 0.692, "step": 6009 }, { "epoch": 0.18419762167463527, "grad_norm": 1.5291376821068772, "learning_rate": 9.389362246996611e-06, "loss": 0.7416, "step": 6010 }, { "epoch": 0.18422827019737648, "grad_norm": 1.3808341918124531, "learning_rate": 9.38912454147858e-06, "loss": 0.7304, "step": 6011 }, { "epoch": 0.18425891872011768, "grad_norm": 1.2636607983337655, "learning_rate": 9.38888679271338e-06, "loss": 0.7588, "step": 6012 }, { "epoch": 0.1842895672428589, "grad_norm": 1.2808430852983863, "learning_rate": 9.388649000703357e-06, "loss": 0.6913, "step": 6013 }, { "epoch": 0.1843202157656001, "grad_norm": 1.4237798898197966, "learning_rate": 9.38841116545085e-06, "loss": 0.7005, "step": 6014 }, { "epoch": 0.1843508642883413, "grad_norm": 1.2914913049139827, "learning_rate": 9.388173286958207e-06, "loss": 0.7054, "step": 6015 }, { "epoch": 0.1843815128110825, "grad_norm": 1.3813187896091663, "learning_rate": 9.387935365227769e-06, "loss": 0.7397, "step": 6016 }, { "epoch": 0.1844121613338237, "grad_norm": 1.1482917566925785, "learning_rate": 9.387697400261882e-06, "loss": 0.6567, "step": 6017 }, { "epoch": 0.18444280985656492, "grad_norm": 1.3699415527226027, "learning_rate": 9.38745939206289e-06, "loss": 0.7238, "step": 6018 }, { "epoch": 0.18447345837930612, "grad_norm": 1.2640913890778287, "learning_rate": 9.387221340633137e-06, "loss": 0.7321, "step": 6019 }, { "epoch": 0.18450410690204733, "grad_norm": 1.4883390525773919, "learning_rate": 9.386983245974972e-06, "loss": 0.6811, "step": 6020 }, { "epoch": 0.18453475542478853, "grad_norm": 1.1258787369992287, "learning_rate": 9.386745108090736e-06, "loss": 0.7557, "step": 6021 }, { "epoch": 0.18456540394752974, "grad_norm": 0.7469450779013558, "learning_rate": 9.38650692698278e-06, "loss": 0.4497, "step": 6022 }, { "epoch": 0.18459605247027094, "grad_norm": 1.2224548192104685, "learning_rate": 9.386268702653447e-06, "loss": 0.6557, "step": 6023 }, { "epoch": 0.18462670099301215, "grad_norm": 1.3452399655206926, "learning_rate": 9.386030435105085e-06, "loss": 0.7116, "step": 6024 }, { "epoch": 0.18465734951575333, "grad_norm": 1.193159661184628, "learning_rate": 9.385792124340045e-06, "loss": 0.6783, "step": 6025 }, { "epoch": 0.18468799803849453, "grad_norm": 1.5575988342442302, "learning_rate": 9.385553770360674e-06, "loss": 0.6915, "step": 6026 }, { "epoch": 0.18471864656123574, "grad_norm": 1.4264409625866021, "learning_rate": 9.385315373169319e-06, "loss": 0.7336, "step": 6027 }, { "epoch": 0.18474929508397694, "grad_norm": 1.330169366685998, "learning_rate": 9.385076932768328e-06, "loss": 0.7664, "step": 6028 }, { "epoch": 0.18477994360671815, "grad_norm": 0.5107888069365006, "learning_rate": 9.384838449160055e-06, "loss": 0.4528, "step": 6029 }, { "epoch": 0.18481059212945936, "grad_norm": 1.275927669527744, "learning_rate": 9.384599922346843e-06, "loss": 0.7382, "step": 6030 }, { "epoch": 0.18484124065220056, "grad_norm": 1.389570745095117, "learning_rate": 9.384361352331048e-06, "loss": 0.7015, "step": 6031 }, { "epoch": 0.18487188917494177, "grad_norm": 1.310472961434943, "learning_rate": 9.38412273911502e-06, "loss": 0.6908, "step": 6032 }, { "epoch": 0.18490253769768297, "grad_norm": 1.299978827457656, "learning_rate": 9.383884082701107e-06, "loss": 0.7665, "step": 6033 }, { "epoch": 0.18493318622042418, "grad_norm": 0.5406085414448883, "learning_rate": 9.383645383091663e-06, "loss": 0.4684, "step": 6034 }, { "epoch": 0.18496383474316538, "grad_norm": 0.47470054352836427, "learning_rate": 9.383406640289041e-06, "loss": 0.4606, "step": 6035 }, { "epoch": 0.1849944832659066, "grad_norm": 1.5137791286170472, "learning_rate": 9.383167854295589e-06, "loss": 0.8357, "step": 6036 }, { "epoch": 0.1850251317886478, "grad_norm": 1.1861090350216918, "learning_rate": 9.382929025113665e-06, "loss": 0.7457, "step": 6037 }, { "epoch": 0.185055780311389, "grad_norm": 1.1939006073865468, "learning_rate": 9.38269015274562e-06, "loss": 0.6496, "step": 6038 }, { "epoch": 0.1850864288341302, "grad_norm": 1.3540855847754645, "learning_rate": 9.382451237193806e-06, "loss": 0.642, "step": 6039 }, { "epoch": 0.1851170773568714, "grad_norm": 1.3920370991233304, "learning_rate": 9.382212278460578e-06, "loss": 0.707, "step": 6040 }, { "epoch": 0.1851477258796126, "grad_norm": 0.5668363942029497, "learning_rate": 9.381973276548292e-06, "loss": 0.4635, "step": 6041 }, { "epoch": 0.1851783744023538, "grad_norm": 1.3372651614860573, "learning_rate": 9.381734231459303e-06, "loss": 0.6924, "step": 6042 }, { "epoch": 0.185209022925095, "grad_norm": 1.490529339199712, "learning_rate": 9.381495143195966e-06, "loss": 0.7234, "step": 6043 }, { "epoch": 0.1852396714478362, "grad_norm": 0.5108228638484288, "learning_rate": 9.381256011760635e-06, "loss": 0.4622, "step": 6044 }, { "epoch": 0.1852703199705774, "grad_norm": 1.3133100993099296, "learning_rate": 9.381016837155668e-06, "loss": 0.7196, "step": 6045 }, { "epoch": 0.18530096849331862, "grad_norm": 1.404087773876428, "learning_rate": 9.38077761938342e-06, "loss": 0.7516, "step": 6046 }, { "epoch": 0.18533161701605982, "grad_norm": 1.2926153371337392, "learning_rate": 9.380538358446252e-06, "loss": 0.7243, "step": 6047 }, { "epoch": 0.18536226553880103, "grad_norm": 1.1789235559658366, "learning_rate": 9.380299054346516e-06, "loss": 0.6864, "step": 6048 }, { "epoch": 0.18539291406154224, "grad_norm": 1.3827539915482885, "learning_rate": 9.380059707086573e-06, "loss": 0.7592, "step": 6049 }, { "epoch": 0.18542356258428344, "grad_norm": 1.2676393997172644, "learning_rate": 9.379820316668782e-06, "loss": 0.7578, "step": 6050 }, { "epoch": 0.18545421110702465, "grad_norm": 1.2957114979667526, "learning_rate": 9.379580883095501e-06, "loss": 0.6506, "step": 6051 }, { "epoch": 0.18548485962976585, "grad_norm": 1.3625947499353295, "learning_rate": 9.379341406369088e-06, "loss": 0.6762, "step": 6052 }, { "epoch": 0.18551550815250706, "grad_norm": 1.410075682134176, "learning_rate": 9.379101886491907e-06, "loss": 0.671, "step": 6053 }, { "epoch": 0.18554615667524826, "grad_norm": 1.3133150750746176, "learning_rate": 9.37886232346631e-06, "loss": 0.6279, "step": 6054 }, { "epoch": 0.18557680519798947, "grad_norm": 1.469221341874988, "learning_rate": 9.378622717294665e-06, "loss": 0.7657, "step": 6055 }, { "epoch": 0.18560745372073065, "grad_norm": 1.5772063907045544, "learning_rate": 9.378383067979329e-06, "loss": 0.7478, "step": 6056 }, { "epoch": 0.18563810224347185, "grad_norm": 1.3333268154721876, "learning_rate": 9.378143375522664e-06, "loss": 0.7005, "step": 6057 }, { "epoch": 0.18566875076621306, "grad_norm": 1.2215422715488877, "learning_rate": 9.377903639927032e-06, "loss": 0.7057, "step": 6058 }, { "epoch": 0.18569939928895426, "grad_norm": 1.3316358685588046, "learning_rate": 9.377663861194795e-06, "loss": 0.8196, "step": 6059 }, { "epoch": 0.18573004781169547, "grad_norm": 1.3244452105362172, "learning_rate": 9.377424039328317e-06, "loss": 0.6709, "step": 6060 }, { "epoch": 0.18576069633443668, "grad_norm": 1.1390795845115185, "learning_rate": 9.37718417432996e-06, "loss": 0.7312, "step": 6061 }, { "epoch": 0.18579134485717788, "grad_norm": 1.223334647057758, "learning_rate": 9.376944266202088e-06, "loss": 0.7148, "step": 6062 }, { "epoch": 0.1858219933799191, "grad_norm": 1.0863748504672708, "learning_rate": 9.376704314947062e-06, "loss": 0.6864, "step": 6063 }, { "epoch": 0.1858526419026603, "grad_norm": 1.5277938183614226, "learning_rate": 9.376464320567251e-06, "loss": 0.7667, "step": 6064 }, { "epoch": 0.1858832904254015, "grad_norm": 1.4028183486563912, "learning_rate": 9.376224283065017e-06, "loss": 0.6757, "step": 6065 }, { "epoch": 0.1859139389481427, "grad_norm": 1.2723828818455203, "learning_rate": 9.375984202442724e-06, "loss": 0.7648, "step": 6066 }, { "epoch": 0.1859445874708839, "grad_norm": 1.3671474149239893, "learning_rate": 9.37574407870274e-06, "loss": 0.7193, "step": 6067 }, { "epoch": 0.18597523599362512, "grad_norm": 0.7817140665118542, "learning_rate": 9.375503911847427e-06, "loss": 0.4671, "step": 6068 }, { "epoch": 0.18600588451636632, "grad_norm": 1.2159954570076892, "learning_rate": 9.375263701879158e-06, "loss": 0.7002, "step": 6069 }, { "epoch": 0.18603653303910753, "grad_norm": 1.3729911184909191, "learning_rate": 9.375023448800296e-06, "loss": 0.7417, "step": 6070 }, { "epoch": 0.18606718156184873, "grad_norm": 1.192211841284406, "learning_rate": 9.374783152613206e-06, "loss": 0.7579, "step": 6071 }, { "epoch": 0.1860978300845899, "grad_norm": 1.1542346816351738, "learning_rate": 9.374542813320261e-06, "loss": 0.6438, "step": 6072 }, { "epoch": 0.18612847860733112, "grad_norm": 1.222156115482117, "learning_rate": 9.374302430923827e-06, "loss": 0.7344, "step": 6073 }, { "epoch": 0.18615912713007232, "grad_norm": 1.3166710310038579, "learning_rate": 9.37406200542627e-06, "loss": 0.7774, "step": 6074 }, { "epoch": 0.18618977565281353, "grad_norm": 1.2536628288041165, "learning_rate": 9.373821536829962e-06, "loss": 0.7764, "step": 6075 }, { "epoch": 0.18622042417555473, "grad_norm": 1.2671656314873891, "learning_rate": 9.37358102513727e-06, "loss": 0.7593, "step": 6076 }, { "epoch": 0.18625107269829594, "grad_norm": 1.3672692809883478, "learning_rate": 9.373340470350567e-06, "loss": 0.7624, "step": 6077 }, { "epoch": 0.18628172122103714, "grad_norm": 1.3899040571426795, "learning_rate": 9.373099872472219e-06, "loss": 0.8077, "step": 6078 }, { "epoch": 0.18631236974377835, "grad_norm": 1.255353393950124, "learning_rate": 9.3728592315046e-06, "loss": 0.71, "step": 6079 }, { "epoch": 0.18634301826651956, "grad_norm": 0.6226193746801035, "learning_rate": 9.37261854745008e-06, "loss": 0.4408, "step": 6080 }, { "epoch": 0.18637366678926076, "grad_norm": 1.3526532694681288, "learning_rate": 9.372377820311032e-06, "loss": 0.7888, "step": 6081 }, { "epoch": 0.18640431531200197, "grad_norm": 1.4501413736613902, "learning_rate": 9.372137050089826e-06, "loss": 0.7867, "step": 6082 }, { "epoch": 0.18643496383474317, "grad_norm": 1.3673812898240325, "learning_rate": 9.371896236788834e-06, "loss": 0.764, "step": 6083 }, { "epoch": 0.18646561235748438, "grad_norm": 1.363876992390315, "learning_rate": 9.37165538041043e-06, "loss": 0.6664, "step": 6084 }, { "epoch": 0.18649626088022558, "grad_norm": 1.2263283161404797, "learning_rate": 9.371414480956988e-06, "loss": 0.7285, "step": 6085 }, { "epoch": 0.1865269094029668, "grad_norm": 1.1994275270552737, "learning_rate": 9.37117353843088e-06, "loss": 0.7146, "step": 6086 }, { "epoch": 0.18655755792570797, "grad_norm": 1.39314675748386, "learning_rate": 9.37093255283448e-06, "loss": 0.6406, "step": 6087 }, { "epoch": 0.18658820644844917, "grad_norm": 1.4012202571277568, "learning_rate": 9.370691524170166e-06, "loss": 0.7857, "step": 6088 }, { "epoch": 0.18661885497119038, "grad_norm": 1.4237201386573604, "learning_rate": 9.370450452440307e-06, "loss": 0.674, "step": 6089 }, { "epoch": 0.18664950349393158, "grad_norm": 1.1848594429940726, "learning_rate": 9.370209337647282e-06, "loss": 0.6485, "step": 6090 }, { "epoch": 0.1866801520166728, "grad_norm": 1.372969863165298, "learning_rate": 9.369968179793467e-06, "loss": 0.7625, "step": 6091 }, { "epoch": 0.186710800539414, "grad_norm": 1.1857570618429478, "learning_rate": 9.369726978881237e-06, "loss": 0.6771, "step": 6092 }, { "epoch": 0.1867414490621552, "grad_norm": 1.4257353452976032, "learning_rate": 9.369485734912971e-06, "loss": 0.7846, "step": 6093 }, { "epoch": 0.1867720975848964, "grad_norm": 1.2597085208056245, "learning_rate": 9.369244447891041e-06, "loss": 0.6499, "step": 6094 }, { "epoch": 0.1868027461076376, "grad_norm": 1.461829100876119, "learning_rate": 9.36900311781783e-06, "loss": 0.7721, "step": 6095 }, { "epoch": 0.18683339463037882, "grad_norm": 1.3128450837691423, "learning_rate": 9.368761744695711e-06, "loss": 0.6302, "step": 6096 }, { "epoch": 0.18686404315312002, "grad_norm": 1.3592187914745018, "learning_rate": 9.368520328527066e-06, "loss": 0.6766, "step": 6097 }, { "epoch": 0.18689469167586123, "grad_norm": 1.2581323176769477, "learning_rate": 9.368278869314274e-06, "loss": 0.7405, "step": 6098 }, { "epoch": 0.18692534019860244, "grad_norm": 1.4941808291321332, "learning_rate": 9.36803736705971e-06, "loss": 0.7902, "step": 6099 }, { "epoch": 0.18695598872134364, "grad_norm": 1.264611313461754, "learning_rate": 9.367795821765758e-06, "loss": 0.7514, "step": 6100 }, { "epoch": 0.18698663724408485, "grad_norm": 1.357853020847735, "learning_rate": 9.367554233434795e-06, "loss": 0.7299, "step": 6101 }, { "epoch": 0.18701728576682605, "grad_norm": 1.2977976255188912, "learning_rate": 9.367312602069203e-06, "loss": 0.7847, "step": 6102 }, { "epoch": 0.18704793428956723, "grad_norm": 1.3459548164836606, "learning_rate": 9.367070927671361e-06, "loss": 0.7137, "step": 6103 }, { "epoch": 0.18707858281230844, "grad_norm": 1.3286227215519202, "learning_rate": 9.366829210243655e-06, "loss": 0.768, "step": 6104 }, { "epoch": 0.18710923133504964, "grad_norm": 0.5570679775451175, "learning_rate": 9.366587449788463e-06, "loss": 0.4765, "step": 6105 }, { "epoch": 0.18713987985779085, "grad_norm": 1.2683415840412557, "learning_rate": 9.366345646308165e-06, "loss": 0.756, "step": 6106 }, { "epoch": 0.18717052838053205, "grad_norm": 1.3153022415493683, "learning_rate": 9.366103799805148e-06, "loss": 0.7771, "step": 6107 }, { "epoch": 0.18720117690327326, "grad_norm": 1.2601912959846084, "learning_rate": 9.365861910281795e-06, "loss": 0.657, "step": 6108 }, { "epoch": 0.18723182542601446, "grad_norm": 1.279629611359922, "learning_rate": 9.365619977740484e-06, "loss": 0.712, "step": 6109 }, { "epoch": 0.18726247394875567, "grad_norm": 1.4045458194351366, "learning_rate": 9.365378002183605e-06, "loss": 0.7143, "step": 6110 }, { "epoch": 0.18729312247149688, "grad_norm": 1.2613222442470606, "learning_rate": 9.365135983613537e-06, "loss": 0.7573, "step": 6111 }, { "epoch": 0.18732377099423808, "grad_norm": 0.46562895448903086, "learning_rate": 9.36489392203267e-06, "loss": 0.4767, "step": 6112 }, { "epoch": 0.1873544195169793, "grad_norm": 1.3767191205940696, "learning_rate": 9.364651817443384e-06, "loss": 0.7968, "step": 6113 }, { "epoch": 0.1873850680397205, "grad_norm": 1.291729942824436, "learning_rate": 9.364409669848069e-06, "loss": 0.6721, "step": 6114 }, { "epoch": 0.1874157165624617, "grad_norm": 1.3618597011307663, "learning_rate": 9.364167479249108e-06, "loss": 0.704, "step": 6115 }, { "epoch": 0.1874463650852029, "grad_norm": 0.4930357956769989, "learning_rate": 9.363925245648888e-06, "loss": 0.4583, "step": 6116 }, { "epoch": 0.1874770136079441, "grad_norm": 0.47234964562808346, "learning_rate": 9.363682969049797e-06, "loss": 0.4742, "step": 6117 }, { "epoch": 0.1875076621306853, "grad_norm": 1.3912867028571125, "learning_rate": 9.363440649454218e-06, "loss": 0.7162, "step": 6118 }, { "epoch": 0.1875383106534265, "grad_norm": 1.229518430484891, "learning_rate": 9.363198286864545e-06, "loss": 0.7154, "step": 6119 }, { "epoch": 0.1875689591761677, "grad_norm": 0.4573547047141522, "learning_rate": 9.362955881283162e-06, "loss": 0.4439, "step": 6120 }, { "epoch": 0.1875996076989089, "grad_norm": 0.4611229603367367, "learning_rate": 9.36271343271246e-06, "loss": 0.468, "step": 6121 }, { "epoch": 0.1876302562216501, "grad_norm": 1.197638000521199, "learning_rate": 9.362470941154825e-06, "loss": 0.7772, "step": 6122 }, { "epoch": 0.18766090474439132, "grad_norm": 1.236951731155229, "learning_rate": 9.362228406612645e-06, "loss": 0.8111, "step": 6123 }, { "epoch": 0.18769155326713252, "grad_norm": 1.506562975767979, "learning_rate": 9.361985829088316e-06, "loss": 0.8034, "step": 6124 }, { "epoch": 0.18772220178987373, "grad_norm": 1.2233137827196716, "learning_rate": 9.361743208584223e-06, "loss": 0.6581, "step": 6125 }, { "epoch": 0.18775285031261493, "grad_norm": 1.3329618449015062, "learning_rate": 9.36150054510276e-06, "loss": 0.6979, "step": 6126 }, { "epoch": 0.18778349883535614, "grad_norm": 1.2188807663675978, "learning_rate": 9.361257838646313e-06, "loss": 0.676, "step": 6127 }, { "epoch": 0.18781414735809734, "grad_norm": 1.3851195559830676, "learning_rate": 9.361015089217277e-06, "loss": 0.7859, "step": 6128 }, { "epoch": 0.18784479588083855, "grad_norm": 1.2318480466604653, "learning_rate": 9.360772296818046e-06, "loss": 0.686, "step": 6129 }, { "epoch": 0.18787544440357976, "grad_norm": 1.230477463978384, "learning_rate": 9.360529461451009e-06, "loss": 0.7115, "step": 6130 }, { "epoch": 0.18790609292632096, "grad_norm": 1.2966978385269075, "learning_rate": 9.36028658311856e-06, "loss": 0.6798, "step": 6131 }, { "epoch": 0.18793674144906217, "grad_norm": 1.4533642353174283, "learning_rate": 9.360043661823089e-06, "loss": 0.7496, "step": 6132 }, { "epoch": 0.18796738997180337, "grad_norm": 1.4035158219183144, "learning_rate": 9.359800697566994e-06, "loss": 0.7522, "step": 6133 }, { "epoch": 0.18799803849454455, "grad_norm": 1.397361148861785, "learning_rate": 9.359557690352667e-06, "loss": 0.7337, "step": 6134 }, { "epoch": 0.18802868701728576, "grad_norm": 1.4159360043821305, "learning_rate": 9.359314640182504e-06, "loss": 0.693, "step": 6135 }, { "epoch": 0.18805933554002696, "grad_norm": 1.2176878428979456, "learning_rate": 9.359071547058898e-06, "loss": 0.7121, "step": 6136 }, { "epoch": 0.18808998406276817, "grad_norm": 1.524996822787248, "learning_rate": 9.358828410984244e-06, "loss": 0.6504, "step": 6137 }, { "epoch": 0.18812063258550937, "grad_norm": 1.1195015802631256, "learning_rate": 9.358585231960938e-06, "loss": 0.6332, "step": 6138 }, { "epoch": 0.18815128110825058, "grad_norm": 1.3188818005335452, "learning_rate": 9.358342009991377e-06, "loss": 0.7485, "step": 6139 }, { "epoch": 0.18818192963099178, "grad_norm": 1.256780439993484, "learning_rate": 9.358098745077957e-06, "loss": 0.6681, "step": 6140 }, { "epoch": 0.188212578153733, "grad_norm": 1.2610045504712966, "learning_rate": 9.357855437223075e-06, "loss": 0.7165, "step": 6141 }, { "epoch": 0.1882432266764742, "grad_norm": 1.4051186200990728, "learning_rate": 9.357612086429129e-06, "loss": 0.6476, "step": 6142 }, { "epoch": 0.1882738751992154, "grad_norm": 1.3937362426443896, "learning_rate": 9.357368692698515e-06, "loss": 0.7209, "step": 6143 }, { "epoch": 0.1883045237219566, "grad_norm": 1.2553551658631725, "learning_rate": 9.357125256033634e-06, "loss": 0.6639, "step": 6144 }, { "epoch": 0.1883351722446978, "grad_norm": 1.1548654326214611, "learning_rate": 9.356881776436881e-06, "loss": 0.7239, "step": 6145 }, { "epoch": 0.18836582076743902, "grad_norm": 1.465974806418125, "learning_rate": 9.356638253910659e-06, "loss": 0.717, "step": 6146 }, { "epoch": 0.18839646929018022, "grad_norm": 1.3075159829437806, "learning_rate": 9.356394688457364e-06, "loss": 0.7976, "step": 6147 }, { "epoch": 0.18842711781292143, "grad_norm": 1.2149972821165387, "learning_rate": 9.356151080079399e-06, "loss": 0.6699, "step": 6148 }, { "epoch": 0.1884577663356626, "grad_norm": 1.357535493421366, "learning_rate": 9.355907428779163e-06, "loss": 0.787, "step": 6149 }, { "epoch": 0.1884884148584038, "grad_norm": 0.6550123993888889, "learning_rate": 9.355663734559055e-06, "loss": 0.461, "step": 6150 }, { "epoch": 0.18851906338114502, "grad_norm": 1.2512827709273155, "learning_rate": 9.355419997421478e-06, "loss": 0.6379, "step": 6151 }, { "epoch": 0.18854971190388622, "grad_norm": 0.5107421616079247, "learning_rate": 9.355176217368833e-06, "loss": 0.4594, "step": 6152 }, { "epoch": 0.18858036042662743, "grad_norm": 0.4742234901202837, "learning_rate": 9.354932394403524e-06, "loss": 0.4529, "step": 6153 }, { "epoch": 0.18861100894936864, "grad_norm": 1.4386558204198927, "learning_rate": 9.354688528527952e-06, "loss": 0.6693, "step": 6154 }, { "epoch": 0.18864165747210984, "grad_norm": 1.482908334606118, "learning_rate": 9.354444619744519e-06, "loss": 0.7589, "step": 6155 }, { "epoch": 0.18867230599485105, "grad_norm": 1.3930167105744278, "learning_rate": 9.354200668055629e-06, "loss": 0.7673, "step": 6156 }, { "epoch": 0.18870295451759225, "grad_norm": 1.2390313844326792, "learning_rate": 9.353956673463684e-06, "loss": 0.6532, "step": 6157 }, { "epoch": 0.18873360304033346, "grad_norm": 0.679652509599598, "learning_rate": 9.353712635971093e-06, "loss": 0.4483, "step": 6158 }, { "epoch": 0.18876425156307466, "grad_norm": 1.361002378599138, "learning_rate": 9.353468555580256e-06, "loss": 0.6429, "step": 6159 }, { "epoch": 0.18879490008581587, "grad_norm": 1.4402548452315835, "learning_rate": 9.353224432293578e-06, "loss": 0.6565, "step": 6160 }, { "epoch": 0.18882554860855708, "grad_norm": 0.5398961745807294, "learning_rate": 9.352980266113468e-06, "loss": 0.4705, "step": 6161 }, { "epoch": 0.18885619713129828, "grad_norm": 1.3384223001684026, "learning_rate": 9.352736057042329e-06, "loss": 0.7441, "step": 6162 }, { "epoch": 0.1888868456540395, "grad_norm": 1.2800114378454321, "learning_rate": 9.352491805082568e-06, "loss": 0.7162, "step": 6163 }, { "epoch": 0.1889174941767807, "grad_norm": 1.1663562435595156, "learning_rate": 9.352247510236591e-06, "loss": 0.7385, "step": 6164 }, { "epoch": 0.18894814269952187, "grad_norm": 1.292961419147991, "learning_rate": 9.352003172506807e-06, "loss": 0.7323, "step": 6165 }, { "epoch": 0.18897879122226308, "grad_norm": 1.2782474209792678, "learning_rate": 9.351758791895621e-06, "loss": 0.7005, "step": 6166 }, { "epoch": 0.18900943974500428, "grad_norm": 1.4439368504982213, "learning_rate": 9.351514368405442e-06, "loss": 0.7579, "step": 6167 }, { "epoch": 0.1890400882677455, "grad_norm": 1.1911429739482418, "learning_rate": 9.35126990203868e-06, "loss": 0.6988, "step": 6168 }, { "epoch": 0.1890707367904867, "grad_norm": 1.3222588153881303, "learning_rate": 9.35102539279774e-06, "loss": 0.6596, "step": 6169 }, { "epoch": 0.1891013853132279, "grad_norm": 1.1936676424742894, "learning_rate": 9.350780840685036e-06, "loss": 0.7232, "step": 6170 }, { "epoch": 0.1891320338359691, "grad_norm": 1.234295903969572, "learning_rate": 9.350536245702975e-06, "loss": 0.7618, "step": 6171 }, { "epoch": 0.1891626823587103, "grad_norm": 1.3610568216274952, "learning_rate": 9.350291607853965e-06, "loss": 0.8575, "step": 6172 }, { "epoch": 0.18919333088145152, "grad_norm": 1.338856395190489, "learning_rate": 9.350046927140422e-06, "loss": 0.7326, "step": 6173 }, { "epoch": 0.18922397940419272, "grad_norm": 1.2107433855558882, "learning_rate": 9.34980220356475e-06, "loss": 0.7182, "step": 6174 }, { "epoch": 0.18925462792693393, "grad_norm": 1.436767784744853, "learning_rate": 9.349557437129366e-06, "loss": 0.7324, "step": 6175 }, { "epoch": 0.18928527644967513, "grad_norm": 0.8732673685162742, "learning_rate": 9.34931262783668e-06, "loss": 0.4624, "step": 6176 }, { "epoch": 0.18931592497241634, "grad_norm": 1.2387539196923798, "learning_rate": 9.349067775689102e-06, "loss": 0.7563, "step": 6177 }, { "epoch": 0.18934657349515754, "grad_norm": 1.2118630474626224, "learning_rate": 9.348822880689049e-06, "loss": 0.7204, "step": 6178 }, { "epoch": 0.18937722201789875, "grad_norm": 1.2098053113057314, "learning_rate": 9.34857794283893e-06, "loss": 0.6802, "step": 6179 }, { "epoch": 0.18940787054063993, "grad_norm": 1.2924518539014667, "learning_rate": 9.34833296214116e-06, "loss": 0.8051, "step": 6180 }, { "epoch": 0.18943851906338113, "grad_norm": 1.3402359816101883, "learning_rate": 9.348087938598153e-06, "loss": 0.7729, "step": 6181 }, { "epoch": 0.18946916758612234, "grad_norm": 0.5404376798979814, "learning_rate": 9.347842872212323e-06, "loss": 0.4568, "step": 6182 }, { "epoch": 0.18949981610886354, "grad_norm": 1.3168197993358484, "learning_rate": 9.347597762986085e-06, "loss": 0.7103, "step": 6183 }, { "epoch": 0.18953046463160475, "grad_norm": 1.2377312424950766, "learning_rate": 9.347352610921853e-06, "loss": 0.6417, "step": 6184 }, { "epoch": 0.18956111315434596, "grad_norm": 0.5264999030212038, "learning_rate": 9.347107416022043e-06, "loss": 0.4481, "step": 6185 }, { "epoch": 0.18959176167708716, "grad_norm": 1.1890109332916137, "learning_rate": 9.346862178289073e-06, "loss": 0.6967, "step": 6186 }, { "epoch": 0.18962241019982837, "grad_norm": 1.3436433181576986, "learning_rate": 9.346616897725357e-06, "loss": 0.7108, "step": 6187 }, { "epoch": 0.18965305872256957, "grad_norm": 1.4529933881299493, "learning_rate": 9.346371574333312e-06, "loss": 0.8231, "step": 6188 }, { "epoch": 0.18968370724531078, "grad_norm": 1.420105966200821, "learning_rate": 9.346126208115358e-06, "loss": 0.7679, "step": 6189 }, { "epoch": 0.18971435576805198, "grad_norm": 1.5152763347271248, "learning_rate": 9.345880799073908e-06, "loss": 0.707, "step": 6190 }, { "epoch": 0.1897450042907932, "grad_norm": 0.49447628782110975, "learning_rate": 9.345635347211383e-06, "loss": 0.4776, "step": 6191 }, { "epoch": 0.1897756528135344, "grad_norm": 1.162412060933107, "learning_rate": 9.345389852530201e-06, "loss": 0.7539, "step": 6192 }, { "epoch": 0.1898063013362756, "grad_norm": 1.326276071967049, "learning_rate": 9.345144315032783e-06, "loss": 0.8251, "step": 6193 }, { "epoch": 0.1898369498590168, "grad_norm": 1.3852677844136265, "learning_rate": 9.344898734721544e-06, "loss": 0.7268, "step": 6194 }, { "epoch": 0.189867598381758, "grad_norm": 1.313008223861237, "learning_rate": 9.344653111598907e-06, "loss": 0.716, "step": 6195 }, { "epoch": 0.1898982469044992, "grad_norm": 1.4414968714813108, "learning_rate": 9.344407445667292e-06, "loss": 0.8032, "step": 6196 }, { "epoch": 0.1899288954272404, "grad_norm": 0.4952550171099447, "learning_rate": 9.344161736929116e-06, "loss": 0.4613, "step": 6197 }, { "epoch": 0.1899595439499816, "grad_norm": 0.5045860593651744, "learning_rate": 9.343915985386806e-06, "loss": 0.4774, "step": 6198 }, { "epoch": 0.1899901924727228, "grad_norm": 1.310709974951817, "learning_rate": 9.34367019104278e-06, "loss": 0.6991, "step": 6199 }, { "epoch": 0.190020840995464, "grad_norm": 0.4478633540774723, "learning_rate": 9.343424353899459e-06, "loss": 0.4569, "step": 6200 }, { "epoch": 0.19005148951820522, "grad_norm": 1.3991864969743713, "learning_rate": 9.343178473959266e-06, "loss": 0.7018, "step": 6201 }, { "epoch": 0.19008213804094642, "grad_norm": 1.2650049156154837, "learning_rate": 9.342932551224626e-06, "loss": 0.7022, "step": 6202 }, { "epoch": 0.19011278656368763, "grad_norm": 1.4217416404882073, "learning_rate": 9.34268658569796e-06, "loss": 0.7176, "step": 6203 }, { "epoch": 0.19014343508642884, "grad_norm": 0.5165043604644778, "learning_rate": 9.34244057738169e-06, "loss": 0.457, "step": 6204 }, { "epoch": 0.19017408360917004, "grad_norm": 1.2840729445375185, "learning_rate": 9.342194526278243e-06, "loss": 0.7199, "step": 6205 }, { "epoch": 0.19020473213191125, "grad_norm": 1.4056423513318024, "learning_rate": 9.341948432390044e-06, "loss": 0.6907, "step": 6206 }, { "epoch": 0.19023538065465245, "grad_norm": 1.2061894643203122, "learning_rate": 9.341702295719515e-06, "loss": 0.7208, "step": 6207 }, { "epoch": 0.19026602917739366, "grad_norm": 1.2866247084803775, "learning_rate": 9.341456116269084e-06, "loss": 0.7463, "step": 6208 }, { "epoch": 0.19029667770013486, "grad_norm": 1.3553789526322844, "learning_rate": 9.341209894041173e-06, "loss": 0.7335, "step": 6209 }, { "epoch": 0.19032732622287607, "grad_norm": 1.3006565307531164, "learning_rate": 9.340963629038208e-06, "loss": 0.6765, "step": 6210 }, { "epoch": 0.19035797474561725, "grad_norm": 1.374273806832208, "learning_rate": 9.340717321262622e-06, "loss": 0.798, "step": 6211 }, { "epoch": 0.19038862326835845, "grad_norm": 1.4212689944558634, "learning_rate": 9.340470970716836e-06, "loss": 0.6371, "step": 6212 }, { "epoch": 0.19041927179109966, "grad_norm": 1.3943119539722457, "learning_rate": 9.340224577403278e-06, "loss": 0.7172, "step": 6213 }, { "epoch": 0.19044992031384086, "grad_norm": 1.3032948316128872, "learning_rate": 9.339978141324378e-06, "loss": 0.6841, "step": 6214 }, { "epoch": 0.19048056883658207, "grad_norm": 1.2428845578300893, "learning_rate": 9.339731662482564e-06, "loss": 0.6665, "step": 6215 }, { "epoch": 0.19051121735932328, "grad_norm": 1.720104731804578, "learning_rate": 9.339485140880261e-06, "loss": 0.8267, "step": 6216 }, { "epoch": 0.19054186588206448, "grad_norm": 1.2623805409059532, "learning_rate": 9.339238576519902e-06, "loss": 0.6687, "step": 6217 }, { "epoch": 0.1905725144048057, "grad_norm": 1.2795210020763597, "learning_rate": 9.338991969403914e-06, "loss": 0.7852, "step": 6218 }, { "epoch": 0.1906031629275469, "grad_norm": 1.1883849349605355, "learning_rate": 9.33874531953473e-06, "loss": 0.643, "step": 6219 }, { "epoch": 0.1906338114502881, "grad_norm": 1.3035547283509092, "learning_rate": 9.338498626914776e-06, "loss": 0.7642, "step": 6220 }, { "epoch": 0.1906644599730293, "grad_norm": 1.4642895252032746, "learning_rate": 9.338251891546486e-06, "loss": 0.7733, "step": 6221 }, { "epoch": 0.1906951084957705, "grad_norm": 1.1596409329678148, "learning_rate": 9.33800511343229e-06, "loss": 0.6858, "step": 6222 }, { "epoch": 0.19072575701851172, "grad_norm": 1.297161781493326, "learning_rate": 9.337758292574622e-06, "loss": 0.7394, "step": 6223 }, { "epoch": 0.19075640554125292, "grad_norm": 1.2887704647629623, "learning_rate": 9.337511428975908e-06, "loss": 0.7689, "step": 6224 }, { "epoch": 0.19078705406399413, "grad_norm": 1.339892512374856, "learning_rate": 9.337264522638584e-06, "loss": 0.8378, "step": 6225 }, { "epoch": 0.19081770258673533, "grad_norm": 1.2823532138765592, "learning_rate": 9.337017573565086e-06, "loss": 0.7439, "step": 6226 }, { "epoch": 0.1908483511094765, "grad_norm": 1.3795086195841477, "learning_rate": 9.336770581757844e-06, "loss": 0.7425, "step": 6227 }, { "epoch": 0.19087899963221772, "grad_norm": 1.5297230739906893, "learning_rate": 9.336523547219289e-06, "loss": 0.7526, "step": 6228 }, { "epoch": 0.19090964815495892, "grad_norm": 1.327484165478336, "learning_rate": 9.33627646995186e-06, "loss": 0.7817, "step": 6229 }, { "epoch": 0.19094029667770013, "grad_norm": 0.5878216088815643, "learning_rate": 9.336029349957989e-06, "loss": 0.474, "step": 6230 }, { "epoch": 0.19097094520044133, "grad_norm": 1.3032753785379172, "learning_rate": 9.335782187240111e-06, "loss": 0.6482, "step": 6231 }, { "epoch": 0.19100159372318254, "grad_norm": 1.3303126794989895, "learning_rate": 9.335534981800662e-06, "loss": 0.749, "step": 6232 }, { "epoch": 0.19103224224592374, "grad_norm": 1.2927241798510514, "learning_rate": 9.335287733642078e-06, "loss": 0.7425, "step": 6233 }, { "epoch": 0.19106289076866495, "grad_norm": 1.5208167883896742, "learning_rate": 9.335040442766794e-06, "loss": 0.68, "step": 6234 }, { "epoch": 0.19109353929140616, "grad_norm": 1.2168937887488687, "learning_rate": 9.334793109177248e-06, "loss": 0.7239, "step": 6235 }, { "epoch": 0.19112418781414736, "grad_norm": 1.3946570585710565, "learning_rate": 9.334545732875876e-06, "loss": 0.6856, "step": 6236 }, { "epoch": 0.19115483633688857, "grad_norm": 1.3209906779021285, "learning_rate": 9.334298313865115e-06, "loss": 0.761, "step": 6237 }, { "epoch": 0.19118548485962977, "grad_norm": 1.2679928273008774, "learning_rate": 9.334050852147404e-06, "loss": 0.6933, "step": 6238 }, { "epoch": 0.19121613338237098, "grad_norm": 1.235907558936074, "learning_rate": 9.333803347725184e-06, "loss": 0.6916, "step": 6239 }, { "epoch": 0.19124678190511218, "grad_norm": 1.5008355568173226, "learning_rate": 9.333555800600888e-06, "loss": 0.7453, "step": 6240 }, { "epoch": 0.1912774304278534, "grad_norm": 1.1743379227264266, "learning_rate": 9.333308210776959e-06, "loss": 0.8689, "step": 6241 }, { "epoch": 0.19130807895059457, "grad_norm": 1.2730072819636913, "learning_rate": 9.333060578255833e-06, "loss": 0.701, "step": 6242 }, { "epoch": 0.19133872747333577, "grad_norm": 1.3104874481423603, "learning_rate": 9.332812903039954e-06, "loss": 0.7228, "step": 6243 }, { "epoch": 0.19136937599607698, "grad_norm": 1.2822259452485012, "learning_rate": 9.332565185131762e-06, "loss": 0.7026, "step": 6244 }, { "epoch": 0.19140002451881818, "grad_norm": 1.2783189838663067, "learning_rate": 9.332317424533696e-06, "loss": 0.6075, "step": 6245 }, { "epoch": 0.1914306730415594, "grad_norm": 1.3344326094599608, "learning_rate": 9.332069621248199e-06, "loss": 0.6564, "step": 6246 }, { "epoch": 0.1914613215643006, "grad_norm": 1.243570812176157, "learning_rate": 9.33182177527771e-06, "loss": 0.7671, "step": 6247 }, { "epoch": 0.1914919700870418, "grad_norm": 1.1916493720722454, "learning_rate": 9.331573886624672e-06, "loss": 0.6072, "step": 6248 }, { "epoch": 0.191522618609783, "grad_norm": 1.2264504015025537, "learning_rate": 9.33132595529153e-06, "loss": 0.6323, "step": 6249 }, { "epoch": 0.1915532671325242, "grad_norm": 1.3278149745717605, "learning_rate": 9.331077981280724e-06, "loss": 0.7608, "step": 6250 }, { "epoch": 0.19158391565526542, "grad_norm": 1.383117846632268, "learning_rate": 9.330829964594698e-06, "loss": 0.6888, "step": 6251 }, { "epoch": 0.19161456417800662, "grad_norm": 1.3630032760857027, "learning_rate": 9.330581905235898e-06, "loss": 0.6744, "step": 6252 }, { "epoch": 0.19164521270074783, "grad_norm": 0.5562097277635354, "learning_rate": 9.330333803206766e-06, "loss": 0.4497, "step": 6253 }, { "epoch": 0.19167586122348904, "grad_norm": 1.361981108176868, "learning_rate": 9.330085658509747e-06, "loss": 0.698, "step": 6254 }, { "epoch": 0.19170650974623024, "grad_norm": 1.3883270569323996, "learning_rate": 9.329837471147286e-06, "loss": 0.7076, "step": 6255 }, { "epoch": 0.19173715826897145, "grad_norm": 1.1969886604318827, "learning_rate": 9.329589241121828e-06, "loss": 0.718, "step": 6256 }, { "epoch": 0.19176780679171265, "grad_norm": 1.788729543876991, "learning_rate": 9.32934096843582e-06, "loss": 0.7575, "step": 6257 }, { "epoch": 0.19179845531445383, "grad_norm": 1.2990668838308312, "learning_rate": 9.329092653091708e-06, "loss": 0.7122, "step": 6258 }, { "epoch": 0.19182910383719504, "grad_norm": 1.1921014479725873, "learning_rate": 9.328844295091938e-06, "loss": 0.6932, "step": 6259 }, { "epoch": 0.19185975235993624, "grad_norm": 0.5177462402870567, "learning_rate": 9.328595894438958e-06, "loss": 0.4311, "step": 6260 }, { "epoch": 0.19189040088267745, "grad_norm": 1.2790493477316691, "learning_rate": 9.328347451135213e-06, "loss": 0.6003, "step": 6261 }, { "epoch": 0.19192104940541865, "grad_norm": 1.393413897287764, "learning_rate": 9.328098965183157e-06, "loss": 0.741, "step": 6262 }, { "epoch": 0.19195169792815986, "grad_norm": 1.0772681165361204, "learning_rate": 9.32785043658523e-06, "loss": 0.644, "step": 6263 }, { "epoch": 0.19198234645090106, "grad_norm": 1.2711712168158646, "learning_rate": 9.32760186534389e-06, "loss": 0.7794, "step": 6264 }, { "epoch": 0.19201299497364227, "grad_norm": 0.4642790527207598, "learning_rate": 9.327353251461578e-06, "loss": 0.4496, "step": 6265 }, { "epoch": 0.19204364349638348, "grad_norm": 1.3011493277661321, "learning_rate": 9.327104594940748e-06, "loss": 0.7023, "step": 6266 }, { "epoch": 0.19207429201912468, "grad_norm": 2.501887831987746, "learning_rate": 9.326855895783851e-06, "loss": 0.7107, "step": 6267 }, { "epoch": 0.1921049405418659, "grad_norm": 1.2548413355788395, "learning_rate": 9.326607153993335e-06, "loss": 0.6805, "step": 6268 }, { "epoch": 0.1921355890646071, "grad_norm": 1.3104382783277668, "learning_rate": 9.32635836957165e-06, "loss": 0.6366, "step": 6269 }, { "epoch": 0.1921662375873483, "grad_norm": 1.2648440515417891, "learning_rate": 9.326109542521252e-06, "loss": 0.6461, "step": 6270 }, { "epoch": 0.1921968861100895, "grad_norm": 1.1480920593725594, "learning_rate": 9.325860672844586e-06, "loss": 0.6751, "step": 6271 }, { "epoch": 0.1922275346328307, "grad_norm": 1.334498322665005, "learning_rate": 9.325611760544112e-06, "loss": 0.719, "step": 6272 }, { "epoch": 0.1922581831555719, "grad_norm": 1.3374307906442737, "learning_rate": 9.325362805622275e-06, "loss": 0.7445, "step": 6273 }, { "epoch": 0.1922888316783131, "grad_norm": 1.3085435530212517, "learning_rate": 9.325113808081535e-06, "loss": 0.7403, "step": 6274 }, { "epoch": 0.1923194802010543, "grad_norm": 1.1529643642938154, "learning_rate": 9.32486476792434e-06, "loss": 0.6427, "step": 6275 }, { "epoch": 0.1923501287237955, "grad_norm": 1.3350187916886231, "learning_rate": 9.324615685153145e-06, "loss": 0.7025, "step": 6276 }, { "epoch": 0.1923807772465367, "grad_norm": 1.708679776758117, "learning_rate": 9.324366559770406e-06, "loss": 0.6734, "step": 6277 }, { "epoch": 0.19241142576927792, "grad_norm": 1.3061302578669296, "learning_rate": 9.324117391778577e-06, "loss": 0.7991, "step": 6278 }, { "epoch": 0.19244207429201912, "grad_norm": 1.3883184780411773, "learning_rate": 9.323868181180113e-06, "loss": 0.6989, "step": 6279 }, { "epoch": 0.19247272281476033, "grad_norm": 1.2171527360134422, "learning_rate": 9.32361892797747e-06, "loss": 0.7898, "step": 6280 }, { "epoch": 0.19250337133750153, "grad_norm": 1.3511739346520288, "learning_rate": 9.323369632173103e-06, "loss": 0.6501, "step": 6281 }, { "epoch": 0.19253401986024274, "grad_norm": 1.2323626629198974, "learning_rate": 9.323120293769468e-06, "loss": 0.7018, "step": 6282 }, { "epoch": 0.19256466838298394, "grad_norm": 1.3167322402205646, "learning_rate": 9.322870912769024e-06, "loss": 0.7194, "step": 6283 }, { "epoch": 0.19259531690572515, "grad_norm": 1.3590398349616428, "learning_rate": 9.322621489174226e-06, "loss": 0.8163, "step": 6284 }, { "epoch": 0.19262596542846636, "grad_norm": 1.316949182205931, "learning_rate": 9.322372022987533e-06, "loss": 0.7059, "step": 6285 }, { "epoch": 0.19265661395120756, "grad_norm": 1.2203655903932993, "learning_rate": 9.322122514211402e-06, "loss": 0.6518, "step": 6286 }, { "epoch": 0.19268726247394877, "grad_norm": 1.1324778591343927, "learning_rate": 9.321872962848292e-06, "loss": 0.6275, "step": 6287 }, { "epoch": 0.19271791099668997, "grad_norm": 1.4361310340297797, "learning_rate": 9.321623368900664e-06, "loss": 0.7703, "step": 6288 }, { "epoch": 0.19274855951943115, "grad_norm": 1.3109976368911989, "learning_rate": 9.321373732370973e-06, "loss": 0.7523, "step": 6289 }, { "epoch": 0.19277920804217236, "grad_norm": 1.3261611148651629, "learning_rate": 9.321124053261681e-06, "loss": 0.6638, "step": 6290 }, { "epoch": 0.19280985656491356, "grad_norm": 1.2985290608267555, "learning_rate": 9.32087433157525e-06, "loss": 0.694, "step": 6291 }, { "epoch": 0.19284050508765477, "grad_norm": 1.3196770518156533, "learning_rate": 9.320624567314136e-06, "loss": 0.7207, "step": 6292 }, { "epoch": 0.19287115361039597, "grad_norm": 1.201212255611259, "learning_rate": 9.320374760480804e-06, "loss": 0.6825, "step": 6293 }, { "epoch": 0.19290180213313718, "grad_norm": 1.2496999097659613, "learning_rate": 9.320124911077713e-06, "loss": 0.6367, "step": 6294 }, { "epoch": 0.19293245065587838, "grad_norm": 1.2859336615011343, "learning_rate": 9.319875019107327e-06, "loss": 0.639, "step": 6295 }, { "epoch": 0.1929630991786196, "grad_norm": 1.2373701312567398, "learning_rate": 9.319625084572108e-06, "loss": 0.7603, "step": 6296 }, { "epoch": 0.1929937477013608, "grad_norm": 1.381925627653951, "learning_rate": 9.319375107474516e-06, "loss": 0.6832, "step": 6297 }, { "epoch": 0.193024396224102, "grad_norm": 1.218713794039134, "learning_rate": 9.319125087817017e-06, "loss": 0.7322, "step": 6298 }, { "epoch": 0.1930550447468432, "grad_norm": 1.294408737672836, "learning_rate": 9.318875025602072e-06, "loss": 0.719, "step": 6299 }, { "epoch": 0.1930856932695844, "grad_norm": 1.260224462475573, "learning_rate": 9.31862492083215e-06, "loss": 0.7592, "step": 6300 }, { "epoch": 0.19311634179232562, "grad_norm": 1.569493395453541, "learning_rate": 9.318374773509707e-06, "loss": 0.7999, "step": 6301 }, { "epoch": 0.19314699031506682, "grad_norm": 1.3008111571764387, "learning_rate": 9.318124583637216e-06, "loss": 0.6735, "step": 6302 }, { "epoch": 0.19317763883780803, "grad_norm": 1.3541895442578054, "learning_rate": 9.317874351217136e-06, "loss": 0.7157, "step": 6303 }, { "epoch": 0.1932082873605492, "grad_norm": 1.4057137579954408, "learning_rate": 9.317624076251936e-06, "loss": 0.7213, "step": 6304 }, { "epoch": 0.1932389358832904, "grad_norm": 1.4196893744228485, "learning_rate": 9.317373758744082e-06, "loss": 0.7522, "step": 6305 }, { "epoch": 0.19326958440603162, "grad_norm": 1.3192147850800344, "learning_rate": 9.317123398696039e-06, "loss": 0.7153, "step": 6306 }, { "epoch": 0.19330023292877282, "grad_norm": 1.160313863996333, "learning_rate": 9.316872996110276e-06, "loss": 0.6075, "step": 6307 }, { "epoch": 0.19333088145151403, "grad_norm": 1.3117631986183382, "learning_rate": 9.316622550989259e-06, "loss": 0.7218, "step": 6308 }, { "epoch": 0.19336152997425524, "grad_norm": 1.4061169602759505, "learning_rate": 9.316372063335453e-06, "loss": 0.7369, "step": 6309 }, { "epoch": 0.19339217849699644, "grad_norm": 0.5132714656711277, "learning_rate": 9.31612153315133e-06, "loss": 0.4635, "step": 6310 }, { "epoch": 0.19342282701973765, "grad_norm": 1.2510572941774063, "learning_rate": 9.315870960439357e-06, "loss": 0.7194, "step": 6311 }, { "epoch": 0.19345347554247885, "grad_norm": 1.5223636718196614, "learning_rate": 9.315620345202004e-06, "loss": 0.7849, "step": 6312 }, { "epoch": 0.19348412406522006, "grad_norm": 1.3021184778462707, "learning_rate": 9.31536968744174e-06, "loss": 0.692, "step": 6313 }, { "epoch": 0.19351477258796126, "grad_norm": 1.3706974497738915, "learning_rate": 9.31511898716103e-06, "loss": 0.7679, "step": 6314 }, { "epoch": 0.19354542111070247, "grad_norm": 1.1952057893114054, "learning_rate": 9.314868244362355e-06, "loss": 0.6906, "step": 6315 }, { "epoch": 0.19357606963344368, "grad_norm": 1.3798288282929794, "learning_rate": 9.314617459048175e-06, "loss": 0.6337, "step": 6316 }, { "epoch": 0.19360671815618488, "grad_norm": 1.2265581543510207, "learning_rate": 9.314366631220965e-06, "loss": 0.6379, "step": 6317 }, { "epoch": 0.1936373666789261, "grad_norm": 1.4429352632502248, "learning_rate": 9.314115760883199e-06, "loss": 0.6561, "step": 6318 }, { "epoch": 0.1936680152016673, "grad_norm": 1.3764315912723246, "learning_rate": 9.313864848037346e-06, "loss": 0.8139, "step": 6319 }, { "epoch": 0.19369866372440847, "grad_norm": 0.4773417654482132, "learning_rate": 9.313613892685877e-06, "loss": 0.4455, "step": 6320 }, { "epoch": 0.19372931224714968, "grad_norm": 0.48694418725738936, "learning_rate": 9.31336289483127e-06, "loss": 0.4456, "step": 6321 }, { "epoch": 0.19375996076989088, "grad_norm": 1.4112746807948275, "learning_rate": 9.313111854475991e-06, "loss": 0.748, "step": 6322 }, { "epoch": 0.1937906092926321, "grad_norm": 0.46396506935878346, "learning_rate": 9.312860771622521e-06, "loss": 0.4327, "step": 6323 }, { "epoch": 0.1938212578153733, "grad_norm": 1.2402782913133905, "learning_rate": 9.312609646273327e-06, "loss": 0.7668, "step": 6324 }, { "epoch": 0.1938519063381145, "grad_norm": 0.44038206942555475, "learning_rate": 9.31235847843089e-06, "loss": 0.4578, "step": 6325 }, { "epoch": 0.1938825548608557, "grad_norm": 1.4783185983871694, "learning_rate": 9.312107268097679e-06, "loss": 0.6905, "step": 6326 }, { "epoch": 0.1939132033835969, "grad_norm": 1.3873877139215147, "learning_rate": 9.311856015276172e-06, "loss": 0.6677, "step": 6327 }, { "epoch": 0.19394385190633812, "grad_norm": 1.3265985157480455, "learning_rate": 9.311604719968845e-06, "loss": 0.6894, "step": 6328 }, { "epoch": 0.19397450042907932, "grad_norm": 1.290794569661173, "learning_rate": 9.311353382178174e-06, "loss": 0.7967, "step": 6329 }, { "epoch": 0.19400514895182053, "grad_norm": 1.34605777113641, "learning_rate": 9.311102001906634e-06, "loss": 0.753, "step": 6330 }, { "epoch": 0.19403579747456173, "grad_norm": 0.5677612064489594, "learning_rate": 9.310850579156703e-06, "loss": 0.4555, "step": 6331 }, { "epoch": 0.19406644599730294, "grad_norm": 1.4165159655680344, "learning_rate": 9.31059911393086e-06, "loss": 0.7439, "step": 6332 }, { "epoch": 0.19409709452004414, "grad_norm": 1.3599838425583197, "learning_rate": 9.31034760623158e-06, "loss": 0.714, "step": 6333 }, { "epoch": 0.19412774304278535, "grad_norm": 1.2846270125379715, "learning_rate": 9.310096056061341e-06, "loss": 0.6603, "step": 6334 }, { "epoch": 0.19415839156552653, "grad_norm": 1.3337011742257465, "learning_rate": 9.309844463422624e-06, "loss": 0.6716, "step": 6335 }, { "epoch": 0.19418904008826773, "grad_norm": 1.4774491288736347, "learning_rate": 9.309592828317906e-06, "loss": 0.8109, "step": 6336 }, { "epoch": 0.19421968861100894, "grad_norm": 1.2570227160778273, "learning_rate": 9.309341150749669e-06, "loss": 0.7086, "step": 6337 }, { "epoch": 0.19425033713375014, "grad_norm": 1.3513746831911269, "learning_rate": 9.30908943072039e-06, "loss": 0.7477, "step": 6338 }, { "epoch": 0.19428098565649135, "grad_norm": 0.4794238246481516, "learning_rate": 9.308837668232548e-06, "loss": 0.4406, "step": 6339 }, { "epoch": 0.19431163417923256, "grad_norm": 1.2229957856659237, "learning_rate": 9.30858586328863e-06, "loss": 0.6389, "step": 6340 }, { "epoch": 0.19434228270197376, "grad_norm": 0.48204486520642237, "learning_rate": 9.30833401589111e-06, "loss": 0.4628, "step": 6341 }, { "epoch": 0.19437293122471497, "grad_norm": 1.121168627402014, "learning_rate": 9.308082126042474e-06, "loss": 0.7071, "step": 6342 }, { "epoch": 0.19440357974745617, "grad_norm": 1.1867337582013233, "learning_rate": 9.307830193745203e-06, "loss": 0.7108, "step": 6343 }, { "epoch": 0.19443422827019738, "grad_norm": 1.338931015676391, "learning_rate": 9.307578219001778e-06, "loss": 0.6582, "step": 6344 }, { "epoch": 0.19446487679293858, "grad_norm": 0.4666612042578378, "learning_rate": 9.307326201814684e-06, "loss": 0.46, "step": 6345 }, { "epoch": 0.1944955253156798, "grad_norm": 1.3430325509061891, "learning_rate": 9.307074142186401e-06, "loss": 0.7014, "step": 6346 }, { "epoch": 0.194526173838421, "grad_norm": 0.7311478307637318, "learning_rate": 9.306822040119415e-06, "loss": 0.4528, "step": 6347 }, { "epoch": 0.1945568223611622, "grad_norm": 1.274401583111148, "learning_rate": 9.30656989561621e-06, "loss": 0.6532, "step": 6348 }, { "epoch": 0.1945874708839034, "grad_norm": 1.5532446169869227, "learning_rate": 9.30631770867927e-06, "loss": 0.6545, "step": 6349 }, { "epoch": 0.1946181194066446, "grad_norm": 1.2800388194343386, "learning_rate": 9.30606547931108e-06, "loss": 0.6577, "step": 6350 }, { "epoch": 0.1946487679293858, "grad_norm": 1.4236726930588, "learning_rate": 9.305813207514123e-06, "loss": 0.7311, "step": 6351 }, { "epoch": 0.194679416452127, "grad_norm": 1.192861099156035, "learning_rate": 9.305560893290889e-06, "loss": 0.6251, "step": 6352 }, { "epoch": 0.1947100649748682, "grad_norm": 1.4502584832300414, "learning_rate": 9.30530853664386e-06, "loss": 0.7338, "step": 6353 }, { "epoch": 0.1947407134976094, "grad_norm": 1.282415276317264, "learning_rate": 9.305056137575526e-06, "loss": 0.7326, "step": 6354 }, { "epoch": 0.1947713620203506, "grad_norm": 1.291480752960131, "learning_rate": 9.304803696088372e-06, "loss": 0.6504, "step": 6355 }, { "epoch": 0.19480201054309182, "grad_norm": 0.482685704343227, "learning_rate": 9.304551212184887e-06, "loss": 0.4602, "step": 6356 }, { "epoch": 0.19483265906583302, "grad_norm": 1.6025633728490227, "learning_rate": 9.304298685867556e-06, "loss": 0.7111, "step": 6357 }, { "epoch": 0.19486330758857423, "grad_norm": 1.2559146134944823, "learning_rate": 9.304046117138868e-06, "loss": 0.7224, "step": 6358 }, { "epoch": 0.19489395611131544, "grad_norm": 1.4045426446901026, "learning_rate": 9.303793506001314e-06, "loss": 0.7137, "step": 6359 }, { "epoch": 0.19492460463405664, "grad_norm": 1.3942473829748698, "learning_rate": 9.30354085245738e-06, "loss": 0.6512, "step": 6360 }, { "epoch": 0.19495525315679785, "grad_norm": 0.4488741981302784, "learning_rate": 9.303288156509557e-06, "loss": 0.4598, "step": 6361 }, { "epoch": 0.19498590167953905, "grad_norm": 1.305526068177582, "learning_rate": 9.303035418160337e-06, "loss": 0.7056, "step": 6362 }, { "epoch": 0.19501655020228026, "grad_norm": 1.446655937627669, "learning_rate": 9.302782637412206e-06, "loss": 0.7134, "step": 6363 }, { "epoch": 0.19504719872502146, "grad_norm": 1.2897417690898436, "learning_rate": 9.302529814267658e-06, "loss": 0.821, "step": 6364 }, { "epoch": 0.19507784724776267, "grad_norm": 1.2046650387971394, "learning_rate": 9.302276948729182e-06, "loss": 0.7314, "step": 6365 }, { "epoch": 0.19510849577050385, "grad_norm": 0.5009786523513291, "learning_rate": 9.30202404079927e-06, "loss": 0.4654, "step": 6366 }, { "epoch": 0.19513914429324505, "grad_norm": 0.5104670058924798, "learning_rate": 9.301771090480415e-06, "loss": 0.4904, "step": 6367 }, { "epoch": 0.19516979281598626, "grad_norm": 1.281086917525723, "learning_rate": 9.301518097775109e-06, "loss": 0.6766, "step": 6368 }, { "epoch": 0.19520044133872747, "grad_norm": 1.276233120825921, "learning_rate": 9.301265062685845e-06, "loss": 0.7391, "step": 6369 }, { "epoch": 0.19523108986146867, "grad_norm": 0.4691928051273291, "learning_rate": 9.301011985215113e-06, "loss": 0.4828, "step": 6370 }, { "epoch": 0.19526173838420988, "grad_norm": 1.4238138490144203, "learning_rate": 9.300758865365413e-06, "loss": 0.7064, "step": 6371 }, { "epoch": 0.19529238690695108, "grad_norm": 1.2474661969835512, "learning_rate": 9.300505703139235e-06, "loss": 0.665, "step": 6372 }, { "epoch": 0.1953230354296923, "grad_norm": 1.3821996429868497, "learning_rate": 9.300252498539073e-06, "loss": 0.73, "step": 6373 }, { "epoch": 0.1953536839524335, "grad_norm": 1.3202699483268692, "learning_rate": 9.299999251567421e-06, "loss": 0.6519, "step": 6374 }, { "epoch": 0.1953843324751747, "grad_norm": 1.2868857414316657, "learning_rate": 9.29974596222678e-06, "loss": 0.6552, "step": 6375 }, { "epoch": 0.1954149809979159, "grad_norm": 1.1809548864506925, "learning_rate": 9.29949263051964e-06, "loss": 0.7606, "step": 6376 }, { "epoch": 0.1954456295206571, "grad_norm": 1.3793860663398316, "learning_rate": 9.299239256448497e-06, "loss": 0.7282, "step": 6377 }, { "epoch": 0.19547627804339832, "grad_norm": 1.236814092524837, "learning_rate": 9.298985840015853e-06, "loss": 0.6959, "step": 6378 }, { "epoch": 0.19550692656613952, "grad_norm": 1.371520201549427, "learning_rate": 9.2987323812242e-06, "loss": 0.708, "step": 6379 }, { "epoch": 0.19553757508888073, "grad_norm": 1.5402227818167316, "learning_rate": 9.298478880076037e-06, "loss": 0.6861, "step": 6380 }, { "epoch": 0.19556822361162193, "grad_norm": 0.6595852672284716, "learning_rate": 9.298225336573863e-06, "loss": 0.457, "step": 6381 }, { "epoch": 0.1955988721343631, "grad_norm": 1.1542846885981113, "learning_rate": 9.297971750720174e-06, "loss": 0.6693, "step": 6382 }, { "epoch": 0.19562952065710432, "grad_norm": 1.299873044307528, "learning_rate": 9.29771812251747e-06, "loss": 0.7685, "step": 6383 }, { "epoch": 0.19566016917984552, "grad_norm": 1.5645442773196627, "learning_rate": 9.297464451968248e-06, "loss": 0.8398, "step": 6384 }, { "epoch": 0.19569081770258673, "grad_norm": 1.295640241998582, "learning_rate": 9.29721073907501e-06, "loss": 0.7268, "step": 6385 }, { "epoch": 0.19572146622532793, "grad_norm": 1.2544734940277114, "learning_rate": 9.296956983840258e-06, "loss": 0.6979, "step": 6386 }, { "epoch": 0.19575211474806914, "grad_norm": 1.2831185425623024, "learning_rate": 9.296703186266486e-06, "loss": 0.6134, "step": 6387 }, { "epoch": 0.19578276327081034, "grad_norm": 1.2578401478760017, "learning_rate": 9.296449346356199e-06, "loss": 0.7227, "step": 6388 }, { "epoch": 0.19581341179355155, "grad_norm": 1.1994209071052275, "learning_rate": 9.296195464111899e-06, "loss": 0.7862, "step": 6389 }, { "epoch": 0.19584406031629276, "grad_norm": 1.4809847031000123, "learning_rate": 9.295941539536083e-06, "loss": 0.7978, "step": 6390 }, { "epoch": 0.19587470883903396, "grad_norm": 1.2807241166058674, "learning_rate": 9.295687572631258e-06, "loss": 0.7265, "step": 6391 }, { "epoch": 0.19590535736177517, "grad_norm": 0.6305661409664881, "learning_rate": 9.295433563399922e-06, "loss": 0.4483, "step": 6392 }, { "epoch": 0.19593600588451637, "grad_norm": 1.334031725271152, "learning_rate": 9.295179511844583e-06, "loss": 0.7332, "step": 6393 }, { "epoch": 0.19596665440725758, "grad_norm": 0.5122137184331776, "learning_rate": 9.29492541796774e-06, "loss": 0.481, "step": 6394 }, { "epoch": 0.19599730292999878, "grad_norm": 1.262985505300571, "learning_rate": 9.294671281771897e-06, "loss": 0.6695, "step": 6395 }, { "epoch": 0.19602795145274, "grad_norm": 0.4482912568030117, "learning_rate": 9.29441710325956e-06, "loss": 0.4461, "step": 6396 }, { "epoch": 0.19605859997548117, "grad_norm": 1.302365208004077, "learning_rate": 9.294162882433233e-06, "loss": 0.6951, "step": 6397 }, { "epoch": 0.19608924849822237, "grad_norm": 0.48727416870270884, "learning_rate": 9.29390861929542e-06, "loss": 0.4512, "step": 6398 }, { "epoch": 0.19611989702096358, "grad_norm": 0.47426459873691307, "learning_rate": 9.293654313848626e-06, "loss": 0.4618, "step": 6399 }, { "epoch": 0.19615054554370479, "grad_norm": 0.48340265838941104, "learning_rate": 9.293399966095358e-06, "loss": 0.4424, "step": 6400 }, { "epoch": 0.196181194066446, "grad_norm": 1.629043560965769, "learning_rate": 9.293145576038121e-06, "loss": 0.7839, "step": 6401 }, { "epoch": 0.1962118425891872, "grad_norm": 1.3336946129656555, "learning_rate": 9.292891143679423e-06, "loss": 0.6443, "step": 6402 }, { "epoch": 0.1962424911119284, "grad_norm": 0.47929728047091646, "learning_rate": 9.29263666902177e-06, "loss": 0.4549, "step": 6403 }, { "epoch": 0.1962731396346696, "grad_norm": 1.3263912215883176, "learning_rate": 9.29238215206767e-06, "loss": 0.748, "step": 6404 }, { "epoch": 0.1963037881574108, "grad_norm": 1.359878309016648, "learning_rate": 9.29212759281963e-06, "loss": 0.7754, "step": 6405 }, { "epoch": 0.19633443668015202, "grad_norm": 1.1556840024462438, "learning_rate": 9.291872991280158e-06, "loss": 0.681, "step": 6406 }, { "epoch": 0.19636508520289322, "grad_norm": 1.3646893751065967, "learning_rate": 9.291618347451763e-06, "loss": 0.7403, "step": 6407 }, { "epoch": 0.19639573372563443, "grad_norm": 1.200182357803131, "learning_rate": 9.291363661336956e-06, "loss": 0.7034, "step": 6408 }, { "epoch": 0.19642638224837564, "grad_norm": 1.4428482229846393, "learning_rate": 9.291108932938244e-06, "loss": 0.7983, "step": 6409 }, { "epoch": 0.19645703077111684, "grad_norm": 0.5321643321349753, "learning_rate": 9.290854162258138e-06, "loss": 0.4693, "step": 6410 }, { "epoch": 0.19648767929385805, "grad_norm": 1.1771031803829308, "learning_rate": 9.290599349299148e-06, "loss": 0.7324, "step": 6411 }, { "epoch": 0.19651832781659925, "grad_norm": 1.2824622877160095, "learning_rate": 9.290344494063785e-06, "loss": 0.5886, "step": 6412 }, { "epoch": 0.19654897633934043, "grad_norm": 1.2716240400721006, "learning_rate": 9.290089596554559e-06, "loss": 0.6972, "step": 6413 }, { "epoch": 0.19657962486208164, "grad_norm": 1.26988745488309, "learning_rate": 9.289834656773984e-06, "loss": 0.7421, "step": 6414 }, { "epoch": 0.19661027338482284, "grad_norm": 1.3874742380023188, "learning_rate": 9.28957967472457e-06, "loss": 0.7099, "step": 6415 }, { "epoch": 0.19664092190756405, "grad_norm": 1.2475892870549437, "learning_rate": 9.28932465040883e-06, "loss": 0.6367, "step": 6416 }, { "epoch": 0.19667157043030525, "grad_norm": 1.2814835803341929, "learning_rate": 9.289069583829276e-06, "loss": 0.6954, "step": 6417 }, { "epoch": 0.19670221895304646, "grad_norm": 1.303137493423832, "learning_rate": 9.288814474988421e-06, "loss": 0.7737, "step": 6418 }, { "epoch": 0.19673286747578766, "grad_norm": 1.5217132312312014, "learning_rate": 9.288559323888781e-06, "loss": 0.7849, "step": 6419 }, { "epoch": 0.19676351599852887, "grad_norm": 1.289234635314614, "learning_rate": 9.28830413053287e-06, "loss": 0.7496, "step": 6420 }, { "epoch": 0.19679416452127008, "grad_norm": 1.3781032995552152, "learning_rate": 9.2880488949232e-06, "loss": 0.6945, "step": 6421 }, { "epoch": 0.19682481304401128, "grad_norm": 1.3180053696002625, "learning_rate": 9.287793617062286e-06, "loss": 0.7053, "step": 6422 }, { "epoch": 0.1968554615667525, "grad_norm": 1.2775048508528453, "learning_rate": 9.287538296952646e-06, "loss": 0.7158, "step": 6423 }, { "epoch": 0.1968861100894937, "grad_norm": 1.3599448853001384, "learning_rate": 9.287282934596793e-06, "loss": 0.6508, "step": 6424 }, { "epoch": 0.1969167586122349, "grad_norm": 1.2814283339958987, "learning_rate": 9.287027529997246e-06, "loss": 0.6537, "step": 6425 }, { "epoch": 0.1969474071349761, "grad_norm": 1.3025616937395545, "learning_rate": 9.286772083156518e-06, "loss": 0.7632, "step": 6426 }, { "epoch": 0.1969780556577173, "grad_norm": 0.5068434926553986, "learning_rate": 9.286516594077129e-06, "loss": 0.4409, "step": 6427 }, { "epoch": 0.1970087041804585, "grad_norm": 1.268304840902129, "learning_rate": 9.286261062761595e-06, "loss": 0.7194, "step": 6428 }, { "epoch": 0.1970393527031997, "grad_norm": 1.6075699286138645, "learning_rate": 9.286005489212433e-06, "loss": 0.6269, "step": 6429 }, { "epoch": 0.1970700012259409, "grad_norm": 0.46005507585672145, "learning_rate": 9.285749873432165e-06, "loss": 0.4463, "step": 6430 }, { "epoch": 0.1971006497486821, "grad_norm": 0.4516811220390749, "learning_rate": 9.285494215423304e-06, "loss": 0.4435, "step": 6431 }, { "epoch": 0.1971312982714233, "grad_norm": 1.5663263535822987, "learning_rate": 9.285238515188372e-06, "loss": 0.7675, "step": 6432 }, { "epoch": 0.19716194679416452, "grad_norm": 1.3520032692749502, "learning_rate": 9.284982772729891e-06, "loss": 0.7162, "step": 6433 }, { "epoch": 0.19719259531690572, "grad_norm": 1.3911024138140295, "learning_rate": 9.284726988050376e-06, "loss": 0.6978, "step": 6434 }, { "epoch": 0.19722324383964693, "grad_norm": 1.257757183258401, "learning_rate": 9.284471161152351e-06, "loss": 0.7216, "step": 6435 }, { "epoch": 0.19725389236238813, "grad_norm": 1.472944915888778, "learning_rate": 9.284215292038335e-06, "loss": 0.7837, "step": 6436 }, { "epoch": 0.19728454088512934, "grad_norm": 1.460358077027703, "learning_rate": 9.28395938071085e-06, "loss": 0.6852, "step": 6437 }, { "epoch": 0.19731518940787054, "grad_norm": 1.2769777737479056, "learning_rate": 9.283703427172417e-06, "loss": 0.7324, "step": 6438 }, { "epoch": 0.19734583793061175, "grad_norm": 1.2223054169839294, "learning_rate": 9.28344743142556e-06, "loss": 0.7232, "step": 6439 }, { "epoch": 0.19737648645335296, "grad_norm": 1.440025826075143, "learning_rate": 9.283191393472796e-06, "loss": 0.6829, "step": 6440 }, { "epoch": 0.19740713497609416, "grad_norm": 1.4460775291564305, "learning_rate": 9.282935313316652e-06, "loss": 0.7482, "step": 6441 }, { "epoch": 0.19743778349883537, "grad_norm": 1.0796815140743123, "learning_rate": 9.282679190959652e-06, "loss": 0.6101, "step": 6442 }, { "epoch": 0.19746843202157657, "grad_norm": 1.17924969041399, "learning_rate": 9.282423026404317e-06, "loss": 0.7329, "step": 6443 }, { "epoch": 0.19749908054431775, "grad_norm": 1.40565146484967, "learning_rate": 9.282166819653172e-06, "loss": 0.7398, "step": 6444 }, { "epoch": 0.19752972906705896, "grad_norm": 1.4593289187422218, "learning_rate": 9.281910570708744e-06, "loss": 0.8036, "step": 6445 }, { "epoch": 0.19756037758980016, "grad_norm": 1.4224912572491322, "learning_rate": 9.281654279573553e-06, "loss": 0.7371, "step": 6446 }, { "epoch": 0.19759102611254137, "grad_norm": 1.1593791135898628, "learning_rate": 9.281397946250129e-06, "loss": 0.6106, "step": 6447 }, { "epoch": 0.19762167463528257, "grad_norm": 1.282552770100159, "learning_rate": 9.281141570740992e-06, "loss": 0.665, "step": 6448 }, { "epoch": 0.19765232315802378, "grad_norm": 1.2059183526081996, "learning_rate": 9.280885153048676e-06, "loss": 0.6721, "step": 6449 }, { "epoch": 0.19768297168076499, "grad_norm": 1.2584828172223594, "learning_rate": 9.2806286931757e-06, "loss": 0.6261, "step": 6450 }, { "epoch": 0.1977136202035062, "grad_norm": 0.8195822935555067, "learning_rate": 9.280372191124596e-06, "loss": 0.4552, "step": 6451 }, { "epoch": 0.1977442687262474, "grad_norm": 1.1677542079186034, "learning_rate": 9.280115646897888e-06, "loss": 0.6646, "step": 6452 }, { "epoch": 0.1977749172489886, "grad_norm": 1.420050858615983, "learning_rate": 9.279859060498107e-06, "loss": 0.7246, "step": 6453 }, { "epoch": 0.1978055657717298, "grad_norm": 1.4217999653555498, "learning_rate": 9.27960243192778e-06, "loss": 0.7892, "step": 6454 }, { "epoch": 0.197836214294471, "grad_norm": 0.5133704912370382, "learning_rate": 9.279345761189435e-06, "loss": 0.4678, "step": 6455 }, { "epoch": 0.19786686281721222, "grad_norm": 1.435662669563926, "learning_rate": 9.2790890482856e-06, "loss": 0.6697, "step": 6456 }, { "epoch": 0.19789751133995342, "grad_norm": 1.4718288804179749, "learning_rate": 9.278832293218807e-06, "loss": 0.6695, "step": 6457 }, { "epoch": 0.19792815986269463, "grad_norm": 1.1625466893356584, "learning_rate": 9.278575495991583e-06, "loss": 0.62, "step": 6458 }, { "epoch": 0.1979588083854358, "grad_norm": 1.3152699056333996, "learning_rate": 9.278318656606463e-06, "loss": 0.6523, "step": 6459 }, { "epoch": 0.19798945690817701, "grad_norm": 0.610535544037995, "learning_rate": 9.278061775065972e-06, "loss": 0.4659, "step": 6460 }, { "epoch": 0.19802010543091822, "grad_norm": 1.3824984389388664, "learning_rate": 9.277804851372643e-06, "loss": 0.7835, "step": 6461 }, { "epoch": 0.19805075395365943, "grad_norm": 1.3757321452109703, "learning_rate": 9.27754788552901e-06, "loss": 0.6198, "step": 6462 }, { "epoch": 0.19808140247640063, "grad_norm": 1.2487717977850816, "learning_rate": 9.277290877537603e-06, "loss": 0.6356, "step": 6463 }, { "epoch": 0.19811205099914184, "grad_norm": 1.1998891489491217, "learning_rate": 9.277033827400956e-06, "loss": 0.7567, "step": 6464 }, { "epoch": 0.19814269952188304, "grad_norm": 1.1189947880110587, "learning_rate": 9.276776735121597e-06, "loss": 0.6714, "step": 6465 }, { "epoch": 0.19817334804462425, "grad_norm": 0.5081812450269834, "learning_rate": 9.276519600702065e-06, "loss": 0.4709, "step": 6466 }, { "epoch": 0.19820399656736545, "grad_norm": 1.360249038674442, "learning_rate": 9.276262424144891e-06, "loss": 0.6777, "step": 6467 }, { "epoch": 0.19823464509010666, "grad_norm": 1.3631330980496965, "learning_rate": 9.27600520545261e-06, "loss": 0.7691, "step": 6468 }, { "epoch": 0.19826529361284786, "grad_norm": 1.3665340520025917, "learning_rate": 9.275747944627753e-06, "loss": 0.6936, "step": 6469 }, { "epoch": 0.19829594213558907, "grad_norm": 1.230091019661336, "learning_rate": 9.275490641672859e-06, "loss": 0.6669, "step": 6470 }, { "epoch": 0.19832659065833028, "grad_norm": 1.2856516134119467, "learning_rate": 9.275233296590463e-06, "loss": 0.6547, "step": 6471 }, { "epoch": 0.19835723918107148, "grad_norm": 1.2704638283754641, "learning_rate": 9.274975909383097e-06, "loss": 0.6611, "step": 6472 }, { "epoch": 0.1983878877038127, "grad_norm": 1.2446734452534938, "learning_rate": 9.274718480053303e-06, "loss": 0.6571, "step": 6473 }, { "epoch": 0.1984185362265539, "grad_norm": 1.2360365807134854, "learning_rate": 9.27446100860361e-06, "loss": 0.7039, "step": 6474 }, { "epoch": 0.19844918474929507, "grad_norm": 1.204944035554781, "learning_rate": 9.274203495036563e-06, "loss": 0.6351, "step": 6475 }, { "epoch": 0.19847983327203628, "grad_norm": 1.233865795298071, "learning_rate": 9.273945939354691e-06, "loss": 0.6654, "step": 6476 }, { "epoch": 0.19851048179477748, "grad_norm": 1.2004134709005254, "learning_rate": 9.27368834156054e-06, "loss": 0.7367, "step": 6477 }, { "epoch": 0.1985411303175187, "grad_norm": 1.3174872421724064, "learning_rate": 9.273430701656642e-06, "loss": 0.7189, "step": 6478 }, { "epoch": 0.1985717788402599, "grad_norm": 1.3715255389815932, "learning_rate": 9.273173019645539e-06, "loss": 0.7437, "step": 6479 }, { "epoch": 0.1986024273630011, "grad_norm": 1.4095770079947945, "learning_rate": 9.272915295529768e-06, "loss": 0.7089, "step": 6480 }, { "epoch": 0.1986330758857423, "grad_norm": 1.2083817907779202, "learning_rate": 9.27265752931187e-06, "loss": 0.6832, "step": 6481 }, { "epoch": 0.1986637244084835, "grad_norm": 0.5371580796621895, "learning_rate": 9.272399720994384e-06, "loss": 0.4507, "step": 6482 }, { "epoch": 0.19869437293122472, "grad_norm": 1.282507466810491, "learning_rate": 9.272141870579851e-06, "loss": 0.706, "step": 6483 }, { "epoch": 0.19872502145396592, "grad_norm": 0.47679591247011605, "learning_rate": 9.27188397807081e-06, "loss": 0.4674, "step": 6484 }, { "epoch": 0.19875566997670713, "grad_norm": 1.2254993061761799, "learning_rate": 9.271626043469804e-06, "loss": 0.7207, "step": 6485 }, { "epoch": 0.19878631849944833, "grad_norm": 1.3583960412794396, "learning_rate": 9.271368066779373e-06, "loss": 0.719, "step": 6486 }, { "epoch": 0.19881696702218954, "grad_norm": 1.201739729912561, "learning_rate": 9.27111004800206e-06, "loss": 0.686, "step": 6487 }, { "epoch": 0.19884761554493074, "grad_norm": 1.378872233205684, "learning_rate": 9.270851987140405e-06, "loss": 0.727, "step": 6488 }, { "epoch": 0.19887826406767195, "grad_norm": 0.5496894890424353, "learning_rate": 9.270593884196956e-06, "loss": 0.4689, "step": 6489 }, { "epoch": 0.19890891259041313, "grad_norm": 1.3252117335700813, "learning_rate": 9.270335739174251e-06, "loss": 0.6706, "step": 6490 }, { "epoch": 0.19893956111315433, "grad_norm": 1.1828195125757301, "learning_rate": 9.270077552074835e-06, "loss": 0.646, "step": 6491 }, { "epoch": 0.19897020963589554, "grad_norm": 1.3669668509948025, "learning_rate": 9.269819322901254e-06, "loss": 0.7175, "step": 6492 }, { "epoch": 0.19900085815863675, "grad_norm": 1.3433324049213142, "learning_rate": 9.269561051656049e-06, "loss": 0.6749, "step": 6493 }, { "epoch": 0.19903150668137795, "grad_norm": 1.3206228981890844, "learning_rate": 9.269302738341766e-06, "loss": 0.7008, "step": 6494 }, { "epoch": 0.19906215520411916, "grad_norm": 1.2513436829075744, "learning_rate": 9.269044382960952e-06, "loss": 0.6705, "step": 6495 }, { "epoch": 0.19909280372686036, "grad_norm": 1.3885293491148796, "learning_rate": 9.268785985516152e-06, "loss": 0.6724, "step": 6496 }, { "epoch": 0.19912345224960157, "grad_norm": 1.3783357321977232, "learning_rate": 9.268527546009911e-06, "loss": 0.7533, "step": 6497 }, { "epoch": 0.19915410077234277, "grad_norm": 1.383724348130434, "learning_rate": 9.268269064444775e-06, "loss": 0.7815, "step": 6498 }, { "epoch": 0.19918474929508398, "grad_norm": 1.8161444427743207, "learning_rate": 9.268010540823294e-06, "loss": 0.6823, "step": 6499 }, { "epoch": 0.19921539781782518, "grad_norm": 1.382727788406704, "learning_rate": 9.267751975148011e-06, "loss": 0.7509, "step": 6500 }, { "epoch": 0.1992460463405664, "grad_norm": 1.292524627000216, "learning_rate": 9.267493367421476e-06, "loss": 0.7341, "step": 6501 }, { "epoch": 0.1992766948633076, "grad_norm": 1.5306138493720165, "learning_rate": 9.267234717646237e-06, "loss": 0.7315, "step": 6502 }, { "epoch": 0.1993073433860488, "grad_norm": 1.3366487820433024, "learning_rate": 9.266976025824843e-06, "loss": 0.6407, "step": 6503 }, { "epoch": 0.19933799190879, "grad_norm": 1.4823831689884233, "learning_rate": 9.266717291959843e-06, "loss": 0.7504, "step": 6504 }, { "epoch": 0.1993686404315312, "grad_norm": 1.3655034909023096, "learning_rate": 9.266458516053785e-06, "loss": 0.7308, "step": 6505 }, { "epoch": 0.1993992889542724, "grad_norm": 1.5058595635823944, "learning_rate": 9.266199698109219e-06, "loss": 0.7549, "step": 6506 }, { "epoch": 0.1994299374770136, "grad_norm": 1.3404457749071574, "learning_rate": 9.265940838128698e-06, "loss": 0.7288, "step": 6507 }, { "epoch": 0.1994605859997548, "grad_norm": 1.328971049400321, "learning_rate": 9.265681936114768e-06, "loss": 0.7165, "step": 6508 }, { "epoch": 0.199491234522496, "grad_norm": 1.3027069543633647, "learning_rate": 9.265422992069985e-06, "loss": 0.6977, "step": 6509 }, { "epoch": 0.1995218830452372, "grad_norm": 1.3091769143092822, "learning_rate": 9.265164005996896e-06, "loss": 0.6448, "step": 6510 }, { "epoch": 0.19955253156797842, "grad_norm": 1.2586504990754648, "learning_rate": 9.264904977898055e-06, "loss": 0.641, "step": 6511 }, { "epoch": 0.19958318009071963, "grad_norm": 1.1452228528922803, "learning_rate": 9.264645907776013e-06, "loss": 0.7624, "step": 6512 }, { "epoch": 0.19961382861346083, "grad_norm": 0.543979350585495, "learning_rate": 9.264386795633327e-06, "loss": 0.4676, "step": 6513 }, { "epoch": 0.19964447713620204, "grad_norm": 1.203726030269912, "learning_rate": 9.264127641472544e-06, "loss": 0.6966, "step": 6514 }, { "epoch": 0.19967512565894324, "grad_norm": 0.511366102007935, "learning_rate": 9.263868445296222e-06, "loss": 0.457, "step": 6515 }, { "epoch": 0.19970577418168445, "grad_norm": 1.4637834493270008, "learning_rate": 9.263609207106911e-06, "loss": 0.7334, "step": 6516 }, { "epoch": 0.19973642270442565, "grad_norm": 1.2631913373045949, "learning_rate": 9.26334992690717e-06, "loss": 0.6631, "step": 6517 }, { "epoch": 0.19976707122716686, "grad_norm": 1.2486074500613316, "learning_rate": 9.263090604699549e-06, "loss": 0.6972, "step": 6518 }, { "epoch": 0.19979771974990806, "grad_norm": 1.370449009945555, "learning_rate": 9.262831240486608e-06, "loss": 0.6235, "step": 6519 }, { "epoch": 0.19982836827264927, "grad_norm": 1.290442593379235, "learning_rate": 9.262571834270899e-06, "loss": 0.6441, "step": 6520 }, { "epoch": 0.19985901679539045, "grad_norm": 0.5270024470448813, "learning_rate": 9.262312386054978e-06, "loss": 0.4684, "step": 6521 }, { "epoch": 0.19988966531813165, "grad_norm": 1.305835073622713, "learning_rate": 9.262052895841404e-06, "loss": 0.7473, "step": 6522 }, { "epoch": 0.19992031384087286, "grad_norm": 0.4983889458105424, "learning_rate": 9.261793363632733e-06, "loss": 0.4556, "step": 6523 }, { "epoch": 0.19995096236361407, "grad_norm": 1.5013539845469066, "learning_rate": 9.26153378943152e-06, "loss": 0.6543, "step": 6524 }, { "epoch": 0.19998161088635527, "grad_norm": 1.539634920191434, "learning_rate": 9.261274173240324e-06, "loss": 0.749, "step": 6525 }, { "epoch": 0.20001225940909648, "grad_norm": 1.4195006666235552, "learning_rate": 9.261014515061704e-06, "loss": 0.6839, "step": 6526 }, { "epoch": 0.20004290793183768, "grad_norm": 1.263849642718196, "learning_rate": 9.260754814898217e-06, "loss": 0.6699, "step": 6527 }, { "epoch": 0.2000735564545789, "grad_norm": 1.5240194101747, "learning_rate": 9.260495072752424e-06, "loss": 0.7695, "step": 6528 }, { "epoch": 0.2001042049773201, "grad_norm": 1.3255164304981397, "learning_rate": 9.260235288626882e-06, "loss": 0.619, "step": 6529 }, { "epoch": 0.2001348535000613, "grad_norm": 1.4529335719863257, "learning_rate": 9.259975462524151e-06, "loss": 0.7546, "step": 6530 }, { "epoch": 0.2001655020228025, "grad_norm": 1.6118549642672404, "learning_rate": 9.259715594446794e-06, "loss": 0.8056, "step": 6531 }, { "epoch": 0.2001961505455437, "grad_norm": 1.2849684551541924, "learning_rate": 9.259455684397367e-06, "loss": 0.6801, "step": 6532 }, { "epoch": 0.20022679906828492, "grad_norm": 1.4649955306375053, "learning_rate": 9.259195732378436e-06, "loss": 0.7569, "step": 6533 }, { "epoch": 0.20025744759102612, "grad_norm": 0.515455119492623, "learning_rate": 9.258935738392557e-06, "loss": 0.4491, "step": 6534 }, { "epoch": 0.20028809611376733, "grad_norm": 1.2712699102422145, "learning_rate": 9.258675702442295e-06, "loss": 0.6638, "step": 6535 }, { "epoch": 0.20031874463650853, "grad_norm": 1.4253147821442023, "learning_rate": 9.258415624530213e-06, "loss": 0.7474, "step": 6536 }, { "epoch": 0.2003493931592497, "grad_norm": 1.2865549155002238, "learning_rate": 9.258155504658871e-06, "loss": 0.7436, "step": 6537 }, { "epoch": 0.20038004168199092, "grad_norm": 1.1669692605393738, "learning_rate": 9.257895342830834e-06, "loss": 0.6249, "step": 6538 }, { "epoch": 0.20041069020473212, "grad_norm": 1.3028590900588852, "learning_rate": 9.257635139048664e-06, "loss": 0.6369, "step": 6539 }, { "epoch": 0.20044133872747333, "grad_norm": 1.1869296685759392, "learning_rate": 9.257374893314929e-06, "loss": 0.7383, "step": 6540 }, { "epoch": 0.20047198725021453, "grad_norm": 1.4098996060316493, "learning_rate": 9.257114605632184e-06, "loss": 0.7881, "step": 6541 }, { "epoch": 0.20050263577295574, "grad_norm": 1.3752135052605792, "learning_rate": 9.256854276003004e-06, "loss": 0.7427, "step": 6542 }, { "epoch": 0.20053328429569695, "grad_norm": 1.1440549838616179, "learning_rate": 9.256593904429948e-06, "loss": 0.7145, "step": 6543 }, { "epoch": 0.20056393281843815, "grad_norm": 1.288253882361717, "learning_rate": 9.256333490915583e-06, "loss": 0.8168, "step": 6544 }, { "epoch": 0.20059458134117936, "grad_norm": 1.3129420781702938, "learning_rate": 9.256073035462476e-06, "loss": 0.6833, "step": 6545 }, { "epoch": 0.20062522986392056, "grad_norm": 1.374175892249688, "learning_rate": 9.255812538073192e-06, "loss": 0.7786, "step": 6546 }, { "epoch": 0.20065587838666177, "grad_norm": 1.280148418142836, "learning_rate": 9.255551998750298e-06, "loss": 0.7261, "step": 6547 }, { "epoch": 0.20068652690940297, "grad_norm": 1.1990382684580956, "learning_rate": 9.255291417496361e-06, "loss": 0.6511, "step": 6548 }, { "epoch": 0.20071717543214418, "grad_norm": 1.425823978113303, "learning_rate": 9.255030794313951e-06, "loss": 0.7223, "step": 6549 }, { "epoch": 0.20074782395488538, "grad_norm": 1.3290796072251012, "learning_rate": 9.254770129205631e-06, "loss": 0.7915, "step": 6550 }, { "epoch": 0.2007784724776266, "grad_norm": 0.5215503370654814, "learning_rate": 9.254509422173974e-06, "loss": 0.4724, "step": 6551 }, { "epoch": 0.20080912100036777, "grad_norm": 1.3545625999567097, "learning_rate": 9.254248673221546e-06, "loss": 0.7564, "step": 6552 }, { "epoch": 0.20083976952310897, "grad_norm": 1.4130125956767237, "learning_rate": 9.253987882350919e-06, "loss": 0.738, "step": 6553 }, { "epoch": 0.20087041804585018, "grad_norm": 1.4169295554333254, "learning_rate": 9.25372704956466e-06, "loss": 0.7042, "step": 6554 }, { "epoch": 0.20090106656859139, "grad_norm": 1.4436201842899234, "learning_rate": 9.25346617486534e-06, "loss": 0.7717, "step": 6555 }, { "epoch": 0.2009317150913326, "grad_norm": 0.48448588832665335, "learning_rate": 9.25320525825553e-06, "loss": 0.4625, "step": 6556 }, { "epoch": 0.2009623636140738, "grad_norm": 1.306481087362081, "learning_rate": 9.252944299737799e-06, "loss": 0.6797, "step": 6557 }, { "epoch": 0.200993012136815, "grad_norm": 1.3313388038110667, "learning_rate": 9.25268329931472e-06, "loss": 0.7138, "step": 6558 }, { "epoch": 0.2010236606595562, "grad_norm": 1.1591311358583505, "learning_rate": 9.252422256988864e-06, "loss": 0.6714, "step": 6559 }, { "epoch": 0.2010543091822974, "grad_norm": 1.3223527558505082, "learning_rate": 9.252161172762806e-06, "loss": 0.6716, "step": 6560 }, { "epoch": 0.20108495770503862, "grad_norm": 1.1543574569666881, "learning_rate": 9.251900046639113e-06, "loss": 0.6602, "step": 6561 }, { "epoch": 0.20111560622777983, "grad_norm": 1.260691734360759, "learning_rate": 9.251638878620363e-06, "loss": 0.7644, "step": 6562 }, { "epoch": 0.20114625475052103, "grad_norm": 1.379022408271175, "learning_rate": 9.251377668709127e-06, "loss": 0.7335, "step": 6563 }, { "epoch": 0.20117690327326224, "grad_norm": 1.365946713804286, "learning_rate": 9.251116416907977e-06, "loss": 0.7383, "step": 6564 }, { "epoch": 0.20120755179600344, "grad_norm": 1.3085099168131376, "learning_rate": 9.25085512321949e-06, "loss": 0.674, "step": 6565 }, { "epoch": 0.20123820031874465, "grad_norm": 1.3481971944469195, "learning_rate": 9.250593787646243e-06, "loss": 0.6592, "step": 6566 }, { "epoch": 0.20126884884148585, "grad_norm": 1.3834427782111056, "learning_rate": 9.250332410190805e-06, "loss": 0.6422, "step": 6567 }, { "epoch": 0.20129949736422703, "grad_norm": 1.4070088100681863, "learning_rate": 9.250070990855755e-06, "loss": 0.6784, "step": 6568 }, { "epoch": 0.20133014588696824, "grad_norm": 1.2846227883485584, "learning_rate": 9.249809529643668e-06, "loss": 0.7272, "step": 6569 }, { "epoch": 0.20136079440970944, "grad_norm": 1.2517579635369789, "learning_rate": 9.24954802655712e-06, "loss": 0.721, "step": 6570 }, { "epoch": 0.20139144293245065, "grad_norm": 1.2264232534266744, "learning_rate": 9.249286481598686e-06, "loss": 0.6738, "step": 6571 }, { "epoch": 0.20142209145519185, "grad_norm": 1.200602003011681, "learning_rate": 9.24902489477095e-06, "loss": 0.7454, "step": 6572 }, { "epoch": 0.20145273997793306, "grad_norm": 1.3886507317250183, "learning_rate": 9.248763266076482e-06, "loss": 0.7341, "step": 6573 }, { "epoch": 0.20148338850067427, "grad_norm": 1.2861274279969035, "learning_rate": 9.248501595517861e-06, "loss": 0.655, "step": 6574 }, { "epoch": 0.20151403702341547, "grad_norm": 1.1808159020506401, "learning_rate": 9.248239883097668e-06, "loss": 0.6486, "step": 6575 }, { "epoch": 0.20154468554615668, "grad_norm": 1.2566401653289707, "learning_rate": 9.247978128818482e-06, "loss": 0.6751, "step": 6576 }, { "epoch": 0.20157533406889788, "grad_norm": 1.298084579935174, "learning_rate": 9.24771633268288e-06, "loss": 0.6962, "step": 6577 }, { "epoch": 0.2016059825916391, "grad_norm": 1.3916000133589135, "learning_rate": 9.24745449469344e-06, "loss": 0.73, "step": 6578 }, { "epoch": 0.2016366311143803, "grad_norm": 1.301192321491931, "learning_rate": 9.247192614852744e-06, "loss": 0.6748, "step": 6579 }, { "epoch": 0.2016672796371215, "grad_norm": 1.230058689611636, "learning_rate": 9.246930693163375e-06, "loss": 0.7208, "step": 6580 }, { "epoch": 0.2016979281598627, "grad_norm": 0.5023271385396554, "learning_rate": 9.246668729627911e-06, "loss": 0.4541, "step": 6581 }, { "epoch": 0.2017285766826039, "grad_norm": 0.5092300790043424, "learning_rate": 9.246406724248931e-06, "loss": 0.4757, "step": 6582 }, { "epoch": 0.2017592252053451, "grad_norm": 1.2071112432053348, "learning_rate": 9.246144677029022e-06, "loss": 0.6538, "step": 6583 }, { "epoch": 0.2017898737280863, "grad_norm": 1.2473033364085453, "learning_rate": 9.245882587970761e-06, "loss": 0.7518, "step": 6584 }, { "epoch": 0.2018205222508275, "grad_norm": 1.7292431537277713, "learning_rate": 9.245620457076732e-06, "loss": 0.7297, "step": 6585 }, { "epoch": 0.2018511707735687, "grad_norm": 1.2817986829524148, "learning_rate": 9.24535828434952e-06, "loss": 0.7709, "step": 6586 }, { "epoch": 0.2018818192963099, "grad_norm": 0.49144406420397424, "learning_rate": 9.245096069791706e-06, "loss": 0.4665, "step": 6587 }, { "epoch": 0.20191246781905112, "grad_norm": 1.26756827036755, "learning_rate": 9.244833813405875e-06, "loss": 0.661, "step": 6588 }, { "epoch": 0.20194311634179232, "grad_norm": 1.421340488897492, "learning_rate": 9.244571515194609e-06, "loss": 0.713, "step": 6589 }, { "epoch": 0.20197376486453353, "grad_norm": 1.5257599180874526, "learning_rate": 9.244309175160495e-06, "loss": 0.762, "step": 6590 }, { "epoch": 0.20200441338727473, "grad_norm": 1.2993833443754859, "learning_rate": 9.244046793306116e-06, "loss": 0.7399, "step": 6591 }, { "epoch": 0.20203506191001594, "grad_norm": 0.48409334034241097, "learning_rate": 9.243784369634059e-06, "loss": 0.4195, "step": 6592 }, { "epoch": 0.20206571043275715, "grad_norm": 1.3571653319770565, "learning_rate": 9.243521904146908e-06, "loss": 0.7816, "step": 6593 }, { "epoch": 0.20209635895549835, "grad_norm": 1.221542023472114, "learning_rate": 9.243259396847251e-06, "loss": 0.7634, "step": 6594 }, { "epoch": 0.20212700747823956, "grad_norm": 1.2998098479392874, "learning_rate": 9.242996847737672e-06, "loss": 0.7069, "step": 6595 }, { "epoch": 0.20215765600098076, "grad_norm": 1.3567688787814194, "learning_rate": 9.24273425682076e-06, "loss": 0.7317, "step": 6596 }, { "epoch": 0.20218830452372197, "grad_norm": 1.2832118011543643, "learning_rate": 9.242471624099102e-06, "loss": 0.7276, "step": 6597 }, { "epoch": 0.20221895304646317, "grad_norm": 1.2878083135055665, "learning_rate": 9.242208949575286e-06, "loss": 0.6637, "step": 6598 }, { "epoch": 0.20224960156920435, "grad_norm": 1.121384263533575, "learning_rate": 9.241946233251899e-06, "loss": 0.6145, "step": 6599 }, { "epoch": 0.20228025009194556, "grad_norm": 1.223122759287425, "learning_rate": 9.241683475131531e-06, "loss": 0.6672, "step": 6600 }, { "epoch": 0.20231089861468676, "grad_norm": 1.4957978797444258, "learning_rate": 9.24142067521677e-06, "loss": 0.7365, "step": 6601 }, { "epoch": 0.20234154713742797, "grad_norm": 1.1892881378687654, "learning_rate": 9.241157833510206e-06, "loss": 0.7503, "step": 6602 }, { "epoch": 0.20237219566016917, "grad_norm": 1.2557840115377725, "learning_rate": 9.240894950014429e-06, "loss": 0.65, "step": 6603 }, { "epoch": 0.20240284418291038, "grad_norm": 1.2990380730023328, "learning_rate": 9.240632024732027e-06, "loss": 0.7648, "step": 6604 }, { "epoch": 0.20243349270565159, "grad_norm": 1.371389527688379, "learning_rate": 9.240369057665595e-06, "loss": 0.7091, "step": 6605 }, { "epoch": 0.2024641412283928, "grad_norm": 1.38022094310897, "learning_rate": 9.24010604881772e-06, "loss": 0.6581, "step": 6606 }, { "epoch": 0.202494789751134, "grad_norm": 1.4560879955008177, "learning_rate": 9.239842998190997e-06, "loss": 0.8024, "step": 6607 }, { "epoch": 0.2025254382738752, "grad_norm": 1.2987923547602038, "learning_rate": 9.239579905788016e-06, "loss": 0.7549, "step": 6608 }, { "epoch": 0.2025560867966164, "grad_norm": 0.6184708764326058, "learning_rate": 9.239316771611369e-06, "loss": 0.445, "step": 6609 }, { "epoch": 0.2025867353193576, "grad_norm": 1.226396177399663, "learning_rate": 9.239053595663649e-06, "loss": 0.794, "step": 6610 }, { "epoch": 0.20261738384209882, "grad_norm": 1.3463998991743689, "learning_rate": 9.23879037794745e-06, "loss": 0.7844, "step": 6611 }, { "epoch": 0.20264803236484003, "grad_norm": 0.4726935099581215, "learning_rate": 9.238527118465364e-06, "loss": 0.4369, "step": 6612 }, { "epoch": 0.20267868088758123, "grad_norm": 1.3132075314662635, "learning_rate": 9.238263817219986e-06, "loss": 0.7967, "step": 6613 }, { "epoch": 0.2027093294103224, "grad_norm": 1.3290889516702094, "learning_rate": 9.23800047421391e-06, "loss": 0.7484, "step": 6614 }, { "epoch": 0.20273997793306361, "grad_norm": 1.548827457941671, "learning_rate": 9.237737089449731e-06, "loss": 0.7656, "step": 6615 }, { "epoch": 0.20277062645580482, "grad_norm": 6.506576498942628, "learning_rate": 9.237473662930045e-06, "loss": 0.7482, "step": 6616 }, { "epoch": 0.20280127497854603, "grad_norm": 1.3138991808391225, "learning_rate": 9.237210194657447e-06, "loss": 0.6467, "step": 6617 }, { "epoch": 0.20283192350128723, "grad_norm": 1.2665982320664786, "learning_rate": 9.236946684634531e-06, "loss": 0.7577, "step": 6618 }, { "epoch": 0.20286257202402844, "grad_norm": 1.273082995131102, "learning_rate": 9.236683132863897e-06, "loss": 0.691, "step": 6619 }, { "epoch": 0.20289322054676964, "grad_norm": 1.2085333514797245, "learning_rate": 9.236419539348138e-06, "loss": 0.6207, "step": 6620 }, { "epoch": 0.20292386906951085, "grad_norm": 1.1369265714277397, "learning_rate": 9.236155904089856e-06, "loss": 0.7308, "step": 6621 }, { "epoch": 0.20295451759225205, "grad_norm": 1.373790981329435, "learning_rate": 9.235892227091645e-06, "loss": 0.7066, "step": 6622 }, { "epoch": 0.20298516611499326, "grad_norm": 1.181640375845231, "learning_rate": 9.235628508356102e-06, "loss": 0.6256, "step": 6623 }, { "epoch": 0.20301581463773447, "grad_norm": 1.206902617245641, "learning_rate": 9.235364747885831e-06, "loss": 0.6834, "step": 6624 }, { "epoch": 0.20304646316047567, "grad_norm": 1.8399673618369015, "learning_rate": 9.235100945683425e-06, "loss": 0.7621, "step": 6625 }, { "epoch": 0.20307711168321688, "grad_norm": 1.2747493829483816, "learning_rate": 9.234837101751486e-06, "loss": 0.7023, "step": 6626 }, { "epoch": 0.20310776020595808, "grad_norm": 1.2346615894762518, "learning_rate": 9.234573216092615e-06, "loss": 0.6639, "step": 6627 }, { "epoch": 0.2031384087286993, "grad_norm": 1.1549133422324533, "learning_rate": 9.23430928870941e-06, "loss": 0.6887, "step": 6628 }, { "epoch": 0.2031690572514405, "grad_norm": 1.3400423733242648, "learning_rate": 9.234045319604472e-06, "loss": 0.6413, "step": 6629 }, { "epoch": 0.20319970577418167, "grad_norm": 0.7051093476008314, "learning_rate": 9.233781308780402e-06, "loss": 0.4612, "step": 6630 }, { "epoch": 0.20323035429692288, "grad_norm": 1.2194284205919834, "learning_rate": 9.233517256239801e-06, "loss": 0.7494, "step": 6631 }, { "epoch": 0.20326100281966408, "grad_norm": 1.325831589432212, "learning_rate": 9.233253161985271e-06, "loss": 0.7597, "step": 6632 }, { "epoch": 0.2032916513424053, "grad_norm": 1.3716470419979203, "learning_rate": 9.232989026019417e-06, "loss": 0.7487, "step": 6633 }, { "epoch": 0.2033222998651465, "grad_norm": 1.4187402381880978, "learning_rate": 9.232724848344836e-06, "loss": 0.7782, "step": 6634 }, { "epoch": 0.2033529483878877, "grad_norm": 1.4107290692702639, "learning_rate": 9.232460628964138e-06, "loss": 0.7389, "step": 6635 }, { "epoch": 0.2033835969106289, "grad_norm": 1.1957968118862325, "learning_rate": 9.23219636787992e-06, "loss": 0.7666, "step": 6636 }, { "epoch": 0.2034142454333701, "grad_norm": 1.4281239733755513, "learning_rate": 9.231932065094787e-06, "loss": 0.6577, "step": 6637 }, { "epoch": 0.20344489395611132, "grad_norm": 1.372826543754081, "learning_rate": 9.231667720611345e-06, "loss": 0.8148, "step": 6638 }, { "epoch": 0.20347554247885252, "grad_norm": 1.24884578977292, "learning_rate": 9.2314033344322e-06, "loss": 0.6823, "step": 6639 }, { "epoch": 0.20350619100159373, "grad_norm": 1.3824409237549828, "learning_rate": 9.231138906559956e-06, "loss": 0.768, "step": 6640 }, { "epoch": 0.20353683952433493, "grad_norm": 1.3543320033384492, "learning_rate": 9.230874436997215e-06, "loss": 0.7182, "step": 6641 }, { "epoch": 0.20356748804707614, "grad_norm": 1.5764635925314452, "learning_rate": 9.230609925746588e-06, "loss": 0.7685, "step": 6642 }, { "epoch": 0.20359813656981735, "grad_norm": 0.727889004907329, "learning_rate": 9.230345372810676e-06, "loss": 0.4631, "step": 6643 }, { "epoch": 0.20362878509255855, "grad_norm": 0.6241635542785183, "learning_rate": 9.230080778192091e-06, "loss": 0.4363, "step": 6644 }, { "epoch": 0.20365943361529976, "grad_norm": 1.3780028824058876, "learning_rate": 9.229816141893438e-06, "loss": 0.7046, "step": 6645 }, { "epoch": 0.20369008213804093, "grad_norm": 1.4254667369656189, "learning_rate": 9.229551463917323e-06, "loss": 0.7668, "step": 6646 }, { "epoch": 0.20372073066078214, "grad_norm": 1.3163118336224189, "learning_rate": 9.229286744266356e-06, "loss": 0.7565, "step": 6647 }, { "epoch": 0.20375137918352335, "grad_norm": 1.2751501746732383, "learning_rate": 9.229021982943144e-06, "loss": 0.731, "step": 6648 }, { "epoch": 0.20378202770626455, "grad_norm": 1.406440352546216, "learning_rate": 9.228757179950298e-06, "loss": 0.7746, "step": 6649 }, { "epoch": 0.20381267622900576, "grad_norm": 1.4707733484007846, "learning_rate": 9.228492335290423e-06, "loss": 0.6431, "step": 6650 }, { "epoch": 0.20384332475174696, "grad_norm": 0.79169254593252, "learning_rate": 9.228227448966132e-06, "loss": 0.457, "step": 6651 }, { "epoch": 0.20387397327448817, "grad_norm": 0.7305339707489595, "learning_rate": 9.227962520980037e-06, "loss": 0.4628, "step": 6652 }, { "epoch": 0.20390462179722937, "grad_norm": 1.3535470049978846, "learning_rate": 9.227697551334741e-06, "loss": 0.6848, "step": 6653 }, { "epoch": 0.20393527031997058, "grad_norm": 1.502407791883565, "learning_rate": 9.227432540032862e-06, "loss": 0.6948, "step": 6654 }, { "epoch": 0.20396591884271179, "grad_norm": 1.2657785250473765, "learning_rate": 9.227167487077008e-06, "loss": 0.7362, "step": 6655 }, { "epoch": 0.203996567365453, "grad_norm": 1.2767730691179102, "learning_rate": 9.226902392469792e-06, "loss": 0.8012, "step": 6656 }, { "epoch": 0.2040272158881942, "grad_norm": 0.711987229461968, "learning_rate": 9.226637256213825e-06, "loss": 0.4625, "step": 6657 }, { "epoch": 0.2040578644109354, "grad_norm": 1.4067883277362745, "learning_rate": 9.226372078311718e-06, "loss": 0.8051, "step": 6658 }, { "epoch": 0.2040885129336766, "grad_norm": 1.299569146255191, "learning_rate": 9.226106858766086e-06, "loss": 0.6268, "step": 6659 }, { "epoch": 0.2041191614564178, "grad_norm": 1.3361383863797422, "learning_rate": 9.225841597579543e-06, "loss": 0.6858, "step": 6660 }, { "epoch": 0.204149809979159, "grad_norm": 1.4296960047072738, "learning_rate": 9.2255762947547e-06, "loss": 0.7184, "step": 6661 }, { "epoch": 0.2041804585019002, "grad_norm": 1.3337507633978816, "learning_rate": 9.225310950294173e-06, "loss": 0.6442, "step": 6662 }, { "epoch": 0.2042111070246414, "grad_norm": 1.2140466239316412, "learning_rate": 9.225045564200578e-06, "loss": 0.7106, "step": 6663 }, { "epoch": 0.2042417555473826, "grad_norm": 1.2707280149396802, "learning_rate": 9.224780136476526e-06, "loss": 0.6339, "step": 6664 }, { "epoch": 0.20427240407012381, "grad_norm": 1.3946675035238816, "learning_rate": 9.224514667124636e-06, "loss": 0.7625, "step": 6665 }, { "epoch": 0.20430305259286502, "grad_norm": 0.5388396610276891, "learning_rate": 9.22424915614752e-06, "loss": 0.4499, "step": 6666 }, { "epoch": 0.20433370111560623, "grad_norm": 1.170494629914577, "learning_rate": 9.223983603547797e-06, "loss": 0.7449, "step": 6667 }, { "epoch": 0.20436434963834743, "grad_norm": 1.5734076064558953, "learning_rate": 9.223718009328085e-06, "loss": 0.718, "step": 6668 }, { "epoch": 0.20439499816108864, "grad_norm": 1.3108778634587341, "learning_rate": 9.223452373490995e-06, "loss": 0.7421, "step": 6669 }, { "epoch": 0.20442564668382984, "grad_norm": 1.2892295992356193, "learning_rate": 9.22318669603915e-06, "loss": 0.746, "step": 6670 }, { "epoch": 0.20445629520657105, "grad_norm": 1.375896350390168, "learning_rate": 9.222920976975166e-06, "loss": 0.7497, "step": 6671 }, { "epoch": 0.20448694372931225, "grad_norm": 1.427361826870776, "learning_rate": 9.222655216301663e-06, "loss": 0.7794, "step": 6672 }, { "epoch": 0.20451759225205346, "grad_norm": 1.3073104515988585, "learning_rate": 9.222389414021256e-06, "loss": 0.7215, "step": 6673 }, { "epoch": 0.20454824077479467, "grad_norm": 1.4368407638867247, "learning_rate": 9.222123570136565e-06, "loss": 0.6893, "step": 6674 }, { "epoch": 0.20457888929753587, "grad_norm": 1.3874304583043546, "learning_rate": 9.22185768465021e-06, "loss": 0.7183, "step": 6675 }, { "epoch": 0.20460953782027708, "grad_norm": 1.295430007767359, "learning_rate": 9.221591757564811e-06, "loss": 0.7133, "step": 6676 }, { "epoch": 0.20464018634301825, "grad_norm": 1.3400820252131227, "learning_rate": 9.221325788882991e-06, "loss": 0.6902, "step": 6677 }, { "epoch": 0.20467083486575946, "grad_norm": 1.39182957394508, "learning_rate": 9.221059778607366e-06, "loss": 0.6657, "step": 6678 }, { "epoch": 0.20470148338850067, "grad_norm": 1.304340138658148, "learning_rate": 9.220793726740558e-06, "loss": 0.7816, "step": 6679 }, { "epoch": 0.20473213191124187, "grad_norm": 1.3303275578668166, "learning_rate": 9.22052763328519e-06, "loss": 0.6362, "step": 6680 }, { "epoch": 0.20476278043398308, "grad_norm": 0.5168758277416549, "learning_rate": 9.220261498243886e-06, "loss": 0.4738, "step": 6681 }, { "epoch": 0.20479342895672428, "grad_norm": 1.106850845867741, "learning_rate": 9.219995321619263e-06, "loss": 0.569, "step": 6682 }, { "epoch": 0.2048240774794655, "grad_norm": 1.2765060235247494, "learning_rate": 9.219729103413946e-06, "loss": 0.6596, "step": 6683 }, { "epoch": 0.2048547260022067, "grad_norm": 1.509441100253391, "learning_rate": 9.21946284363056e-06, "loss": 0.6771, "step": 6684 }, { "epoch": 0.2048853745249479, "grad_norm": 1.262961502009222, "learning_rate": 9.219196542271727e-06, "loss": 0.6801, "step": 6685 }, { "epoch": 0.2049160230476891, "grad_norm": 1.2988184294203742, "learning_rate": 9.218930199340071e-06, "loss": 0.5854, "step": 6686 }, { "epoch": 0.2049466715704303, "grad_norm": 1.2948730760150535, "learning_rate": 9.218663814838215e-06, "loss": 0.7405, "step": 6687 }, { "epoch": 0.20497732009317152, "grad_norm": 1.4371776345031935, "learning_rate": 9.218397388768785e-06, "loss": 0.7517, "step": 6688 }, { "epoch": 0.20500796861591272, "grad_norm": 1.2146290520058378, "learning_rate": 9.218130921134408e-06, "loss": 0.7299, "step": 6689 }, { "epoch": 0.20503861713865393, "grad_norm": 1.3748505026015523, "learning_rate": 9.217864411937704e-06, "loss": 0.7386, "step": 6690 }, { "epoch": 0.20506926566139513, "grad_norm": 1.112724379236674, "learning_rate": 9.217597861181306e-06, "loss": 0.6063, "step": 6691 }, { "epoch": 0.2050999141841363, "grad_norm": 1.159908123697851, "learning_rate": 9.217331268867838e-06, "loss": 0.6275, "step": 6692 }, { "epoch": 0.20513056270687752, "grad_norm": 0.4982008365616322, "learning_rate": 9.217064634999925e-06, "loss": 0.457, "step": 6693 }, { "epoch": 0.20516121122961872, "grad_norm": 1.2256236480983442, "learning_rate": 9.216797959580193e-06, "loss": 0.707, "step": 6694 }, { "epoch": 0.20519185975235993, "grad_norm": 1.3168230410972337, "learning_rate": 9.216531242611275e-06, "loss": 0.8375, "step": 6695 }, { "epoch": 0.20522250827510113, "grad_norm": 1.2205969272736514, "learning_rate": 9.216264484095794e-06, "loss": 0.5661, "step": 6696 }, { "epoch": 0.20525315679784234, "grad_norm": 1.6787216908713656, "learning_rate": 9.21599768403638e-06, "loss": 0.7189, "step": 6697 }, { "epoch": 0.20528380532058355, "grad_norm": 1.3608502986622855, "learning_rate": 9.215730842435663e-06, "loss": 0.7943, "step": 6698 }, { "epoch": 0.20531445384332475, "grad_norm": 1.529258240566261, "learning_rate": 9.215463959296272e-06, "loss": 0.7715, "step": 6699 }, { "epoch": 0.20534510236606596, "grad_norm": 1.3410020415712276, "learning_rate": 9.215197034620835e-06, "loss": 0.7806, "step": 6700 }, { "epoch": 0.20537575088880716, "grad_norm": 1.3603002288895376, "learning_rate": 9.214930068411982e-06, "loss": 0.7851, "step": 6701 }, { "epoch": 0.20540639941154837, "grad_norm": 0.4678997559053219, "learning_rate": 9.214663060672347e-06, "loss": 0.431, "step": 6702 }, { "epoch": 0.20543704793428957, "grad_norm": 1.1689310361180694, "learning_rate": 9.214396011404557e-06, "loss": 0.7035, "step": 6703 }, { "epoch": 0.20546769645703078, "grad_norm": 1.2322815562218383, "learning_rate": 9.214128920611246e-06, "loss": 0.6428, "step": 6704 }, { "epoch": 0.20549834497977199, "grad_norm": 1.2656465608360543, "learning_rate": 9.213861788295043e-06, "loss": 0.7394, "step": 6705 }, { "epoch": 0.2055289935025132, "grad_norm": 1.364324393187273, "learning_rate": 9.21359461445858e-06, "loss": 0.7894, "step": 6706 }, { "epoch": 0.2055596420252544, "grad_norm": 0.4681397215999968, "learning_rate": 9.213327399104495e-06, "loss": 0.4637, "step": 6707 }, { "epoch": 0.20559029054799557, "grad_norm": 1.4118876414070414, "learning_rate": 9.213060142235415e-06, "loss": 0.8023, "step": 6708 }, { "epoch": 0.20562093907073678, "grad_norm": 1.4482518653616185, "learning_rate": 9.212792843853976e-06, "loss": 0.7006, "step": 6709 }, { "epoch": 0.20565158759347799, "grad_norm": 1.3322994507939314, "learning_rate": 9.212525503962812e-06, "loss": 0.7082, "step": 6710 }, { "epoch": 0.2056822361162192, "grad_norm": 1.3345553514945057, "learning_rate": 9.212258122564554e-06, "loss": 0.7097, "step": 6711 }, { "epoch": 0.2057128846389604, "grad_norm": 1.3263914994681558, "learning_rate": 9.21199069966184e-06, "loss": 0.6409, "step": 6712 }, { "epoch": 0.2057435331617016, "grad_norm": 1.3340897276657888, "learning_rate": 9.211723235257306e-06, "loss": 0.6519, "step": 6713 }, { "epoch": 0.2057741816844428, "grad_norm": 1.344841431194047, "learning_rate": 9.211455729353584e-06, "loss": 0.5844, "step": 6714 }, { "epoch": 0.20580483020718401, "grad_norm": 1.5299393341539362, "learning_rate": 9.21118818195331e-06, "loss": 0.4409, "step": 6715 }, { "epoch": 0.20583547872992522, "grad_norm": 1.2736352853580652, "learning_rate": 9.210920593059124e-06, "loss": 0.7076, "step": 6716 }, { "epoch": 0.20586612725266643, "grad_norm": 1.3340477684887813, "learning_rate": 9.210652962673658e-06, "loss": 0.6993, "step": 6717 }, { "epoch": 0.20589677577540763, "grad_norm": 1.3099224784783374, "learning_rate": 9.210385290799551e-06, "loss": 0.634, "step": 6718 }, { "epoch": 0.20592742429814884, "grad_norm": 1.414961411239251, "learning_rate": 9.210117577439441e-06, "loss": 0.8174, "step": 6719 }, { "epoch": 0.20595807282089004, "grad_norm": 0.500276827486864, "learning_rate": 9.209849822595964e-06, "loss": 0.4724, "step": 6720 }, { "epoch": 0.20598872134363125, "grad_norm": 1.3150353642212458, "learning_rate": 9.209582026271762e-06, "loss": 0.7076, "step": 6721 }, { "epoch": 0.20601936986637245, "grad_norm": 1.5362050113035952, "learning_rate": 9.209314188469469e-06, "loss": 0.7593, "step": 6722 }, { "epoch": 0.20605001838911363, "grad_norm": 1.3371442570668572, "learning_rate": 9.209046309191727e-06, "loss": 0.7795, "step": 6723 }, { "epoch": 0.20608066691185484, "grad_norm": 1.2555330636005073, "learning_rate": 9.208778388441175e-06, "loss": 0.6962, "step": 6724 }, { "epoch": 0.20611131543459604, "grad_norm": 1.2708317709243726, "learning_rate": 9.208510426220454e-06, "loss": 0.5875, "step": 6725 }, { "epoch": 0.20614196395733725, "grad_norm": 1.3652682144517532, "learning_rate": 9.2082424225322e-06, "loss": 0.6574, "step": 6726 }, { "epoch": 0.20617261248007845, "grad_norm": 0.5024089691732795, "learning_rate": 9.20797437737906e-06, "loss": 0.4419, "step": 6727 }, { "epoch": 0.20620326100281966, "grad_norm": 1.3119009854245107, "learning_rate": 9.20770629076367e-06, "loss": 0.7667, "step": 6728 }, { "epoch": 0.20623390952556087, "grad_norm": 1.3295068784673334, "learning_rate": 9.207438162688673e-06, "loss": 0.7813, "step": 6729 }, { "epoch": 0.20626455804830207, "grad_norm": 1.280821321034739, "learning_rate": 9.207169993156713e-06, "loss": 0.7918, "step": 6730 }, { "epoch": 0.20629520657104328, "grad_norm": 1.28566450821754, "learning_rate": 9.206901782170428e-06, "loss": 0.6959, "step": 6731 }, { "epoch": 0.20632585509378448, "grad_norm": 1.4489830932578491, "learning_rate": 9.206633529732465e-06, "loss": 0.7707, "step": 6732 }, { "epoch": 0.2063565036165257, "grad_norm": 1.3238487129644045, "learning_rate": 9.206365235845465e-06, "loss": 0.7071, "step": 6733 }, { "epoch": 0.2063871521392669, "grad_norm": 1.2985266654810668, "learning_rate": 9.206096900512072e-06, "loss": 0.652, "step": 6734 }, { "epoch": 0.2064178006620081, "grad_norm": 1.296641898045185, "learning_rate": 9.205828523734931e-06, "loss": 0.6344, "step": 6735 }, { "epoch": 0.2064484491847493, "grad_norm": 1.1503457449830006, "learning_rate": 9.205560105516684e-06, "loss": 0.7217, "step": 6736 }, { "epoch": 0.2064790977074905, "grad_norm": 1.363241362603328, "learning_rate": 9.20529164585998e-06, "loss": 0.756, "step": 6737 }, { "epoch": 0.20650974623023172, "grad_norm": 1.3445027231464644, "learning_rate": 9.205023144767457e-06, "loss": 0.7386, "step": 6738 }, { "epoch": 0.2065403947529729, "grad_norm": 1.1421644806632345, "learning_rate": 9.20475460224177e-06, "loss": 0.7272, "step": 6739 }, { "epoch": 0.2065710432757141, "grad_norm": 1.3053239566178265, "learning_rate": 9.204486018285557e-06, "loss": 0.7295, "step": 6740 }, { "epoch": 0.2066016917984553, "grad_norm": 1.1849332732466644, "learning_rate": 9.204217392901468e-06, "loss": 0.6529, "step": 6741 }, { "epoch": 0.2066323403211965, "grad_norm": 0.5244939601514912, "learning_rate": 9.20394872609215e-06, "loss": 0.4527, "step": 6742 }, { "epoch": 0.20666298884393772, "grad_norm": 1.3737767893729365, "learning_rate": 9.203680017860249e-06, "loss": 0.7542, "step": 6743 }, { "epoch": 0.20669363736667892, "grad_norm": 1.3672461184889866, "learning_rate": 9.203411268208413e-06, "loss": 0.6207, "step": 6744 }, { "epoch": 0.20672428588942013, "grad_norm": 1.196954333135677, "learning_rate": 9.20314247713929e-06, "loss": 0.6818, "step": 6745 }, { "epoch": 0.20675493441216133, "grad_norm": 1.2581222225055466, "learning_rate": 9.20287364465553e-06, "loss": 0.731, "step": 6746 }, { "epoch": 0.20678558293490254, "grad_norm": 1.5019674942051118, "learning_rate": 9.20260477075978e-06, "loss": 0.7058, "step": 6747 }, { "epoch": 0.20681623145764375, "grad_norm": 0.5103483620105408, "learning_rate": 9.20233585545469e-06, "loss": 0.4513, "step": 6748 }, { "epoch": 0.20684687998038495, "grad_norm": 1.4273093350984314, "learning_rate": 9.20206689874291e-06, "loss": 0.6848, "step": 6749 }, { "epoch": 0.20687752850312616, "grad_norm": 0.4388020357103123, "learning_rate": 9.201797900627087e-06, "loss": 0.4385, "step": 6750 }, { "epoch": 0.20690817702586736, "grad_norm": 1.2343879338849864, "learning_rate": 9.201528861109877e-06, "loss": 0.6872, "step": 6751 }, { "epoch": 0.20693882554860857, "grad_norm": 0.4657061748068661, "learning_rate": 9.201259780193927e-06, "loss": 0.4476, "step": 6752 }, { "epoch": 0.20696947407134977, "grad_norm": 1.4415969642658402, "learning_rate": 9.200990657881891e-06, "loss": 0.6832, "step": 6753 }, { "epoch": 0.20700012259409095, "grad_norm": 1.296350195257637, "learning_rate": 9.200721494176418e-06, "loss": 0.7458, "step": 6754 }, { "epoch": 0.20703077111683216, "grad_norm": 1.3861185369440476, "learning_rate": 9.200452289080161e-06, "loss": 0.7296, "step": 6755 }, { "epoch": 0.20706141963957336, "grad_norm": 1.3375949552539956, "learning_rate": 9.200183042595775e-06, "loss": 0.7618, "step": 6756 }, { "epoch": 0.20709206816231457, "grad_norm": 1.3345635038707455, "learning_rate": 9.199913754725908e-06, "loss": 0.7455, "step": 6757 }, { "epoch": 0.20712271668505577, "grad_norm": 1.1390367717900955, "learning_rate": 9.199644425473217e-06, "loss": 0.6882, "step": 6758 }, { "epoch": 0.20715336520779698, "grad_norm": 1.3346952009788386, "learning_rate": 9.199375054840356e-06, "loss": 0.6625, "step": 6759 }, { "epoch": 0.20718401373053819, "grad_norm": 1.3640563935500278, "learning_rate": 9.199105642829977e-06, "loss": 0.7482, "step": 6760 }, { "epoch": 0.2072146622532794, "grad_norm": 1.248033075434793, "learning_rate": 9.198836189444735e-06, "loss": 0.6509, "step": 6761 }, { "epoch": 0.2072453107760206, "grad_norm": 1.3786235299991265, "learning_rate": 9.198566694687288e-06, "loss": 0.6745, "step": 6762 }, { "epoch": 0.2072759592987618, "grad_norm": 1.433143131263637, "learning_rate": 9.198297158560288e-06, "loss": 0.6782, "step": 6763 }, { "epoch": 0.207306607821503, "grad_norm": 1.4311975774232666, "learning_rate": 9.19802758106639e-06, "loss": 0.7903, "step": 6764 }, { "epoch": 0.20733725634424421, "grad_norm": 1.1946157189732483, "learning_rate": 9.197757962208257e-06, "loss": 0.6184, "step": 6765 }, { "epoch": 0.20736790486698542, "grad_norm": 1.2599546041046696, "learning_rate": 9.197488301988535e-06, "loss": 0.726, "step": 6766 }, { "epoch": 0.20739855338972663, "grad_norm": 1.4440699907873193, "learning_rate": 9.197218600409889e-06, "loss": 0.7664, "step": 6767 }, { "epoch": 0.20742920191246783, "grad_norm": 1.2227749625051845, "learning_rate": 9.196948857474976e-06, "loss": 0.649, "step": 6768 }, { "epoch": 0.20745985043520904, "grad_norm": 1.313769270255491, "learning_rate": 9.19667907318645e-06, "loss": 0.7189, "step": 6769 }, { "epoch": 0.20749049895795021, "grad_norm": 1.3846479451804585, "learning_rate": 9.196409247546973e-06, "loss": 0.7271, "step": 6770 }, { "epoch": 0.20752114748069142, "grad_norm": 1.1276258512939976, "learning_rate": 9.196139380559201e-06, "loss": 0.6723, "step": 6771 }, { "epoch": 0.20755179600343263, "grad_norm": 0.5299067818476599, "learning_rate": 9.195869472225794e-06, "loss": 0.4593, "step": 6772 }, { "epoch": 0.20758244452617383, "grad_norm": 1.4891429494468715, "learning_rate": 9.195599522549412e-06, "loss": 0.759, "step": 6773 }, { "epoch": 0.20761309304891504, "grad_norm": 1.4141150136783003, "learning_rate": 9.195329531532715e-06, "loss": 0.7822, "step": 6774 }, { "epoch": 0.20764374157165624, "grad_norm": 1.3135871146145472, "learning_rate": 9.19505949917836e-06, "loss": 0.7862, "step": 6775 }, { "epoch": 0.20767439009439745, "grad_norm": 1.1691388063224968, "learning_rate": 9.194789425489012e-06, "loss": 0.583, "step": 6776 }, { "epoch": 0.20770503861713865, "grad_norm": 1.1103525331559556, "learning_rate": 9.194519310467332e-06, "loss": 0.65, "step": 6777 }, { "epoch": 0.20773568713987986, "grad_norm": 1.5541440123491652, "learning_rate": 9.194249154115978e-06, "loss": 0.8055, "step": 6778 }, { "epoch": 0.20776633566262107, "grad_norm": 1.3447272497177725, "learning_rate": 9.193978956437615e-06, "loss": 0.7055, "step": 6779 }, { "epoch": 0.20779698418536227, "grad_norm": 1.3112528615725212, "learning_rate": 9.193708717434904e-06, "loss": 0.6517, "step": 6780 }, { "epoch": 0.20782763270810348, "grad_norm": 1.2599587571476856, "learning_rate": 9.193438437110508e-06, "loss": 0.7068, "step": 6781 }, { "epoch": 0.20785828123084468, "grad_norm": 1.2784544211876518, "learning_rate": 9.19316811546709e-06, "loss": 0.7473, "step": 6782 }, { "epoch": 0.2078889297535859, "grad_norm": 1.5084086723231809, "learning_rate": 9.192897752507314e-06, "loss": 0.7222, "step": 6783 }, { "epoch": 0.2079195782763271, "grad_norm": 1.404369886651973, "learning_rate": 9.192627348233842e-06, "loss": 0.7746, "step": 6784 }, { "epoch": 0.20795022679906827, "grad_norm": 1.2965862822579706, "learning_rate": 9.192356902649342e-06, "loss": 0.6883, "step": 6785 }, { "epoch": 0.20798087532180948, "grad_norm": 1.215150176809173, "learning_rate": 9.192086415756476e-06, "loss": 0.7401, "step": 6786 }, { "epoch": 0.20801152384455068, "grad_norm": 1.3827729141895966, "learning_rate": 9.19181588755791e-06, "loss": 0.577, "step": 6787 }, { "epoch": 0.2080421723672919, "grad_norm": 1.2190563858358672, "learning_rate": 9.19154531805631e-06, "loss": 0.7287, "step": 6788 }, { "epoch": 0.2080728208900331, "grad_norm": 1.2628702791568058, "learning_rate": 9.191274707254342e-06, "loss": 0.6119, "step": 6789 }, { "epoch": 0.2081034694127743, "grad_norm": 0.6794649208327411, "learning_rate": 9.19100405515467e-06, "loss": 0.4354, "step": 6790 }, { "epoch": 0.2081341179355155, "grad_norm": 1.355953172090038, "learning_rate": 9.190733361759964e-06, "loss": 0.8162, "step": 6791 }, { "epoch": 0.2081647664582567, "grad_norm": 1.316365312995831, "learning_rate": 9.190462627072891e-06, "loss": 0.6579, "step": 6792 }, { "epoch": 0.20819541498099792, "grad_norm": 1.1443865755612577, "learning_rate": 9.190191851096116e-06, "loss": 0.6146, "step": 6793 }, { "epoch": 0.20822606350373912, "grad_norm": 1.366636123746484, "learning_rate": 9.18992103383231e-06, "loss": 0.6671, "step": 6794 }, { "epoch": 0.20825671202648033, "grad_norm": 1.4828219523853696, "learning_rate": 9.189650175284139e-06, "loss": 0.7411, "step": 6795 }, { "epoch": 0.20828736054922153, "grad_norm": 1.2216190070179398, "learning_rate": 9.18937927545427e-06, "loss": 0.6704, "step": 6796 }, { "epoch": 0.20831800907196274, "grad_norm": 1.2948664206607454, "learning_rate": 9.18910833434538e-06, "loss": 0.7635, "step": 6797 }, { "epoch": 0.20834865759470395, "grad_norm": 0.5507575378322367, "learning_rate": 9.188837351960132e-06, "loss": 0.4697, "step": 6798 }, { "epoch": 0.20837930611744515, "grad_norm": 1.1395600207126009, "learning_rate": 9.188566328301196e-06, "loss": 0.6396, "step": 6799 }, { "epoch": 0.20840995464018636, "grad_norm": 1.4750988786896235, "learning_rate": 9.188295263371247e-06, "loss": 0.7762, "step": 6800 }, { "epoch": 0.20844060316292753, "grad_norm": 0.4499379966413769, "learning_rate": 9.188024157172952e-06, "loss": 0.46, "step": 6801 }, { "epoch": 0.20847125168566874, "grad_norm": 1.4644777043324035, "learning_rate": 9.187753009708983e-06, "loss": 0.63, "step": 6802 }, { "epoch": 0.20850190020840995, "grad_norm": 1.375203610570652, "learning_rate": 9.187481820982011e-06, "loss": 0.7275, "step": 6803 }, { "epoch": 0.20853254873115115, "grad_norm": 1.1765091681232784, "learning_rate": 9.187210590994711e-06, "loss": 0.7777, "step": 6804 }, { "epoch": 0.20856319725389236, "grad_norm": 1.319034103613196, "learning_rate": 9.186939319749756e-06, "loss": 0.7782, "step": 6805 }, { "epoch": 0.20859384577663356, "grad_norm": 1.3189923130741732, "learning_rate": 9.186668007249813e-06, "loss": 0.7557, "step": 6806 }, { "epoch": 0.20862449429937477, "grad_norm": 2.8180663593429975, "learning_rate": 9.18639665349756e-06, "loss": 0.6723, "step": 6807 }, { "epoch": 0.20865514282211597, "grad_norm": 1.331739182446984, "learning_rate": 9.186125258495668e-06, "loss": 0.7046, "step": 6808 }, { "epoch": 0.20868579134485718, "grad_norm": 0.517534109085744, "learning_rate": 9.185853822246814e-06, "loss": 0.4403, "step": 6809 }, { "epoch": 0.20871643986759839, "grad_norm": 0.5281909540347844, "learning_rate": 9.185582344753673e-06, "loss": 0.4447, "step": 6810 }, { "epoch": 0.2087470883903396, "grad_norm": 1.3982776006203508, "learning_rate": 9.185310826018916e-06, "loss": 0.7924, "step": 6811 }, { "epoch": 0.2087777369130808, "grad_norm": 1.3684121192202514, "learning_rate": 9.185039266045221e-06, "loss": 0.6386, "step": 6812 }, { "epoch": 0.208808385435822, "grad_norm": 1.1642568630740835, "learning_rate": 9.184767664835264e-06, "loss": 0.699, "step": 6813 }, { "epoch": 0.2088390339585632, "grad_norm": 1.2080411151732953, "learning_rate": 9.18449602239172e-06, "loss": 0.665, "step": 6814 }, { "epoch": 0.20886968248130441, "grad_norm": 1.3433992883327743, "learning_rate": 9.184224338717268e-06, "loss": 0.7523, "step": 6815 }, { "epoch": 0.2089003310040456, "grad_norm": 1.2856238225983716, "learning_rate": 9.18395261381458e-06, "loss": 0.6069, "step": 6816 }, { "epoch": 0.2089309795267868, "grad_norm": 1.2831198329727946, "learning_rate": 9.183680847686338e-06, "loss": 0.7822, "step": 6817 }, { "epoch": 0.208961628049528, "grad_norm": 1.364038068830077, "learning_rate": 9.183409040335218e-06, "loss": 0.7364, "step": 6818 }, { "epoch": 0.2089922765722692, "grad_norm": 1.1999743456358538, "learning_rate": 9.183137191763898e-06, "loss": 0.628, "step": 6819 }, { "epoch": 0.20902292509501041, "grad_norm": 1.2222979709787063, "learning_rate": 9.182865301975056e-06, "loss": 0.7203, "step": 6820 }, { "epoch": 0.20905357361775162, "grad_norm": 1.4146555496828586, "learning_rate": 9.182593370971373e-06, "loss": 0.7585, "step": 6821 }, { "epoch": 0.20908422214049283, "grad_norm": 1.5485895907864755, "learning_rate": 9.18232139875553e-06, "loss": 0.771, "step": 6822 }, { "epoch": 0.20911487066323403, "grad_norm": 1.2962217734703845, "learning_rate": 9.1820493853302e-06, "loss": 0.6192, "step": 6823 }, { "epoch": 0.20914551918597524, "grad_norm": 1.2097370753232377, "learning_rate": 9.181777330698069e-06, "loss": 0.7029, "step": 6824 }, { "epoch": 0.20917616770871644, "grad_norm": 1.3784660715950359, "learning_rate": 9.181505234861816e-06, "loss": 0.7305, "step": 6825 }, { "epoch": 0.20920681623145765, "grad_norm": 1.2381464176608676, "learning_rate": 9.181233097824123e-06, "loss": 0.6391, "step": 6826 }, { "epoch": 0.20923746475419885, "grad_norm": 1.2735565423992692, "learning_rate": 9.18096091958767e-06, "loss": 0.7398, "step": 6827 }, { "epoch": 0.20926811327694006, "grad_norm": 1.51534917290032, "learning_rate": 9.180688700155138e-06, "loss": 0.7068, "step": 6828 }, { "epoch": 0.20929876179968127, "grad_norm": 1.280462864911339, "learning_rate": 9.180416439529211e-06, "loss": 0.7651, "step": 6829 }, { "epoch": 0.20932941032242247, "grad_norm": 1.3085342920241434, "learning_rate": 9.18014413771257e-06, "loss": 0.7066, "step": 6830 }, { "epoch": 0.20936005884516368, "grad_norm": 1.353479855222095, "learning_rate": 9.179871794707903e-06, "loss": 0.8238, "step": 6831 }, { "epoch": 0.20939070736790485, "grad_norm": 1.3645953432973839, "learning_rate": 9.179599410517887e-06, "loss": 0.7263, "step": 6832 }, { "epoch": 0.20942135589064606, "grad_norm": 1.4252556623746082, "learning_rate": 9.17932698514521e-06, "loss": 0.782, "step": 6833 }, { "epoch": 0.20945200441338727, "grad_norm": 1.3675358778076894, "learning_rate": 9.179054518592553e-06, "loss": 0.7517, "step": 6834 }, { "epoch": 0.20948265293612847, "grad_norm": 1.2296534041161407, "learning_rate": 9.178782010862603e-06, "loss": 0.7305, "step": 6835 }, { "epoch": 0.20951330145886968, "grad_norm": 1.3659928964579189, "learning_rate": 9.178509461958046e-06, "loss": 0.7534, "step": 6836 }, { "epoch": 0.20954394998161088, "grad_norm": 1.2840965135271911, "learning_rate": 9.178236871881565e-06, "loss": 0.7706, "step": 6837 }, { "epoch": 0.2095745985043521, "grad_norm": 1.2850003351157164, "learning_rate": 9.177964240635849e-06, "loss": 0.6915, "step": 6838 }, { "epoch": 0.2096052470270933, "grad_norm": 1.2867870840808344, "learning_rate": 9.17769156822358e-06, "loss": 0.684, "step": 6839 }, { "epoch": 0.2096358955498345, "grad_norm": 1.3447298212988255, "learning_rate": 9.177418854647447e-06, "loss": 0.6698, "step": 6840 }, { "epoch": 0.2096665440725757, "grad_norm": 0.598583673680917, "learning_rate": 9.17714609991014e-06, "loss": 0.4568, "step": 6841 }, { "epoch": 0.2096971925953169, "grad_norm": 0.5720028297537493, "learning_rate": 9.17687330401434e-06, "loss": 0.4489, "step": 6842 }, { "epoch": 0.20972784111805812, "grad_norm": 1.3271046031992633, "learning_rate": 9.176600466962741e-06, "loss": 0.7647, "step": 6843 }, { "epoch": 0.20975848964079932, "grad_norm": 1.3634518271700489, "learning_rate": 9.176327588758029e-06, "loss": 0.8055, "step": 6844 }, { "epoch": 0.20978913816354053, "grad_norm": 1.256919276484129, "learning_rate": 9.176054669402892e-06, "loss": 0.6729, "step": 6845 }, { "epoch": 0.20981978668628173, "grad_norm": 1.2756518811404747, "learning_rate": 9.17578170890002e-06, "loss": 0.812, "step": 6846 }, { "epoch": 0.2098504352090229, "grad_norm": 1.2788906198479564, "learning_rate": 9.175508707252102e-06, "loss": 0.7938, "step": 6847 }, { "epoch": 0.20988108373176412, "grad_norm": 1.3821170864389596, "learning_rate": 9.175235664461828e-06, "loss": 0.7204, "step": 6848 }, { "epoch": 0.20991173225450532, "grad_norm": 1.4554835795119505, "learning_rate": 9.17496258053189e-06, "loss": 0.763, "step": 6849 }, { "epoch": 0.20994238077724653, "grad_norm": 1.315329103656932, "learning_rate": 9.174689455464978e-06, "loss": 0.7898, "step": 6850 }, { "epoch": 0.20997302929998773, "grad_norm": 1.478896181400952, "learning_rate": 9.174416289263781e-06, "loss": 0.7847, "step": 6851 }, { "epoch": 0.21000367782272894, "grad_norm": 1.5157293145908854, "learning_rate": 9.174143081930993e-06, "loss": 0.6958, "step": 6852 }, { "epoch": 0.21003432634547015, "grad_norm": 1.353858716255957, "learning_rate": 9.173869833469304e-06, "loss": 0.6253, "step": 6853 }, { "epoch": 0.21006497486821135, "grad_norm": 1.2836386488638658, "learning_rate": 9.17359654388141e-06, "loss": 0.6906, "step": 6854 }, { "epoch": 0.21009562339095256, "grad_norm": 1.3127926129222416, "learning_rate": 9.17332321317e-06, "loss": 0.7216, "step": 6855 }, { "epoch": 0.21012627191369376, "grad_norm": 1.3172371057467804, "learning_rate": 9.173049841337768e-06, "loss": 0.7222, "step": 6856 }, { "epoch": 0.21015692043643497, "grad_norm": 1.4218860567907485, "learning_rate": 9.172776428387408e-06, "loss": 0.7424, "step": 6857 }, { "epoch": 0.21018756895917617, "grad_norm": 1.2155107782812453, "learning_rate": 9.172502974321616e-06, "loss": 0.7252, "step": 6858 }, { "epoch": 0.21021821748191738, "grad_norm": 1.0032247586084013, "learning_rate": 9.172229479143085e-06, "loss": 0.4704, "step": 6859 }, { "epoch": 0.21024886600465859, "grad_norm": 1.441963564271323, "learning_rate": 9.171955942854506e-06, "loss": 0.7417, "step": 6860 }, { "epoch": 0.2102795145273998, "grad_norm": 1.4036855397158698, "learning_rate": 9.17168236545858e-06, "loss": 0.7386, "step": 6861 }, { "epoch": 0.210310163050141, "grad_norm": 1.2300880796843872, "learning_rate": 9.171408746958e-06, "loss": 0.6669, "step": 6862 }, { "epoch": 0.21034081157288217, "grad_norm": 1.304350138681109, "learning_rate": 9.171135087355463e-06, "loss": 0.7731, "step": 6863 }, { "epoch": 0.21037146009562338, "grad_norm": 1.3512843332399564, "learning_rate": 9.170861386653665e-06, "loss": 0.7436, "step": 6864 }, { "epoch": 0.21040210861836459, "grad_norm": 1.3878756375876085, "learning_rate": 9.1705876448553e-06, "loss": 0.7094, "step": 6865 }, { "epoch": 0.2104327571411058, "grad_norm": 0.6666684700538998, "learning_rate": 9.17031386196307e-06, "loss": 0.4691, "step": 6866 }, { "epoch": 0.210463405663847, "grad_norm": 1.4013281916907947, "learning_rate": 9.170040037979671e-06, "loss": 0.7297, "step": 6867 }, { "epoch": 0.2104940541865882, "grad_norm": 1.263906973245361, "learning_rate": 9.169766172907799e-06, "loss": 0.7538, "step": 6868 }, { "epoch": 0.2105247027093294, "grad_norm": 1.5530306497213644, "learning_rate": 9.169492266750154e-06, "loss": 0.6772, "step": 6869 }, { "epoch": 0.21055535123207061, "grad_norm": 1.388260635773284, "learning_rate": 9.169218319509436e-06, "loss": 0.7107, "step": 6870 }, { "epoch": 0.21058599975481182, "grad_norm": 0.5751654708172502, "learning_rate": 9.168944331188342e-06, "loss": 0.4785, "step": 6871 }, { "epoch": 0.21061664827755303, "grad_norm": 1.3835952507094607, "learning_rate": 9.168670301789574e-06, "loss": 0.7699, "step": 6872 }, { "epoch": 0.21064729680029423, "grad_norm": 1.5241111551104398, "learning_rate": 9.16839623131583e-06, "loss": 0.7322, "step": 6873 }, { "epoch": 0.21067794532303544, "grad_norm": 1.2563581039314002, "learning_rate": 9.168122119769813e-06, "loss": 0.8129, "step": 6874 }, { "epoch": 0.21070859384577664, "grad_norm": 1.4207475341086548, "learning_rate": 9.167847967154219e-06, "loss": 0.6854, "step": 6875 }, { "epoch": 0.21073924236851785, "grad_norm": 1.1812725283820233, "learning_rate": 9.167573773471756e-06, "loss": 0.618, "step": 6876 }, { "epoch": 0.21076989089125905, "grad_norm": 1.2332941682787448, "learning_rate": 9.167299538725121e-06, "loss": 0.6706, "step": 6877 }, { "epoch": 0.21080053941400023, "grad_norm": 1.2300223870125409, "learning_rate": 9.167025262917018e-06, "loss": 0.679, "step": 6878 }, { "epoch": 0.21083118793674144, "grad_norm": 1.117809460646047, "learning_rate": 9.166750946050147e-06, "loss": 0.6672, "step": 6879 }, { "epoch": 0.21086183645948264, "grad_norm": 1.3176009036172482, "learning_rate": 9.166476588127215e-06, "loss": 0.7162, "step": 6880 }, { "epoch": 0.21089248498222385, "grad_norm": 1.235038951184222, "learning_rate": 9.166202189150922e-06, "loss": 0.7412, "step": 6881 }, { "epoch": 0.21092313350496505, "grad_norm": 1.3114514479999948, "learning_rate": 9.165927749123972e-06, "loss": 0.8142, "step": 6882 }, { "epoch": 0.21095378202770626, "grad_norm": 1.2073368854993167, "learning_rate": 9.165653268049072e-06, "loss": 0.6594, "step": 6883 }, { "epoch": 0.21098443055044747, "grad_norm": 1.2032229845937468, "learning_rate": 9.165378745928923e-06, "loss": 0.7036, "step": 6884 }, { "epoch": 0.21101507907318867, "grad_norm": 1.361174370602795, "learning_rate": 9.165104182766233e-06, "loss": 0.7021, "step": 6885 }, { "epoch": 0.21104572759592988, "grad_norm": 1.223894326710821, "learning_rate": 9.164829578563705e-06, "loss": 0.6323, "step": 6886 }, { "epoch": 0.21107637611867108, "grad_norm": 0.7246992187990589, "learning_rate": 9.164554933324045e-06, "loss": 0.4618, "step": 6887 }, { "epoch": 0.2111070246414123, "grad_norm": 1.1546737524715935, "learning_rate": 9.164280247049961e-06, "loss": 0.6757, "step": 6888 }, { "epoch": 0.2111376731641535, "grad_norm": 1.255329957040297, "learning_rate": 9.164005519744157e-06, "loss": 0.6047, "step": 6889 }, { "epoch": 0.2111683216868947, "grad_norm": 0.4816122144390692, "learning_rate": 9.163730751409342e-06, "loss": 0.4368, "step": 6890 }, { "epoch": 0.2111989702096359, "grad_norm": 1.238342161570482, "learning_rate": 9.16345594204822e-06, "loss": 0.7801, "step": 6891 }, { "epoch": 0.2112296187323771, "grad_norm": 1.374439289692506, "learning_rate": 9.163181091663504e-06, "loss": 0.7492, "step": 6892 }, { "epoch": 0.21126026725511832, "grad_norm": 1.3382804094970944, "learning_rate": 9.162906200257899e-06, "loss": 0.6758, "step": 6893 }, { "epoch": 0.2112909157778595, "grad_norm": 0.5963269597208709, "learning_rate": 9.162631267834114e-06, "loss": 0.444, "step": 6894 }, { "epoch": 0.2113215643006007, "grad_norm": 1.2489755265901545, "learning_rate": 9.162356294394858e-06, "loss": 0.754, "step": 6895 }, { "epoch": 0.2113522128233419, "grad_norm": 1.28153824725487, "learning_rate": 9.16208127994284e-06, "loss": 0.6874, "step": 6896 }, { "epoch": 0.2113828613460831, "grad_norm": 1.341401312386191, "learning_rate": 9.161806224480772e-06, "loss": 0.66, "step": 6897 }, { "epoch": 0.21141350986882432, "grad_norm": 1.4617550914531552, "learning_rate": 9.161531128011361e-06, "loss": 0.7164, "step": 6898 }, { "epoch": 0.21144415839156552, "grad_norm": 1.3396510689073602, "learning_rate": 9.161255990537318e-06, "loss": 0.7267, "step": 6899 }, { "epoch": 0.21147480691430673, "grad_norm": 1.4009885531068498, "learning_rate": 9.160980812061357e-06, "loss": 0.7526, "step": 6900 }, { "epoch": 0.21150545543704793, "grad_norm": 1.3446693501964295, "learning_rate": 9.160705592586187e-06, "loss": 0.7266, "step": 6901 }, { "epoch": 0.21153610395978914, "grad_norm": 1.261091090919488, "learning_rate": 9.16043033211452e-06, "loss": 0.6766, "step": 6902 }, { "epoch": 0.21156675248253035, "grad_norm": 1.2035755218182662, "learning_rate": 9.16015503064907e-06, "loss": 0.6812, "step": 6903 }, { "epoch": 0.21159740100527155, "grad_norm": 1.3782192655144805, "learning_rate": 9.159879688192546e-06, "loss": 0.7771, "step": 6904 }, { "epoch": 0.21162804952801276, "grad_norm": 0.5539281162223745, "learning_rate": 9.159604304747662e-06, "loss": 0.4664, "step": 6905 }, { "epoch": 0.21165869805075396, "grad_norm": 1.3096584823196884, "learning_rate": 9.159328880317136e-06, "loss": 0.65, "step": 6906 }, { "epoch": 0.21168934657349517, "grad_norm": 0.4742971976558916, "learning_rate": 9.159053414903676e-06, "loss": 0.4703, "step": 6907 }, { "epoch": 0.21171999509623637, "grad_norm": 1.3085346267805185, "learning_rate": 9.158777908509999e-06, "loss": 0.6922, "step": 6908 }, { "epoch": 0.21175064361897755, "grad_norm": 1.3211536982347827, "learning_rate": 9.158502361138819e-06, "loss": 0.7052, "step": 6909 }, { "epoch": 0.21178129214171876, "grad_norm": 0.46142267875606485, "learning_rate": 9.158226772792852e-06, "loss": 0.4471, "step": 6910 }, { "epoch": 0.21181194066445996, "grad_norm": 1.4371799463180746, "learning_rate": 9.15795114347481e-06, "loss": 0.7147, "step": 6911 }, { "epoch": 0.21184258918720117, "grad_norm": 1.2129947120763525, "learning_rate": 9.157675473187414e-06, "loss": 0.7157, "step": 6912 }, { "epoch": 0.21187323770994237, "grad_norm": 1.2121112344975713, "learning_rate": 9.157399761933377e-06, "loss": 0.7171, "step": 6913 }, { "epoch": 0.21190388623268358, "grad_norm": 1.217645625043864, "learning_rate": 9.157124009715417e-06, "loss": 0.7466, "step": 6914 }, { "epoch": 0.21193453475542479, "grad_norm": 1.2391949069567603, "learning_rate": 9.156848216536251e-06, "loss": 0.6508, "step": 6915 }, { "epoch": 0.211965183278166, "grad_norm": 1.3264599702175133, "learning_rate": 9.156572382398594e-06, "loss": 0.6974, "step": 6916 }, { "epoch": 0.2119958318009072, "grad_norm": 1.2918274149571307, "learning_rate": 9.156296507305167e-06, "loss": 0.6488, "step": 6917 }, { "epoch": 0.2120264803236484, "grad_norm": 1.338176211942694, "learning_rate": 9.156020591258687e-06, "loss": 0.7236, "step": 6918 }, { "epoch": 0.2120571288463896, "grad_norm": 0.5035523673317379, "learning_rate": 9.155744634261874e-06, "loss": 0.4466, "step": 6919 }, { "epoch": 0.21208777736913081, "grad_norm": 1.3247230342235312, "learning_rate": 9.155468636317443e-06, "loss": 0.827, "step": 6920 }, { "epoch": 0.21211842589187202, "grad_norm": 1.2221560800316018, "learning_rate": 9.15519259742812e-06, "loss": 0.7021, "step": 6921 }, { "epoch": 0.21214907441461323, "grad_norm": 1.391624409646565, "learning_rate": 9.15491651759662e-06, "loss": 0.7526, "step": 6922 }, { "epoch": 0.21217972293735443, "grad_norm": 1.304060818784575, "learning_rate": 9.154640396825662e-06, "loss": 0.7021, "step": 6923 }, { "epoch": 0.21221037146009564, "grad_norm": 0.48405615137050884, "learning_rate": 9.15436423511797e-06, "loss": 0.4604, "step": 6924 }, { "epoch": 0.21224101998283681, "grad_norm": 1.1776064875151597, "learning_rate": 9.154088032476266e-06, "loss": 0.743, "step": 6925 }, { "epoch": 0.21227166850557802, "grad_norm": 1.3559451011402373, "learning_rate": 9.153811788903269e-06, "loss": 0.6711, "step": 6926 }, { "epoch": 0.21230231702831923, "grad_norm": 1.6245978550431825, "learning_rate": 9.1535355044017e-06, "loss": 0.7282, "step": 6927 }, { "epoch": 0.21233296555106043, "grad_norm": 1.146039654955304, "learning_rate": 9.153259178974286e-06, "loss": 0.6566, "step": 6928 }, { "epoch": 0.21236361407380164, "grad_norm": 1.1536700283320795, "learning_rate": 9.152982812623746e-06, "loss": 0.6542, "step": 6929 }, { "epoch": 0.21239426259654284, "grad_norm": 1.2868163756606517, "learning_rate": 9.152706405352802e-06, "loss": 0.7279, "step": 6930 }, { "epoch": 0.21242491111928405, "grad_norm": 1.2106718956267495, "learning_rate": 9.15242995716418e-06, "loss": 0.6595, "step": 6931 }, { "epoch": 0.21245555964202525, "grad_norm": 0.48850818670627033, "learning_rate": 9.152153468060603e-06, "loss": 0.4635, "step": 6932 }, { "epoch": 0.21248620816476646, "grad_norm": 1.2129794360324584, "learning_rate": 9.151876938044795e-06, "loss": 0.6567, "step": 6933 }, { "epoch": 0.21251685668750767, "grad_norm": 1.3223147845039656, "learning_rate": 9.151600367119482e-06, "loss": 0.7434, "step": 6934 }, { "epoch": 0.21254750521024887, "grad_norm": 1.501182732768591, "learning_rate": 9.15132375528739e-06, "loss": 0.7097, "step": 6935 }, { "epoch": 0.21257815373299008, "grad_norm": 0.452828895607825, "learning_rate": 9.15104710255124e-06, "loss": 0.4408, "step": 6936 }, { "epoch": 0.21260880225573128, "grad_norm": 1.268794506518745, "learning_rate": 9.150770408913763e-06, "loss": 0.7019, "step": 6937 }, { "epoch": 0.2126394507784725, "grad_norm": 1.25309169566666, "learning_rate": 9.15049367437768e-06, "loss": 0.7135, "step": 6938 }, { "epoch": 0.2126700993012137, "grad_norm": 1.2179197764938932, "learning_rate": 9.150216898945724e-06, "loss": 0.7389, "step": 6939 }, { "epoch": 0.21270074782395487, "grad_norm": 1.335004502463375, "learning_rate": 9.149940082620618e-06, "loss": 0.7537, "step": 6940 }, { "epoch": 0.21273139634669608, "grad_norm": 1.3649312328154464, "learning_rate": 9.149663225405092e-06, "loss": 0.7412, "step": 6941 }, { "epoch": 0.21276204486943728, "grad_norm": 1.3077716081496862, "learning_rate": 9.14938632730187e-06, "loss": 0.68, "step": 6942 }, { "epoch": 0.2127926933921785, "grad_norm": 1.0894787436769695, "learning_rate": 9.149109388313684e-06, "loss": 0.711, "step": 6943 }, { "epoch": 0.2128233419149197, "grad_norm": 1.2804060557436976, "learning_rate": 9.148832408443262e-06, "loss": 0.7258, "step": 6944 }, { "epoch": 0.2128539904376609, "grad_norm": 0.5307130582307947, "learning_rate": 9.148555387693332e-06, "loss": 0.4531, "step": 6945 }, { "epoch": 0.2128846389604021, "grad_norm": 1.3406335656536617, "learning_rate": 9.148278326066624e-06, "loss": 0.6595, "step": 6946 }, { "epoch": 0.2129152874831433, "grad_norm": 1.4396122827971658, "learning_rate": 9.14800122356587e-06, "loss": 0.741, "step": 6947 }, { "epoch": 0.21294593600588452, "grad_norm": 0.49414902674094774, "learning_rate": 9.147724080193798e-06, "loss": 0.4555, "step": 6948 }, { "epoch": 0.21297658452862572, "grad_norm": 1.354434489116388, "learning_rate": 9.147446895953138e-06, "loss": 0.6028, "step": 6949 }, { "epoch": 0.21300723305136693, "grad_norm": 0.4512010798675122, "learning_rate": 9.147169670846623e-06, "loss": 0.464, "step": 6950 }, { "epoch": 0.21303788157410813, "grad_norm": 1.1916411392712365, "learning_rate": 9.146892404876985e-06, "loss": 0.6919, "step": 6951 }, { "epoch": 0.21306853009684934, "grad_norm": 0.4593384015685346, "learning_rate": 9.146615098046953e-06, "loss": 0.4325, "step": 6952 }, { "epoch": 0.21309917861959055, "grad_norm": 1.3253223850468379, "learning_rate": 9.146337750359265e-06, "loss": 0.7255, "step": 6953 }, { "epoch": 0.21312982714233175, "grad_norm": 1.3855974220326577, "learning_rate": 9.146060361816648e-06, "loss": 0.7509, "step": 6954 }, { "epoch": 0.21316047566507296, "grad_norm": 1.3397847079811465, "learning_rate": 9.145782932421838e-06, "loss": 0.7632, "step": 6955 }, { "epoch": 0.21319112418781413, "grad_norm": 1.4234076089135237, "learning_rate": 9.145505462177569e-06, "loss": 0.7828, "step": 6956 }, { "epoch": 0.21322177271055534, "grad_norm": 1.4971095939291834, "learning_rate": 9.145227951086573e-06, "loss": 0.6762, "step": 6957 }, { "epoch": 0.21325242123329655, "grad_norm": 1.307226837428988, "learning_rate": 9.144950399151584e-06, "loss": 0.8357, "step": 6958 }, { "epoch": 0.21328306975603775, "grad_norm": 1.2868809709203766, "learning_rate": 9.144672806375341e-06, "loss": 0.719, "step": 6959 }, { "epoch": 0.21331371827877896, "grad_norm": 1.3813667216359358, "learning_rate": 9.144395172760574e-06, "loss": 0.6879, "step": 6960 }, { "epoch": 0.21334436680152016, "grad_norm": 1.3434082391145543, "learning_rate": 9.144117498310023e-06, "loss": 0.7832, "step": 6961 }, { "epoch": 0.21337501532426137, "grad_norm": 1.2877588704584466, "learning_rate": 9.14383978302642e-06, "loss": 0.6748, "step": 6962 }, { "epoch": 0.21340566384700257, "grad_norm": 1.4665149872704661, "learning_rate": 9.143562026912504e-06, "loss": 0.7677, "step": 6963 }, { "epoch": 0.21343631236974378, "grad_norm": 1.2258837899525572, "learning_rate": 9.143284229971011e-06, "loss": 0.6982, "step": 6964 }, { "epoch": 0.21346696089248499, "grad_norm": 1.4352271821880704, "learning_rate": 9.143006392204678e-06, "loss": 0.6852, "step": 6965 }, { "epoch": 0.2134976094152262, "grad_norm": 1.2753551346845493, "learning_rate": 9.142728513616245e-06, "loss": 0.6886, "step": 6966 }, { "epoch": 0.2135282579379674, "grad_norm": 1.3317402708282815, "learning_rate": 9.142450594208447e-06, "loss": 0.7717, "step": 6967 }, { "epoch": 0.2135589064607086, "grad_norm": 1.2951358035498945, "learning_rate": 9.142172633984024e-06, "loss": 0.6869, "step": 6968 }, { "epoch": 0.2135895549834498, "grad_norm": 1.398074577113066, "learning_rate": 9.141894632945712e-06, "loss": 0.7446, "step": 6969 }, { "epoch": 0.21362020350619101, "grad_norm": 1.1526228826346367, "learning_rate": 9.141616591096255e-06, "loss": 0.7071, "step": 6970 }, { "epoch": 0.2136508520289322, "grad_norm": 1.2133824091161503, "learning_rate": 9.141338508438388e-06, "loss": 0.6297, "step": 6971 }, { "epoch": 0.2136815005516734, "grad_norm": 1.4691275169519096, "learning_rate": 9.141060384974854e-06, "loss": 0.7024, "step": 6972 }, { "epoch": 0.2137121490744146, "grad_norm": 1.256549750030952, "learning_rate": 9.140782220708393e-06, "loss": 0.7337, "step": 6973 }, { "epoch": 0.2137427975971558, "grad_norm": 0.5739209585252505, "learning_rate": 9.140504015641745e-06, "loss": 0.4584, "step": 6974 }, { "epoch": 0.21377344611989701, "grad_norm": 1.3627824573296607, "learning_rate": 9.140225769777652e-06, "loss": 0.6397, "step": 6975 }, { "epoch": 0.21380409464263822, "grad_norm": 1.256160012275789, "learning_rate": 9.139947483118852e-06, "loss": 0.7213, "step": 6976 }, { "epoch": 0.21383474316537943, "grad_norm": 1.2516237961086205, "learning_rate": 9.139669155668095e-06, "loss": 0.7342, "step": 6977 }, { "epoch": 0.21386539168812063, "grad_norm": 1.3382410956806603, "learning_rate": 9.139390787428115e-06, "loss": 0.683, "step": 6978 }, { "epoch": 0.21389604021086184, "grad_norm": 1.245208899772656, "learning_rate": 9.139112378401659e-06, "loss": 0.6814, "step": 6979 }, { "epoch": 0.21392668873360304, "grad_norm": 1.2156334716406505, "learning_rate": 9.138833928591471e-06, "loss": 0.5857, "step": 6980 }, { "epoch": 0.21395733725634425, "grad_norm": 0.49803622827618543, "learning_rate": 9.138555438000291e-06, "loss": 0.4402, "step": 6981 }, { "epoch": 0.21398798577908545, "grad_norm": 1.3406542919244935, "learning_rate": 9.138276906630868e-06, "loss": 0.7499, "step": 6982 }, { "epoch": 0.21401863430182666, "grad_norm": 1.3123806371867577, "learning_rate": 9.137998334485944e-06, "loss": 0.7121, "step": 6983 }, { "epoch": 0.21404928282456787, "grad_norm": 1.3190409635806628, "learning_rate": 9.137719721568263e-06, "loss": 0.7344, "step": 6984 }, { "epoch": 0.21407993134730907, "grad_norm": 1.3508957519994056, "learning_rate": 9.13744106788057e-06, "loss": 0.715, "step": 6985 }, { "epoch": 0.21411057987005028, "grad_norm": 1.4344172554425145, "learning_rate": 9.137162373425612e-06, "loss": 0.7405, "step": 6986 }, { "epoch": 0.21414122839279146, "grad_norm": 1.1714842171823494, "learning_rate": 9.136883638206135e-06, "loss": 0.689, "step": 6987 }, { "epoch": 0.21417187691553266, "grad_norm": 1.3680594770551442, "learning_rate": 9.136604862224886e-06, "loss": 0.7279, "step": 6988 }, { "epoch": 0.21420252543827387, "grad_norm": 1.40475329848304, "learning_rate": 9.136326045484607e-06, "loss": 0.8463, "step": 6989 }, { "epoch": 0.21423317396101507, "grad_norm": 0.48083239891151724, "learning_rate": 9.136047187988053e-06, "loss": 0.4676, "step": 6990 }, { "epoch": 0.21426382248375628, "grad_norm": 1.4417165282621935, "learning_rate": 9.135768289737966e-06, "loss": 0.7451, "step": 6991 }, { "epoch": 0.21429447100649748, "grad_norm": 1.1607294884299377, "learning_rate": 9.135489350737096e-06, "loss": 0.7685, "step": 6992 }, { "epoch": 0.2143251195292387, "grad_norm": 1.3670768210232713, "learning_rate": 9.135210370988192e-06, "loss": 0.7344, "step": 6993 }, { "epoch": 0.2143557680519799, "grad_norm": 1.2239803200608537, "learning_rate": 9.134931350494001e-06, "loss": 0.7541, "step": 6994 }, { "epoch": 0.2143864165747211, "grad_norm": 1.3838952017802135, "learning_rate": 9.134652289257275e-06, "loss": 0.6316, "step": 6995 }, { "epoch": 0.2144170650974623, "grad_norm": 1.2610380179180363, "learning_rate": 9.134373187280761e-06, "loss": 0.6365, "step": 6996 }, { "epoch": 0.2144477136202035, "grad_norm": 1.267087720252599, "learning_rate": 9.134094044567213e-06, "loss": 0.674, "step": 6997 }, { "epoch": 0.21447836214294472, "grad_norm": 0.47351332033969024, "learning_rate": 9.133814861119375e-06, "loss": 0.4519, "step": 6998 }, { "epoch": 0.21450901066568592, "grad_norm": 1.2358375965346882, "learning_rate": 9.133535636940003e-06, "loss": 0.7102, "step": 6999 }, { "epoch": 0.21453965918842713, "grad_norm": 1.3159922175431547, "learning_rate": 9.133256372031845e-06, "loss": 0.7048, "step": 7000 }, { "epoch": 0.21457030771116833, "grad_norm": 0.4530936266841544, "learning_rate": 9.132977066397657e-06, "loss": 0.441, "step": 7001 }, { "epoch": 0.2146009562339095, "grad_norm": 0.4768790526191424, "learning_rate": 9.132697720040187e-06, "loss": 0.4674, "step": 7002 }, { "epoch": 0.21463160475665072, "grad_norm": 1.4911783586290437, "learning_rate": 9.132418332962189e-06, "loss": 0.6496, "step": 7003 }, { "epoch": 0.21466225327939192, "grad_norm": 1.257350268586036, "learning_rate": 9.132138905166417e-06, "loss": 0.6681, "step": 7004 }, { "epoch": 0.21469290180213313, "grad_norm": 1.2097522729549326, "learning_rate": 9.131859436655621e-06, "loss": 0.6945, "step": 7005 }, { "epoch": 0.21472355032487433, "grad_norm": 1.1167984314521986, "learning_rate": 9.131579927432559e-06, "loss": 0.6467, "step": 7006 }, { "epoch": 0.21475419884761554, "grad_norm": 1.162260915253898, "learning_rate": 9.131300377499983e-06, "loss": 0.6158, "step": 7007 }, { "epoch": 0.21478484737035675, "grad_norm": 1.1511553429234191, "learning_rate": 9.131020786860644e-06, "loss": 0.6631, "step": 7008 }, { "epoch": 0.21481549589309795, "grad_norm": 1.3370305587203493, "learning_rate": 9.130741155517305e-06, "loss": 0.7255, "step": 7009 }, { "epoch": 0.21484614441583916, "grad_norm": 0.4782980542709091, "learning_rate": 9.130461483472712e-06, "loss": 0.4429, "step": 7010 }, { "epoch": 0.21487679293858036, "grad_norm": 1.2428640268139204, "learning_rate": 9.130181770729628e-06, "loss": 0.6298, "step": 7011 }, { "epoch": 0.21490744146132157, "grad_norm": 1.2861414451465538, "learning_rate": 9.129902017290806e-06, "loss": 0.7049, "step": 7012 }, { "epoch": 0.21493808998406277, "grad_norm": 1.4710046352479484, "learning_rate": 9.129622223159002e-06, "loss": 0.717, "step": 7013 }, { "epoch": 0.21496873850680398, "grad_norm": 1.2411238691540447, "learning_rate": 9.129342388336973e-06, "loss": 0.6978, "step": 7014 }, { "epoch": 0.21499938702954519, "grad_norm": 1.2836179471911044, "learning_rate": 9.129062512827478e-06, "loss": 0.6328, "step": 7015 }, { "epoch": 0.2150300355522864, "grad_norm": 1.4479006137260135, "learning_rate": 9.128782596633275e-06, "loss": 0.7002, "step": 7016 }, { "epoch": 0.2150606840750276, "grad_norm": 0.4568421858006322, "learning_rate": 9.128502639757117e-06, "loss": 0.4517, "step": 7017 }, { "epoch": 0.21509133259776878, "grad_norm": 1.1966309223952147, "learning_rate": 9.12822264220177e-06, "loss": 0.6668, "step": 7018 }, { "epoch": 0.21512198112050998, "grad_norm": 1.2652383166454428, "learning_rate": 9.127942603969987e-06, "loss": 0.7404, "step": 7019 }, { "epoch": 0.2151526296432512, "grad_norm": 1.1990298336140905, "learning_rate": 9.12766252506453e-06, "loss": 0.6925, "step": 7020 }, { "epoch": 0.2151832781659924, "grad_norm": 1.3720254719566092, "learning_rate": 9.127382405488156e-06, "loss": 0.7026, "step": 7021 }, { "epoch": 0.2152139266887336, "grad_norm": 1.3416742718209715, "learning_rate": 9.12710224524363e-06, "loss": 0.6556, "step": 7022 }, { "epoch": 0.2152445752114748, "grad_norm": 1.1446045329198575, "learning_rate": 9.126822044333707e-06, "loss": 0.6195, "step": 7023 }, { "epoch": 0.215275223734216, "grad_norm": 1.2086648954845614, "learning_rate": 9.126541802761153e-06, "loss": 0.643, "step": 7024 }, { "epoch": 0.21530587225695721, "grad_norm": 1.275281522909367, "learning_rate": 9.126261520528725e-06, "loss": 0.7529, "step": 7025 }, { "epoch": 0.21533652077969842, "grad_norm": 1.1712702233524546, "learning_rate": 9.125981197639187e-06, "loss": 0.7125, "step": 7026 }, { "epoch": 0.21536716930243963, "grad_norm": 1.2077064520114926, "learning_rate": 9.125700834095301e-06, "loss": 0.7947, "step": 7027 }, { "epoch": 0.21539781782518083, "grad_norm": 1.4097240915846208, "learning_rate": 9.12542042989983e-06, "loss": 0.745, "step": 7028 }, { "epoch": 0.21542846634792204, "grad_norm": 1.3913038305542131, "learning_rate": 9.125139985055533e-06, "loss": 0.636, "step": 7029 }, { "epoch": 0.21545911487066324, "grad_norm": 1.2453223588283837, "learning_rate": 9.124859499565178e-06, "loss": 0.712, "step": 7030 }, { "epoch": 0.21548976339340445, "grad_norm": 0.44178378091541204, "learning_rate": 9.124578973431527e-06, "loss": 0.4306, "step": 7031 }, { "epoch": 0.21552041191614565, "grad_norm": 1.2696222510245, "learning_rate": 9.124298406657345e-06, "loss": 0.7733, "step": 7032 }, { "epoch": 0.21555106043888683, "grad_norm": 1.1773622177118002, "learning_rate": 9.124017799245396e-06, "loss": 0.7276, "step": 7033 }, { "epoch": 0.21558170896162804, "grad_norm": 1.2512550034139, "learning_rate": 9.123737151198442e-06, "loss": 0.6387, "step": 7034 }, { "epoch": 0.21561235748436924, "grad_norm": 1.231780144426085, "learning_rate": 9.12345646251925e-06, "loss": 0.6037, "step": 7035 }, { "epoch": 0.21564300600711045, "grad_norm": 1.182197819936877, "learning_rate": 9.12317573321059e-06, "loss": 0.6731, "step": 7036 }, { "epoch": 0.21567365452985165, "grad_norm": 1.5119474229489114, "learning_rate": 9.122894963275221e-06, "loss": 0.8481, "step": 7037 }, { "epoch": 0.21570430305259286, "grad_norm": 1.3220269164777065, "learning_rate": 9.122614152715917e-06, "loss": 0.7239, "step": 7038 }, { "epoch": 0.21573495157533407, "grad_norm": 1.4322606311221247, "learning_rate": 9.122333301535438e-06, "loss": 0.7756, "step": 7039 }, { "epoch": 0.21576560009807527, "grad_norm": 0.5372344882560712, "learning_rate": 9.122052409736554e-06, "loss": 0.4487, "step": 7040 }, { "epoch": 0.21579624862081648, "grad_norm": 1.2829320287173442, "learning_rate": 9.121771477322035e-06, "loss": 0.6884, "step": 7041 }, { "epoch": 0.21582689714355768, "grad_norm": 1.3801897658932234, "learning_rate": 9.121490504294645e-06, "loss": 0.8119, "step": 7042 }, { "epoch": 0.2158575456662989, "grad_norm": 1.5610684951046507, "learning_rate": 9.121209490657156e-06, "loss": 0.7142, "step": 7043 }, { "epoch": 0.2158881941890401, "grad_norm": 1.4952009868391405, "learning_rate": 9.120928436412334e-06, "loss": 0.7211, "step": 7044 }, { "epoch": 0.2159188427117813, "grad_norm": 1.2192614917017017, "learning_rate": 9.120647341562952e-06, "loss": 0.6772, "step": 7045 }, { "epoch": 0.2159494912345225, "grad_norm": 0.45413174230366493, "learning_rate": 9.120366206111777e-06, "loss": 0.4381, "step": 7046 }, { "epoch": 0.2159801397572637, "grad_norm": 1.1305724775067751, "learning_rate": 9.120085030061577e-06, "loss": 0.615, "step": 7047 }, { "epoch": 0.21601078828000492, "grad_norm": 1.1449695354573075, "learning_rate": 9.119803813415126e-06, "loss": 0.6985, "step": 7048 }, { "epoch": 0.2160414368027461, "grad_norm": 1.236570554540205, "learning_rate": 9.119522556175196e-06, "loss": 0.7601, "step": 7049 }, { "epoch": 0.2160720853254873, "grad_norm": 0.4832856990383628, "learning_rate": 9.119241258344554e-06, "loss": 0.4788, "step": 7050 }, { "epoch": 0.2161027338482285, "grad_norm": 1.2279096594899301, "learning_rate": 9.118959919925977e-06, "loss": 0.6676, "step": 7051 }, { "epoch": 0.2161333823709697, "grad_norm": 1.3613132806955526, "learning_rate": 9.11867854092223e-06, "loss": 0.7826, "step": 7052 }, { "epoch": 0.21616403089371092, "grad_norm": 1.1389684737024262, "learning_rate": 9.118397121336092e-06, "loss": 0.662, "step": 7053 }, { "epoch": 0.21619467941645212, "grad_norm": 1.2926879463631051, "learning_rate": 9.118115661170333e-06, "loss": 0.6811, "step": 7054 }, { "epoch": 0.21622532793919333, "grad_norm": 0.4353647173572, "learning_rate": 9.117834160427726e-06, "loss": 0.4397, "step": 7055 }, { "epoch": 0.21625597646193453, "grad_norm": 1.3118815430886068, "learning_rate": 9.117552619111046e-06, "loss": 0.8665, "step": 7056 }, { "epoch": 0.21628662498467574, "grad_norm": 1.1076906008829261, "learning_rate": 9.117271037223065e-06, "loss": 0.6327, "step": 7057 }, { "epoch": 0.21631727350741695, "grad_norm": 1.274319638785689, "learning_rate": 9.116989414766561e-06, "loss": 0.6804, "step": 7058 }, { "epoch": 0.21634792203015815, "grad_norm": 1.2156371948294378, "learning_rate": 9.116707751744307e-06, "loss": 0.7464, "step": 7059 }, { "epoch": 0.21637857055289936, "grad_norm": 1.2703035250725738, "learning_rate": 9.116426048159077e-06, "loss": 0.6873, "step": 7060 }, { "epoch": 0.21640921907564056, "grad_norm": 1.1550763813858484, "learning_rate": 9.116144304013648e-06, "loss": 0.6533, "step": 7061 }, { "epoch": 0.21643986759838177, "grad_norm": 1.166067749276958, "learning_rate": 9.115862519310797e-06, "loss": 0.767, "step": 7062 }, { "epoch": 0.21647051612112297, "grad_norm": 1.3498682580723698, "learning_rate": 9.115580694053298e-06, "loss": 0.7317, "step": 7063 }, { "epoch": 0.21650116464386415, "grad_norm": 1.1072549080520244, "learning_rate": 9.11529882824393e-06, "loss": 0.7111, "step": 7064 }, { "epoch": 0.21653181316660536, "grad_norm": 1.3206156612162485, "learning_rate": 9.115016921885471e-06, "loss": 0.7172, "step": 7065 }, { "epoch": 0.21656246168934656, "grad_norm": 1.0830925604364146, "learning_rate": 9.114734974980697e-06, "loss": 0.6682, "step": 7066 }, { "epoch": 0.21659311021208777, "grad_norm": 1.2070815734908311, "learning_rate": 9.114452987532387e-06, "loss": 0.6821, "step": 7067 }, { "epoch": 0.21662375873482898, "grad_norm": 1.2616906926531875, "learning_rate": 9.114170959543317e-06, "loss": 0.6813, "step": 7068 }, { "epoch": 0.21665440725757018, "grad_norm": 0.5092976885647916, "learning_rate": 9.11388889101627e-06, "loss": 0.4484, "step": 7069 }, { "epoch": 0.2166850557803114, "grad_norm": 1.296522574347512, "learning_rate": 9.11360678195402e-06, "loss": 0.7606, "step": 7070 }, { "epoch": 0.2167157043030526, "grad_norm": 1.3694976191360522, "learning_rate": 9.113324632359352e-06, "loss": 0.6022, "step": 7071 }, { "epoch": 0.2167463528257938, "grad_norm": 1.3018140322201475, "learning_rate": 9.113042442235045e-06, "loss": 0.6903, "step": 7072 }, { "epoch": 0.216777001348535, "grad_norm": 1.2335205428990739, "learning_rate": 9.112760211583878e-06, "loss": 0.6087, "step": 7073 }, { "epoch": 0.2168076498712762, "grad_norm": 0.46020992072020894, "learning_rate": 9.112477940408631e-06, "loss": 0.4708, "step": 7074 }, { "epoch": 0.21683829839401741, "grad_norm": 1.212369142616715, "learning_rate": 9.112195628712086e-06, "loss": 0.7273, "step": 7075 }, { "epoch": 0.21686894691675862, "grad_norm": 1.1774292590554936, "learning_rate": 9.111913276497026e-06, "loss": 0.7757, "step": 7076 }, { "epoch": 0.21689959543949983, "grad_norm": 1.1735914973956225, "learning_rate": 9.111630883766233e-06, "loss": 0.6348, "step": 7077 }, { "epoch": 0.21693024396224103, "grad_norm": 1.2294176355558184, "learning_rate": 9.111348450522491e-06, "loss": 0.6292, "step": 7078 }, { "epoch": 0.21696089248498224, "grad_norm": 1.5175644907763948, "learning_rate": 9.111065976768578e-06, "loss": 0.7632, "step": 7079 }, { "epoch": 0.21699154100772342, "grad_norm": 1.3318868889088065, "learning_rate": 9.11078346250728e-06, "loss": 0.6845, "step": 7080 }, { "epoch": 0.21702218953046462, "grad_norm": 1.3613644051821443, "learning_rate": 9.110500907741383e-06, "loss": 0.6766, "step": 7081 }, { "epoch": 0.21705283805320583, "grad_norm": 0.4863715526724649, "learning_rate": 9.110218312473667e-06, "loss": 0.434, "step": 7082 }, { "epoch": 0.21708348657594703, "grad_norm": 1.4407117163138041, "learning_rate": 9.109935676706918e-06, "loss": 0.707, "step": 7083 }, { "epoch": 0.21711413509868824, "grad_norm": 1.3873848687934713, "learning_rate": 9.109653000443921e-06, "loss": 0.7528, "step": 7084 }, { "epoch": 0.21714478362142944, "grad_norm": 1.2163372905311682, "learning_rate": 9.109370283687462e-06, "loss": 0.6641, "step": 7085 }, { "epoch": 0.21717543214417065, "grad_norm": 1.4272069229307012, "learning_rate": 9.109087526440328e-06, "loss": 0.6263, "step": 7086 }, { "epoch": 0.21720608066691185, "grad_norm": 1.307804927220581, "learning_rate": 9.108804728705302e-06, "loss": 0.8182, "step": 7087 }, { "epoch": 0.21723672918965306, "grad_norm": 1.3036191766477592, "learning_rate": 9.108521890485172e-06, "loss": 0.7363, "step": 7088 }, { "epoch": 0.21726737771239427, "grad_norm": 1.1869159770818376, "learning_rate": 9.108239011782722e-06, "loss": 0.7475, "step": 7089 }, { "epoch": 0.21729802623513547, "grad_norm": 1.317101339153091, "learning_rate": 9.107956092600745e-06, "loss": 0.6307, "step": 7090 }, { "epoch": 0.21732867475787668, "grad_norm": 0.47164475674119727, "learning_rate": 9.107673132942025e-06, "loss": 0.4616, "step": 7091 }, { "epoch": 0.21735932328061788, "grad_norm": 0.4648988056242877, "learning_rate": 9.10739013280935e-06, "loss": 0.4788, "step": 7092 }, { "epoch": 0.2173899718033591, "grad_norm": 1.3800728993755043, "learning_rate": 9.10710709220551e-06, "loss": 0.8096, "step": 7093 }, { "epoch": 0.2174206203261003, "grad_norm": 0.4631531992605863, "learning_rate": 9.10682401113329e-06, "loss": 0.4632, "step": 7094 }, { "epoch": 0.21745126884884147, "grad_norm": 1.1810903378371584, "learning_rate": 9.106540889595485e-06, "loss": 0.6117, "step": 7095 }, { "epoch": 0.21748191737158268, "grad_norm": 1.3721285286449088, "learning_rate": 9.106257727594883e-06, "loss": 0.7402, "step": 7096 }, { "epoch": 0.21751256589432388, "grad_norm": 1.313373809010924, "learning_rate": 9.105974525134272e-06, "loss": 0.7252, "step": 7097 }, { "epoch": 0.2175432144170651, "grad_norm": 1.4360461555462036, "learning_rate": 9.105691282216442e-06, "loss": 0.7758, "step": 7098 }, { "epoch": 0.2175738629398063, "grad_norm": 1.261141297768062, "learning_rate": 9.105407998844186e-06, "loss": 0.7347, "step": 7099 }, { "epoch": 0.2176045114625475, "grad_norm": 1.1185853478714918, "learning_rate": 9.105124675020294e-06, "loss": 0.6729, "step": 7100 }, { "epoch": 0.2176351599852887, "grad_norm": 1.3281243762707955, "learning_rate": 9.104841310747559e-06, "loss": 0.7418, "step": 7101 }, { "epoch": 0.2176658085080299, "grad_norm": 0.4804470895461791, "learning_rate": 9.104557906028773e-06, "loss": 0.4495, "step": 7102 }, { "epoch": 0.21769645703077112, "grad_norm": 1.3522941083481619, "learning_rate": 9.104274460866726e-06, "loss": 0.6626, "step": 7103 }, { "epoch": 0.21772710555351232, "grad_norm": 0.5121244041161561, "learning_rate": 9.103990975264214e-06, "loss": 0.4738, "step": 7104 }, { "epoch": 0.21775775407625353, "grad_norm": 1.2320771194488351, "learning_rate": 9.103707449224028e-06, "loss": 0.6435, "step": 7105 }, { "epoch": 0.21778840259899473, "grad_norm": 1.2596214730924304, "learning_rate": 9.103423882748963e-06, "loss": 0.7238, "step": 7106 }, { "epoch": 0.21781905112173594, "grad_norm": 1.2823572009572934, "learning_rate": 9.103140275841812e-06, "loss": 0.6268, "step": 7107 }, { "epoch": 0.21784969964447715, "grad_norm": 1.3296940775397, "learning_rate": 9.10285662850537e-06, "loss": 0.7369, "step": 7108 }, { "epoch": 0.21788034816721835, "grad_norm": 1.1430556469638753, "learning_rate": 9.102572940742433e-06, "loss": 0.6442, "step": 7109 }, { "epoch": 0.21791099668995956, "grad_norm": 1.25613782846349, "learning_rate": 9.102289212555795e-06, "loss": 0.7639, "step": 7110 }, { "epoch": 0.21794164521270074, "grad_norm": 1.2598371897078802, "learning_rate": 9.102005443948252e-06, "loss": 0.71, "step": 7111 }, { "epoch": 0.21797229373544194, "grad_norm": 0.5072316298628236, "learning_rate": 9.1017216349226e-06, "loss": 0.4466, "step": 7112 }, { "epoch": 0.21800294225818315, "grad_norm": 1.2445110023995645, "learning_rate": 9.101437785481633e-06, "loss": 0.6492, "step": 7113 }, { "epoch": 0.21803359078092435, "grad_norm": 1.3803194534614696, "learning_rate": 9.101153895628152e-06, "loss": 0.7884, "step": 7114 }, { "epoch": 0.21806423930366556, "grad_norm": 1.1589324046293357, "learning_rate": 9.10086996536495e-06, "loss": 0.6961, "step": 7115 }, { "epoch": 0.21809488782640676, "grad_norm": 1.3101908903894097, "learning_rate": 9.10058599469483e-06, "loss": 0.7221, "step": 7116 }, { "epoch": 0.21812553634914797, "grad_norm": 1.3752656589204204, "learning_rate": 9.100301983620587e-06, "loss": 0.7552, "step": 7117 }, { "epoch": 0.21815618487188917, "grad_norm": 1.2519652117071767, "learning_rate": 9.100017932145017e-06, "loss": 0.7009, "step": 7118 }, { "epoch": 0.21818683339463038, "grad_norm": 1.336931074611082, "learning_rate": 9.099733840270923e-06, "loss": 0.6446, "step": 7119 }, { "epoch": 0.2182174819173716, "grad_norm": 1.2455558048995197, "learning_rate": 9.099449708001102e-06, "loss": 0.6531, "step": 7120 }, { "epoch": 0.2182481304401128, "grad_norm": 1.1742691337543059, "learning_rate": 9.099165535338355e-06, "loss": 0.7731, "step": 7121 }, { "epoch": 0.218278778962854, "grad_norm": 1.2749146717799371, "learning_rate": 9.09888132228548e-06, "loss": 0.6324, "step": 7122 }, { "epoch": 0.2183094274855952, "grad_norm": 1.2032816432747728, "learning_rate": 9.098597068845279e-06, "loss": 0.7784, "step": 7123 }, { "epoch": 0.2183400760083364, "grad_norm": 1.4312212638494404, "learning_rate": 9.098312775020552e-06, "loss": 0.6116, "step": 7124 }, { "epoch": 0.21837072453107761, "grad_norm": 1.167754551479729, "learning_rate": 9.098028440814101e-06, "loss": 0.6293, "step": 7125 }, { "epoch": 0.2184013730538188, "grad_norm": 1.225955988188429, "learning_rate": 9.097744066228728e-06, "loss": 0.6042, "step": 7126 }, { "epoch": 0.21843202157656, "grad_norm": 1.2395599183414305, "learning_rate": 9.097459651267233e-06, "loss": 0.7598, "step": 7127 }, { "epoch": 0.2184626700993012, "grad_norm": 1.3963427361393914, "learning_rate": 9.09717519593242e-06, "loss": 0.7603, "step": 7128 }, { "epoch": 0.2184933186220424, "grad_norm": 1.5192211981486468, "learning_rate": 9.096890700227093e-06, "loss": 0.781, "step": 7129 }, { "epoch": 0.21852396714478362, "grad_norm": 1.3191334316674934, "learning_rate": 9.096606164154052e-06, "loss": 0.5873, "step": 7130 }, { "epoch": 0.21855461566752482, "grad_norm": 1.448932044294131, "learning_rate": 9.096321587716101e-06, "loss": 0.7768, "step": 7131 }, { "epoch": 0.21858526419026603, "grad_norm": 1.4412513098479847, "learning_rate": 9.096036970916048e-06, "loss": 0.7148, "step": 7132 }, { "epoch": 0.21861591271300723, "grad_norm": 1.3580863277162099, "learning_rate": 9.095752313756695e-06, "loss": 0.7563, "step": 7133 }, { "epoch": 0.21864656123574844, "grad_norm": 1.1622702980187356, "learning_rate": 9.095467616240844e-06, "loss": 0.7206, "step": 7134 }, { "epoch": 0.21867720975848964, "grad_norm": 1.3716530878533304, "learning_rate": 9.095182878371304e-06, "loss": 0.6583, "step": 7135 }, { "epoch": 0.21870785828123085, "grad_norm": 1.5363437062622556, "learning_rate": 9.09489810015088e-06, "loss": 0.7722, "step": 7136 }, { "epoch": 0.21873850680397205, "grad_norm": 1.357034753078645, "learning_rate": 9.094613281582376e-06, "loss": 0.739, "step": 7137 }, { "epoch": 0.21876915532671326, "grad_norm": 1.2281429765749912, "learning_rate": 9.0943284226686e-06, "loss": 0.7134, "step": 7138 }, { "epoch": 0.21879980384945447, "grad_norm": 1.2968913314122381, "learning_rate": 9.094043523412359e-06, "loss": 0.6784, "step": 7139 }, { "epoch": 0.21883045237219567, "grad_norm": 1.5013247700875203, "learning_rate": 9.093758583816459e-06, "loss": 0.6565, "step": 7140 }, { "epoch": 0.21886110089493688, "grad_norm": 1.2766291589524332, "learning_rate": 9.09347360388371e-06, "loss": 0.7876, "step": 7141 }, { "epoch": 0.21889174941767806, "grad_norm": 1.2763209375229285, "learning_rate": 9.093188583616917e-06, "loss": 0.6987, "step": 7142 }, { "epoch": 0.21892239794041926, "grad_norm": 1.336177119503613, "learning_rate": 9.092903523018888e-06, "loss": 0.6838, "step": 7143 }, { "epoch": 0.21895304646316047, "grad_norm": 0.5305797312337602, "learning_rate": 9.092618422092434e-06, "loss": 0.4422, "step": 7144 }, { "epoch": 0.21898369498590167, "grad_norm": 1.1925778268267153, "learning_rate": 9.092333280840365e-06, "loss": 0.6776, "step": 7145 }, { "epoch": 0.21901434350864288, "grad_norm": 1.3345007700961278, "learning_rate": 9.092048099265489e-06, "loss": 0.7369, "step": 7146 }, { "epoch": 0.21904499203138408, "grad_norm": 1.2431485162121536, "learning_rate": 9.091762877370616e-06, "loss": 0.7268, "step": 7147 }, { "epoch": 0.2190756405541253, "grad_norm": 1.306565684017516, "learning_rate": 9.091477615158555e-06, "loss": 0.7457, "step": 7148 }, { "epoch": 0.2191062890768665, "grad_norm": 1.0729585695377606, "learning_rate": 9.09119231263212e-06, "loss": 0.6381, "step": 7149 }, { "epoch": 0.2191369375996077, "grad_norm": 1.3260020721660892, "learning_rate": 9.09090696979412e-06, "loss": 0.7314, "step": 7150 }, { "epoch": 0.2191675861223489, "grad_norm": 1.4016090478687202, "learning_rate": 9.090621586647365e-06, "loss": 0.7515, "step": 7151 }, { "epoch": 0.2191982346450901, "grad_norm": 1.3915339565886797, "learning_rate": 9.09033616319467e-06, "loss": 0.7067, "step": 7152 }, { "epoch": 0.21922888316783132, "grad_norm": 1.3159924484905465, "learning_rate": 9.090050699438848e-06, "loss": 0.7603, "step": 7153 }, { "epoch": 0.21925953169057252, "grad_norm": 1.223552354474098, "learning_rate": 9.089765195382708e-06, "loss": 0.6401, "step": 7154 }, { "epoch": 0.21929018021331373, "grad_norm": 1.2965730027433175, "learning_rate": 9.089479651029065e-06, "loss": 0.718, "step": 7155 }, { "epoch": 0.21932082873605493, "grad_norm": 1.2662979038686872, "learning_rate": 9.089194066380735e-06, "loss": 0.6476, "step": 7156 }, { "epoch": 0.2193514772587961, "grad_norm": 1.2677508432820883, "learning_rate": 9.088908441440527e-06, "loss": 0.6645, "step": 7157 }, { "epoch": 0.21938212578153732, "grad_norm": 1.3180094505065456, "learning_rate": 9.088622776211257e-06, "loss": 0.6695, "step": 7158 }, { "epoch": 0.21941277430427852, "grad_norm": 1.4335588211471437, "learning_rate": 9.088337070695743e-06, "loss": 0.7672, "step": 7159 }, { "epoch": 0.21944342282701973, "grad_norm": 1.434058126035239, "learning_rate": 9.088051324896798e-06, "loss": 0.7206, "step": 7160 }, { "epoch": 0.21947407134976094, "grad_norm": 1.1968192808539821, "learning_rate": 9.087765538817237e-06, "loss": 0.6198, "step": 7161 }, { "epoch": 0.21950471987250214, "grad_norm": 1.2891834971865521, "learning_rate": 9.087479712459876e-06, "loss": 0.7056, "step": 7162 }, { "epoch": 0.21953536839524335, "grad_norm": 0.5538172549806966, "learning_rate": 9.08719384582753e-06, "loss": 0.4653, "step": 7163 }, { "epoch": 0.21956601691798455, "grad_norm": 0.4890319072577223, "learning_rate": 9.08690793892302e-06, "loss": 0.4349, "step": 7164 }, { "epoch": 0.21959666544072576, "grad_norm": 1.314393323979949, "learning_rate": 9.08662199174916e-06, "loss": 0.6826, "step": 7165 }, { "epoch": 0.21962731396346696, "grad_norm": 1.403449565335413, "learning_rate": 9.086336004308767e-06, "loss": 0.79, "step": 7166 }, { "epoch": 0.21965796248620817, "grad_norm": 1.5067372327642405, "learning_rate": 9.08604997660466e-06, "loss": 0.7831, "step": 7167 }, { "epoch": 0.21968861100894937, "grad_norm": 1.2937635325147097, "learning_rate": 9.085763908639657e-06, "loss": 0.6954, "step": 7168 }, { "epoch": 0.21971925953169058, "grad_norm": 1.3829351543009776, "learning_rate": 9.085477800416575e-06, "loss": 0.7106, "step": 7169 }, { "epoch": 0.2197499080544318, "grad_norm": 1.2049776677182233, "learning_rate": 9.085191651938238e-06, "loss": 0.6291, "step": 7170 }, { "epoch": 0.219780556577173, "grad_norm": 1.2426759991908947, "learning_rate": 9.08490546320746e-06, "loss": 0.646, "step": 7171 }, { "epoch": 0.2198112050999142, "grad_norm": 1.3510348914468864, "learning_rate": 9.084619234227066e-06, "loss": 0.6734, "step": 7172 }, { "epoch": 0.21984185362265538, "grad_norm": 1.4769190548269622, "learning_rate": 9.084332964999871e-06, "loss": 0.6457, "step": 7173 }, { "epoch": 0.21987250214539658, "grad_norm": 1.486046039367092, "learning_rate": 9.0840466555287e-06, "loss": 0.7037, "step": 7174 }, { "epoch": 0.2199031506681378, "grad_norm": 1.1637277617562198, "learning_rate": 9.083760305816372e-06, "loss": 0.6409, "step": 7175 }, { "epoch": 0.219933799190879, "grad_norm": 1.2498679771082533, "learning_rate": 9.083473915865707e-06, "loss": 0.7149, "step": 7176 }, { "epoch": 0.2199644477136202, "grad_norm": 1.3734717336509974, "learning_rate": 9.083187485679531e-06, "loss": 0.7079, "step": 7177 }, { "epoch": 0.2199950962363614, "grad_norm": 1.3038607732253704, "learning_rate": 9.082901015260664e-06, "loss": 0.6567, "step": 7178 }, { "epoch": 0.2200257447591026, "grad_norm": 1.1404478392781912, "learning_rate": 9.082614504611928e-06, "loss": 0.6476, "step": 7179 }, { "epoch": 0.22005639328184382, "grad_norm": 1.2569607828533207, "learning_rate": 9.082327953736146e-06, "loss": 0.6958, "step": 7180 }, { "epoch": 0.22008704180458502, "grad_norm": 1.1415255346977813, "learning_rate": 9.082041362636142e-06, "loss": 0.5357, "step": 7181 }, { "epoch": 0.22011769032732623, "grad_norm": 1.090696434256794, "learning_rate": 9.08175473131474e-06, "loss": 0.6716, "step": 7182 }, { "epoch": 0.22014833885006743, "grad_norm": 1.2835053513633106, "learning_rate": 9.081468059774766e-06, "loss": 0.6651, "step": 7183 }, { "epoch": 0.22017898737280864, "grad_norm": 0.766424795127698, "learning_rate": 9.081181348019041e-06, "loss": 0.4727, "step": 7184 }, { "epoch": 0.22020963589554984, "grad_norm": 1.3785295219713776, "learning_rate": 9.080894596050393e-06, "loss": 0.6433, "step": 7185 }, { "epoch": 0.22024028441829105, "grad_norm": 1.1947376120573905, "learning_rate": 9.080607803871646e-06, "loss": 0.7132, "step": 7186 }, { "epoch": 0.22027093294103225, "grad_norm": 1.360536117461475, "learning_rate": 9.080320971485628e-06, "loss": 0.7374, "step": 7187 }, { "epoch": 0.22030158146377343, "grad_norm": 1.113799543345537, "learning_rate": 9.080034098895161e-06, "loss": 0.7106, "step": 7188 }, { "epoch": 0.22033222998651464, "grad_norm": 1.4326471469305404, "learning_rate": 9.079747186103077e-06, "loss": 0.7232, "step": 7189 }, { "epoch": 0.22036287850925584, "grad_norm": 0.5022485924868946, "learning_rate": 9.079460233112196e-06, "loss": 0.4708, "step": 7190 }, { "epoch": 0.22039352703199705, "grad_norm": 1.2999548221303612, "learning_rate": 9.079173239925352e-06, "loss": 0.5974, "step": 7191 }, { "epoch": 0.22042417555473826, "grad_norm": 1.2779535869441887, "learning_rate": 9.078886206545371e-06, "loss": 0.7686, "step": 7192 }, { "epoch": 0.22045482407747946, "grad_norm": 1.1926713462397462, "learning_rate": 9.07859913297508e-06, "loss": 0.5575, "step": 7193 }, { "epoch": 0.22048547260022067, "grad_norm": 1.203514682534649, "learning_rate": 9.078312019217308e-06, "loss": 0.7438, "step": 7194 }, { "epoch": 0.22051612112296187, "grad_norm": 1.2153236129045428, "learning_rate": 9.078024865274883e-06, "loss": 0.6612, "step": 7195 }, { "epoch": 0.22054676964570308, "grad_norm": 1.4576796942363237, "learning_rate": 9.077737671150637e-06, "loss": 0.765, "step": 7196 }, { "epoch": 0.22057741816844428, "grad_norm": 1.4521540517183062, "learning_rate": 9.077450436847397e-06, "loss": 0.7273, "step": 7197 }, { "epoch": 0.2206080666911855, "grad_norm": 0.5806928511798195, "learning_rate": 9.077163162367997e-06, "loss": 0.4757, "step": 7198 }, { "epoch": 0.2206387152139267, "grad_norm": 0.5141908145109506, "learning_rate": 9.076875847715262e-06, "loss": 0.4445, "step": 7199 }, { "epoch": 0.2206693637366679, "grad_norm": 1.2985260796993778, "learning_rate": 9.076588492892029e-06, "loss": 0.6485, "step": 7200 }, { "epoch": 0.2207000122594091, "grad_norm": 0.47031989142543545, "learning_rate": 9.076301097901126e-06, "loss": 0.4625, "step": 7201 }, { "epoch": 0.2207306607821503, "grad_norm": 1.1858603474173033, "learning_rate": 9.076013662745385e-06, "loss": 0.6952, "step": 7202 }, { "epoch": 0.22076130930489152, "grad_norm": 1.3004385678097665, "learning_rate": 9.075726187427639e-06, "loss": 0.6587, "step": 7203 }, { "epoch": 0.2207919578276327, "grad_norm": 1.213111970720925, "learning_rate": 9.075438671950719e-06, "loss": 0.6343, "step": 7204 }, { "epoch": 0.2208226063503739, "grad_norm": 1.2595858327786285, "learning_rate": 9.07515111631746e-06, "loss": 0.7371, "step": 7205 }, { "epoch": 0.2208532548731151, "grad_norm": 0.623983931405502, "learning_rate": 9.074863520530691e-06, "loss": 0.4756, "step": 7206 }, { "epoch": 0.2208839033958563, "grad_norm": 0.5815746342934297, "learning_rate": 9.074575884593252e-06, "loss": 0.4476, "step": 7207 }, { "epoch": 0.22091455191859752, "grad_norm": 1.2239425570628657, "learning_rate": 9.074288208507976e-06, "loss": 0.6728, "step": 7208 }, { "epoch": 0.22094520044133872, "grad_norm": 1.2962920482111155, "learning_rate": 9.074000492277695e-06, "loss": 0.7611, "step": 7209 }, { "epoch": 0.22097584896407993, "grad_norm": 0.4509115272833295, "learning_rate": 9.073712735905242e-06, "loss": 0.4608, "step": 7210 }, { "epoch": 0.22100649748682114, "grad_norm": 0.4830836035236228, "learning_rate": 9.073424939393458e-06, "loss": 0.4516, "step": 7211 }, { "epoch": 0.22103714600956234, "grad_norm": 1.2315909514023569, "learning_rate": 9.073137102745174e-06, "loss": 0.7104, "step": 7212 }, { "epoch": 0.22106779453230355, "grad_norm": 1.3731304303954879, "learning_rate": 9.07284922596323e-06, "loss": 0.6918, "step": 7213 }, { "epoch": 0.22109844305504475, "grad_norm": 0.5356491705937604, "learning_rate": 9.072561309050459e-06, "loss": 0.4601, "step": 7214 }, { "epoch": 0.22112909157778596, "grad_norm": 1.3191369868828409, "learning_rate": 9.072273352009699e-06, "loss": 0.6102, "step": 7215 }, { "epoch": 0.22115974010052716, "grad_norm": 1.212924411927897, "learning_rate": 9.071985354843789e-06, "loss": 0.6612, "step": 7216 }, { "epoch": 0.22119038862326837, "grad_norm": 1.5090102180320575, "learning_rate": 9.071697317555564e-06, "loss": 0.7315, "step": 7217 }, { "epoch": 0.22122103714600957, "grad_norm": 1.3360564941142117, "learning_rate": 9.071409240147865e-06, "loss": 0.6671, "step": 7218 }, { "epoch": 0.22125168566875075, "grad_norm": 1.2805656465044308, "learning_rate": 9.071121122623528e-06, "loss": 0.6581, "step": 7219 }, { "epoch": 0.22128233419149196, "grad_norm": 1.4235895844404562, "learning_rate": 9.070832964985393e-06, "loss": 0.8206, "step": 7220 }, { "epoch": 0.22131298271423316, "grad_norm": 1.1910013892326388, "learning_rate": 9.0705447672363e-06, "loss": 0.649, "step": 7221 }, { "epoch": 0.22134363123697437, "grad_norm": 1.208308478879703, "learning_rate": 9.070256529379087e-06, "loss": 0.7031, "step": 7222 }, { "epoch": 0.22137427975971558, "grad_norm": 1.195267028383085, "learning_rate": 9.069968251416596e-06, "loss": 0.7347, "step": 7223 }, { "epoch": 0.22140492828245678, "grad_norm": 1.2525191163364011, "learning_rate": 9.069679933351666e-06, "loss": 0.7124, "step": 7224 }, { "epoch": 0.221435576805198, "grad_norm": 1.39878389666589, "learning_rate": 9.069391575187137e-06, "loss": 0.6885, "step": 7225 }, { "epoch": 0.2214662253279392, "grad_norm": 1.3728216239649456, "learning_rate": 9.069103176925853e-06, "loss": 0.6975, "step": 7226 }, { "epoch": 0.2214968738506804, "grad_norm": 1.2212466533059143, "learning_rate": 9.068814738570655e-06, "loss": 0.7975, "step": 7227 }, { "epoch": 0.2215275223734216, "grad_norm": 0.6096384555996707, "learning_rate": 9.068526260124383e-06, "loss": 0.4621, "step": 7228 }, { "epoch": 0.2215581708961628, "grad_norm": 1.1226566514682084, "learning_rate": 9.06823774158988e-06, "loss": 0.7331, "step": 7229 }, { "epoch": 0.22158881941890402, "grad_norm": 1.2639506993675598, "learning_rate": 9.067949182969993e-06, "loss": 0.7876, "step": 7230 }, { "epoch": 0.22161946794164522, "grad_norm": 1.218245297496227, "learning_rate": 9.067660584267559e-06, "loss": 0.6207, "step": 7231 }, { "epoch": 0.22165011646438643, "grad_norm": 1.2390534043099548, "learning_rate": 9.067371945485426e-06, "loss": 0.8022, "step": 7232 }, { "epoch": 0.22168076498712763, "grad_norm": 0.45345070458055253, "learning_rate": 9.067083266626436e-06, "loss": 0.4311, "step": 7233 }, { "epoch": 0.22171141350986884, "grad_norm": 1.3378390194478336, "learning_rate": 9.066794547693433e-06, "loss": 0.7445, "step": 7234 }, { "epoch": 0.22174206203261002, "grad_norm": 1.1681900862703734, "learning_rate": 9.066505788689264e-06, "loss": 0.6761, "step": 7235 }, { "epoch": 0.22177271055535122, "grad_norm": 1.2259017208397656, "learning_rate": 9.066216989616772e-06, "loss": 0.6641, "step": 7236 }, { "epoch": 0.22180335907809243, "grad_norm": 2.9946246678818524, "learning_rate": 9.065928150478804e-06, "loss": 0.6901, "step": 7237 }, { "epoch": 0.22183400760083363, "grad_norm": 1.1867329305786154, "learning_rate": 9.065639271278205e-06, "loss": 0.6964, "step": 7238 }, { "epoch": 0.22186465612357484, "grad_norm": 1.3137207780838103, "learning_rate": 9.065350352017822e-06, "loss": 0.8262, "step": 7239 }, { "epoch": 0.22189530464631604, "grad_norm": 0.5036764459344346, "learning_rate": 9.065061392700504e-06, "loss": 0.4544, "step": 7240 }, { "epoch": 0.22192595316905725, "grad_norm": 1.3065983703002775, "learning_rate": 9.064772393329094e-06, "loss": 0.7431, "step": 7241 }, { "epoch": 0.22195660169179846, "grad_norm": 1.3127091472603662, "learning_rate": 9.064483353906443e-06, "loss": 0.6823, "step": 7242 }, { "epoch": 0.22198725021453966, "grad_norm": 1.4223976410280175, "learning_rate": 9.064194274435396e-06, "loss": 0.6835, "step": 7243 }, { "epoch": 0.22201789873728087, "grad_norm": 1.3821706861776824, "learning_rate": 9.063905154918804e-06, "loss": 0.6959, "step": 7244 }, { "epoch": 0.22204854726002207, "grad_norm": 0.4700287648655682, "learning_rate": 9.063615995359514e-06, "loss": 0.4588, "step": 7245 }, { "epoch": 0.22207919578276328, "grad_norm": 1.3225092583067477, "learning_rate": 9.063326795760377e-06, "loss": 0.7054, "step": 7246 }, { "epoch": 0.22210984430550448, "grad_norm": 1.2548621637498178, "learning_rate": 9.06303755612424e-06, "loss": 0.6609, "step": 7247 }, { "epoch": 0.2221404928282457, "grad_norm": 1.573500739632131, "learning_rate": 9.062748276453956e-06, "loss": 0.6967, "step": 7248 }, { "epoch": 0.2221711413509869, "grad_norm": 1.2978658747202196, "learning_rate": 9.062458956752374e-06, "loss": 0.6806, "step": 7249 }, { "epoch": 0.22220178987372807, "grad_norm": 1.378838329826322, "learning_rate": 9.062169597022343e-06, "loss": 0.7938, "step": 7250 }, { "epoch": 0.22223243839646928, "grad_norm": 1.2595301697535963, "learning_rate": 9.061880197266715e-06, "loss": 0.6714, "step": 7251 }, { "epoch": 0.22226308691921048, "grad_norm": 1.3816139733640993, "learning_rate": 9.061590757488343e-06, "loss": 0.699, "step": 7252 }, { "epoch": 0.2222937354419517, "grad_norm": 1.176210620613185, "learning_rate": 9.061301277690079e-06, "loss": 0.6179, "step": 7253 }, { "epoch": 0.2223243839646929, "grad_norm": 1.2863055058727393, "learning_rate": 9.061011757874773e-06, "loss": 0.7803, "step": 7254 }, { "epoch": 0.2223550324874341, "grad_norm": 1.3255751486147227, "learning_rate": 9.06072219804528e-06, "loss": 0.6674, "step": 7255 }, { "epoch": 0.2223856810101753, "grad_norm": 1.202178366590764, "learning_rate": 9.060432598204452e-06, "loss": 0.6806, "step": 7256 }, { "epoch": 0.2224163295329165, "grad_norm": 1.1807089440567946, "learning_rate": 9.060142958355143e-06, "loss": 0.7224, "step": 7257 }, { "epoch": 0.22244697805565772, "grad_norm": 1.213894912046039, "learning_rate": 9.059853278500206e-06, "loss": 0.6496, "step": 7258 }, { "epoch": 0.22247762657839892, "grad_norm": 1.3882849426634083, "learning_rate": 9.059563558642495e-06, "loss": 0.7482, "step": 7259 }, { "epoch": 0.22250827510114013, "grad_norm": 1.3819359179927022, "learning_rate": 9.059273798784867e-06, "loss": 0.7449, "step": 7260 }, { "epoch": 0.22253892362388134, "grad_norm": 1.2865530412929624, "learning_rate": 9.058983998930176e-06, "loss": 0.7139, "step": 7261 }, { "epoch": 0.22256957214662254, "grad_norm": 1.188013774281022, "learning_rate": 9.058694159081275e-06, "loss": 0.6629, "step": 7262 }, { "epoch": 0.22260022066936375, "grad_norm": 1.362674218650316, "learning_rate": 9.058404279241024e-06, "loss": 0.8035, "step": 7263 }, { "epoch": 0.22263086919210495, "grad_norm": 1.3561948711583955, "learning_rate": 9.058114359412277e-06, "loss": 0.6978, "step": 7264 }, { "epoch": 0.22266151771484616, "grad_norm": 1.2909911445588211, "learning_rate": 9.057824399597892e-06, "loss": 0.7218, "step": 7265 }, { "epoch": 0.22269216623758734, "grad_norm": 1.3157562548392379, "learning_rate": 9.057534399800722e-06, "loss": 0.7378, "step": 7266 }, { "epoch": 0.22272281476032854, "grad_norm": 1.4600758524057489, "learning_rate": 9.05724436002363e-06, "loss": 0.6939, "step": 7267 }, { "epoch": 0.22275346328306975, "grad_norm": 1.2136850213929486, "learning_rate": 9.05695428026947e-06, "loss": 0.6747, "step": 7268 }, { "epoch": 0.22278411180581095, "grad_norm": 1.3470049135005242, "learning_rate": 9.0566641605411e-06, "loss": 0.7748, "step": 7269 }, { "epoch": 0.22281476032855216, "grad_norm": 1.1959337593882169, "learning_rate": 9.05637400084138e-06, "loss": 0.7109, "step": 7270 }, { "epoch": 0.22284540885129336, "grad_norm": 1.3171449427179824, "learning_rate": 9.056083801173172e-06, "loss": 0.7084, "step": 7271 }, { "epoch": 0.22287605737403457, "grad_norm": 1.1702677646659736, "learning_rate": 9.055793561539332e-06, "loss": 0.6516, "step": 7272 }, { "epoch": 0.22290670589677578, "grad_norm": 1.3006835671051695, "learning_rate": 9.05550328194272e-06, "loss": 0.7626, "step": 7273 }, { "epoch": 0.22293735441951698, "grad_norm": 1.4027729086806648, "learning_rate": 9.055212962386196e-06, "loss": 0.8207, "step": 7274 }, { "epoch": 0.2229680029422582, "grad_norm": 1.2495987268358242, "learning_rate": 9.054922602872621e-06, "loss": 0.7867, "step": 7275 }, { "epoch": 0.2229986514649994, "grad_norm": 1.3270653698567103, "learning_rate": 9.054632203404856e-06, "loss": 0.6944, "step": 7276 }, { "epoch": 0.2230292999877406, "grad_norm": 1.4612816706994645, "learning_rate": 9.054341763985764e-06, "loss": 0.6581, "step": 7277 }, { "epoch": 0.2230599485104818, "grad_norm": 0.5623761111147741, "learning_rate": 9.054051284618205e-06, "loss": 0.4282, "step": 7278 }, { "epoch": 0.223090597033223, "grad_norm": 1.3992669924411305, "learning_rate": 9.053760765305039e-06, "loss": 0.7941, "step": 7279 }, { "epoch": 0.22312124555596421, "grad_norm": 1.3238348422274837, "learning_rate": 9.053470206049133e-06, "loss": 0.6979, "step": 7280 }, { "epoch": 0.2231518940787054, "grad_norm": 1.3377114095319835, "learning_rate": 9.053179606853346e-06, "loss": 0.6026, "step": 7281 }, { "epoch": 0.2231825426014466, "grad_norm": 0.44899452546226537, "learning_rate": 9.052888967720546e-06, "loss": 0.4448, "step": 7282 }, { "epoch": 0.2232131911241878, "grad_norm": 1.235111064625768, "learning_rate": 9.052598288653592e-06, "loss": 0.7093, "step": 7283 }, { "epoch": 0.223243839646929, "grad_norm": 1.4219498182339072, "learning_rate": 9.052307569655351e-06, "loss": 0.6809, "step": 7284 }, { "epoch": 0.22327448816967022, "grad_norm": 1.3434948820537522, "learning_rate": 9.052016810728686e-06, "loss": 0.7339, "step": 7285 }, { "epoch": 0.22330513669241142, "grad_norm": 1.184098085736087, "learning_rate": 9.051726011876464e-06, "loss": 0.6295, "step": 7286 }, { "epoch": 0.22333578521515263, "grad_norm": 1.2758026899505186, "learning_rate": 9.051435173101549e-06, "loss": 0.5696, "step": 7287 }, { "epoch": 0.22336643373789383, "grad_norm": 1.2584813968789816, "learning_rate": 9.051144294406803e-06, "loss": 0.6601, "step": 7288 }, { "epoch": 0.22339708226063504, "grad_norm": 1.4736122220596464, "learning_rate": 9.0508533757951e-06, "loss": 0.7763, "step": 7289 }, { "epoch": 0.22342773078337624, "grad_norm": 1.4180667741855424, "learning_rate": 9.050562417269301e-06, "loss": 0.7779, "step": 7290 }, { "epoch": 0.22345837930611745, "grad_norm": 0.6370260284547254, "learning_rate": 9.050271418832272e-06, "loss": 0.4409, "step": 7291 }, { "epoch": 0.22348902782885866, "grad_norm": 1.3284184603348188, "learning_rate": 9.049980380486887e-06, "loss": 0.6212, "step": 7292 }, { "epoch": 0.22351967635159986, "grad_norm": 1.6021656027277618, "learning_rate": 9.049689302236005e-06, "loss": 0.8193, "step": 7293 }, { "epoch": 0.22355032487434107, "grad_norm": 1.2658297574057034, "learning_rate": 9.049398184082499e-06, "loss": 0.7164, "step": 7294 }, { "epoch": 0.22358097339708227, "grad_norm": 0.44867757693534205, "learning_rate": 9.049107026029236e-06, "loss": 0.4421, "step": 7295 }, { "epoch": 0.22361162191982348, "grad_norm": 1.4343057680826707, "learning_rate": 9.048815828079087e-06, "loss": 0.7201, "step": 7296 }, { "epoch": 0.22364227044256466, "grad_norm": 1.3372395484907698, "learning_rate": 9.04852459023492e-06, "loss": 0.6988, "step": 7297 }, { "epoch": 0.22367291896530586, "grad_norm": 1.324313354338568, "learning_rate": 9.048233312499604e-06, "loss": 0.7319, "step": 7298 }, { "epoch": 0.22370356748804707, "grad_norm": 1.3678020548030967, "learning_rate": 9.047941994876008e-06, "loss": 0.7699, "step": 7299 }, { "epoch": 0.22373421601078827, "grad_norm": 0.5369732180838903, "learning_rate": 9.047650637367005e-06, "loss": 0.4569, "step": 7300 }, { "epoch": 0.22376486453352948, "grad_norm": 1.3646163378165512, "learning_rate": 9.047359239975464e-06, "loss": 0.6379, "step": 7301 }, { "epoch": 0.22379551305627068, "grad_norm": 1.4375926201334628, "learning_rate": 9.047067802704259e-06, "loss": 0.7607, "step": 7302 }, { "epoch": 0.2238261615790119, "grad_norm": 1.4422991231994773, "learning_rate": 9.046776325556257e-06, "loss": 0.7922, "step": 7303 }, { "epoch": 0.2238568101017531, "grad_norm": 1.186364947011949, "learning_rate": 9.046484808534333e-06, "loss": 0.6418, "step": 7304 }, { "epoch": 0.2238874586244943, "grad_norm": 1.249141534061914, "learning_rate": 9.04619325164136e-06, "loss": 0.7669, "step": 7305 }, { "epoch": 0.2239181071472355, "grad_norm": 1.192803732819498, "learning_rate": 9.045901654880207e-06, "loss": 0.6303, "step": 7306 }, { "epoch": 0.2239487556699767, "grad_norm": 1.265122300156603, "learning_rate": 9.045610018253752e-06, "loss": 0.7261, "step": 7307 }, { "epoch": 0.22397940419271792, "grad_norm": 0.5123355730941362, "learning_rate": 9.045318341764866e-06, "loss": 0.4566, "step": 7308 }, { "epoch": 0.22401005271545912, "grad_norm": 0.4690736598231102, "learning_rate": 9.045026625416423e-06, "loss": 0.4549, "step": 7309 }, { "epoch": 0.22404070123820033, "grad_norm": 0.4527968848019452, "learning_rate": 9.044734869211298e-06, "loss": 0.4328, "step": 7310 }, { "epoch": 0.22407134976094154, "grad_norm": 1.1496326457196762, "learning_rate": 9.044443073152364e-06, "loss": 0.5697, "step": 7311 }, { "epoch": 0.2241019982836827, "grad_norm": 1.233345072884419, "learning_rate": 9.0441512372425e-06, "loss": 0.7671, "step": 7312 }, { "epoch": 0.22413264680642392, "grad_norm": 1.2984506230465322, "learning_rate": 9.043859361484578e-06, "loss": 0.7879, "step": 7313 }, { "epoch": 0.22416329532916512, "grad_norm": 1.2458291144648272, "learning_rate": 9.043567445881475e-06, "loss": 0.7147, "step": 7314 }, { "epoch": 0.22419394385190633, "grad_norm": 1.3180697840729625, "learning_rate": 9.043275490436068e-06, "loss": 0.6522, "step": 7315 }, { "epoch": 0.22422459237464754, "grad_norm": 1.392536452972678, "learning_rate": 9.042983495151232e-06, "loss": 0.7698, "step": 7316 }, { "epoch": 0.22425524089738874, "grad_norm": 0.6592741201786032, "learning_rate": 9.042691460029846e-06, "loss": 0.4427, "step": 7317 }, { "epoch": 0.22428588942012995, "grad_norm": 0.5724024168308564, "learning_rate": 9.042399385074785e-06, "loss": 0.4512, "step": 7318 }, { "epoch": 0.22431653794287115, "grad_norm": 1.2734737379743453, "learning_rate": 9.042107270288932e-06, "loss": 0.7736, "step": 7319 }, { "epoch": 0.22434718646561236, "grad_norm": 1.2414875031925767, "learning_rate": 9.04181511567516e-06, "loss": 0.7132, "step": 7320 }, { "epoch": 0.22437783498835356, "grad_norm": 0.5483417431813393, "learning_rate": 9.041522921236347e-06, "loss": 0.447, "step": 7321 }, { "epoch": 0.22440848351109477, "grad_norm": 1.3730612455105116, "learning_rate": 9.041230686975377e-06, "loss": 0.7511, "step": 7322 }, { "epoch": 0.22443913203383598, "grad_norm": 1.430931497343546, "learning_rate": 9.040938412895127e-06, "loss": 0.7618, "step": 7323 }, { "epoch": 0.22446978055657718, "grad_norm": 1.4246903341620205, "learning_rate": 9.040646098998477e-06, "loss": 0.7317, "step": 7324 }, { "epoch": 0.2245004290793184, "grad_norm": 1.0936866928942033, "learning_rate": 9.040353745288307e-06, "loss": 0.6457, "step": 7325 }, { "epoch": 0.2245310776020596, "grad_norm": 1.3086132687329952, "learning_rate": 9.040061351767498e-06, "loss": 0.7586, "step": 7326 }, { "epoch": 0.2245617261248008, "grad_norm": 1.4655695722743989, "learning_rate": 9.039768918438931e-06, "loss": 0.801, "step": 7327 }, { "epoch": 0.22459237464754198, "grad_norm": 1.2379932388069996, "learning_rate": 9.039476445305486e-06, "loss": 0.6889, "step": 7328 }, { "epoch": 0.22462302317028318, "grad_norm": 1.3520594304926514, "learning_rate": 9.039183932370046e-06, "loss": 0.8971, "step": 7329 }, { "epoch": 0.2246536716930244, "grad_norm": 1.273232804087648, "learning_rate": 9.038891379635494e-06, "loss": 0.7269, "step": 7330 }, { "epoch": 0.2246843202157656, "grad_norm": 1.4838078491266529, "learning_rate": 9.038598787104714e-06, "loss": 0.6699, "step": 7331 }, { "epoch": 0.2247149687385068, "grad_norm": 0.6975217723487408, "learning_rate": 9.038306154780585e-06, "loss": 0.475, "step": 7332 }, { "epoch": 0.224745617261248, "grad_norm": 1.4068689820251201, "learning_rate": 9.03801348266599e-06, "loss": 0.7136, "step": 7333 }, { "epoch": 0.2247762657839892, "grad_norm": 1.4833478252987373, "learning_rate": 9.037720770763818e-06, "loss": 0.7947, "step": 7334 }, { "epoch": 0.22480691430673042, "grad_norm": 0.4918215925300453, "learning_rate": 9.037428019076948e-06, "loss": 0.4432, "step": 7335 }, { "epoch": 0.22483756282947162, "grad_norm": 1.2664209497330514, "learning_rate": 9.037135227608269e-06, "loss": 0.6762, "step": 7336 }, { "epoch": 0.22486821135221283, "grad_norm": 1.450032158729198, "learning_rate": 9.036842396360661e-06, "loss": 0.6926, "step": 7337 }, { "epoch": 0.22489885987495403, "grad_norm": 1.2126293765542706, "learning_rate": 9.036549525337015e-06, "loss": 0.7506, "step": 7338 }, { "epoch": 0.22492950839769524, "grad_norm": 0.5615691029353951, "learning_rate": 9.036256614540211e-06, "loss": 0.4523, "step": 7339 }, { "epoch": 0.22496015692043644, "grad_norm": 1.2897429762409984, "learning_rate": 9.03596366397314e-06, "loss": 0.7558, "step": 7340 }, { "epoch": 0.22499080544317765, "grad_norm": 1.2007157431394127, "learning_rate": 9.035670673638684e-06, "loss": 0.6838, "step": 7341 }, { "epoch": 0.22502145396591886, "grad_norm": 1.3772451157305161, "learning_rate": 9.035377643539735e-06, "loss": 0.6426, "step": 7342 }, { "epoch": 0.22505210248866003, "grad_norm": 0.5204768914530152, "learning_rate": 9.035084573679176e-06, "loss": 0.4727, "step": 7343 }, { "epoch": 0.22508275101140124, "grad_norm": 1.4515832412806322, "learning_rate": 9.034791464059896e-06, "loss": 0.6665, "step": 7344 }, { "epoch": 0.22511339953414244, "grad_norm": 1.1249679602795777, "learning_rate": 9.034498314684784e-06, "loss": 0.7307, "step": 7345 }, { "epoch": 0.22514404805688365, "grad_norm": 1.071301570914636, "learning_rate": 9.034205125556728e-06, "loss": 0.5927, "step": 7346 }, { "epoch": 0.22517469657962486, "grad_norm": 1.409682911511709, "learning_rate": 9.033911896678617e-06, "loss": 0.708, "step": 7347 }, { "epoch": 0.22520534510236606, "grad_norm": 1.2918955840493944, "learning_rate": 9.033618628053338e-06, "loss": 0.686, "step": 7348 }, { "epoch": 0.22523599362510727, "grad_norm": 1.2617070687629175, "learning_rate": 9.033325319683786e-06, "loss": 0.609, "step": 7349 }, { "epoch": 0.22526664214784847, "grad_norm": 1.212555973873169, "learning_rate": 9.033031971572845e-06, "loss": 0.6699, "step": 7350 }, { "epoch": 0.22529729067058968, "grad_norm": 1.3444219720330253, "learning_rate": 9.032738583723407e-06, "loss": 0.7187, "step": 7351 }, { "epoch": 0.22532793919333088, "grad_norm": 1.1963710548482007, "learning_rate": 9.032445156138367e-06, "loss": 0.679, "step": 7352 }, { "epoch": 0.2253585877160721, "grad_norm": 1.2552163709904631, "learning_rate": 9.032151688820612e-06, "loss": 0.7434, "step": 7353 }, { "epoch": 0.2253892362388133, "grad_norm": 1.298178234154532, "learning_rate": 9.031858181773034e-06, "loss": 0.6693, "step": 7354 }, { "epoch": 0.2254198847615545, "grad_norm": 1.4650164512683612, "learning_rate": 9.031564634998527e-06, "loss": 0.7108, "step": 7355 }, { "epoch": 0.2254505332842957, "grad_norm": 1.361060029493581, "learning_rate": 9.031271048499982e-06, "loss": 0.5669, "step": 7356 }, { "epoch": 0.2254811818070369, "grad_norm": 0.6418885311905355, "learning_rate": 9.030977422280291e-06, "loss": 0.4366, "step": 7357 }, { "epoch": 0.22551183032977812, "grad_norm": 1.247632037433764, "learning_rate": 9.030683756342348e-06, "loss": 0.6259, "step": 7358 }, { "epoch": 0.2255424788525193, "grad_norm": 1.1719397129904803, "learning_rate": 9.030390050689047e-06, "loss": 0.5755, "step": 7359 }, { "epoch": 0.2255731273752605, "grad_norm": 1.435251501214752, "learning_rate": 9.030096305323281e-06, "loss": 0.766, "step": 7360 }, { "epoch": 0.2256037758980017, "grad_norm": 1.3433648773096365, "learning_rate": 9.029802520247946e-06, "loss": 0.7119, "step": 7361 }, { "epoch": 0.2256344244207429, "grad_norm": 1.5157879531737755, "learning_rate": 9.029508695465935e-06, "loss": 0.6995, "step": 7362 }, { "epoch": 0.22566507294348412, "grad_norm": 1.218955307473612, "learning_rate": 9.029214830980145e-06, "loss": 0.6019, "step": 7363 }, { "epoch": 0.22569572146622532, "grad_norm": 1.466820588458913, "learning_rate": 9.028920926793468e-06, "loss": 0.7297, "step": 7364 }, { "epoch": 0.22572636998896653, "grad_norm": 1.4520825968550368, "learning_rate": 9.028626982908805e-06, "loss": 0.6981, "step": 7365 }, { "epoch": 0.22575701851170774, "grad_norm": 0.4959409191899878, "learning_rate": 9.028332999329048e-06, "loss": 0.4592, "step": 7366 }, { "epoch": 0.22578766703444894, "grad_norm": 1.4431583559948935, "learning_rate": 9.028038976057097e-06, "loss": 0.6779, "step": 7367 }, { "epoch": 0.22581831555719015, "grad_norm": 1.5216999660891917, "learning_rate": 9.027744913095844e-06, "loss": 0.8097, "step": 7368 }, { "epoch": 0.22584896407993135, "grad_norm": 1.353956781824017, "learning_rate": 9.027450810448193e-06, "loss": 0.703, "step": 7369 }, { "epoch": 0.22587961260267256, "grad_norm": 0.5006368503662033, "learning_rate": 9.027156668117036e-06, "loss": 0.449, "step": 7370 }, { "epoch": 0.22591026112541376, "grad_norm": 1.1429858658046952, "learning_rate": 9.026862486105277e-06, "loss": 0.6002, "step": 7371 }, { "epoch": 0.22594090964815497, "grad_norm": 1.3033120417226216, "learning_rate": 9.026568264415809e-06, "loss": 0.6997, "step": 7372 }, { "epoch": 0.22597155817089618, "grad_norm": 1.2050486166648093, "learning_rate": 9.026274003051535e-06, "loss": 0.5739, "step": 7373 }, { "epoch": 0.22600220669363735, "grad_norm": 0.45325414669032543, "learning_rate": 9.025979702015352e-06, "loss": 0.4638, "step": 7374 }, { "epoch": 0.22603285521637856, "grad_norm": 1.5197392945053039, "learning_rate": 9.025685361310162e-06, "loss": 0.656, "step": 7375 }, { "epoch": 0.22606350373911976, "grad_norm": 0.460543209617026, "learning_rate": 9.025390980938864e-06, "loss": 0.4526, "step": 7376 }, { "epoch": 0.22609415226186097, "grad_norm": 1.31323108821371, "learning_rate": 9.025096560904359e-06, "loss": 0.6806, "step": 7377 }, { "epoch": 0.22612480078460218, "grad_norm": 1.27438336671246, "learning_rate": 9.024802101209547e-06, "loss": 0.7104, "step": 7378 }, { "epoch": 0.22615544930734338, "grad_norm": 1.1580187434533777, "learning_rate": 9.02450760185733e-06, "loss": 0.6656, "step": 7379 }, { "epoch": 0.2261860978300846, "grad_norm": 1.4074684029251103, "learning_rate": 9.02421306285061e-06, "loss": 0.8054, "step": 7380 }, { "epoch": 0.2262167463528258, "grad_norm": 1.251225338766378, "learning_rate": 9.023918484192289e-06, "loss": 0.7084, "step": 7381 }, { "epoch": 0.226247394875567, "grad_norm": 0.45962340050280903, "learning_rate": 9.023623865885272e-06, "loss": 0.4294, "step": 7382 }, { "epoch": 0.2262780433983082, "grad_norm": 1.369642164374175, "learning_rate": 9.023329207932456e-06, "loss": 0.7907, "step": 7383 }, { "epoch": 0.2263086919210494, "grad_norm": 1.3429044041132239, "learning_rate": 9.02303451033675e-06, "loss": 0.7428, "step": 7384 }, { "epoch": 0.22633934044379062, "grad_norm": 1.2716637393541375, "learning_rate": 9.022739773101055e-06, "loss": 0.6713, "step": 7385 }, { "epoch": 0.22636998896653182, "grad_norm": 1.3242285068893374, "learning_rate": 9.022444996228276e-06, "loss": 0.7405, "step": 7386 }, { "epoch": 0.22640063748927303, "grad_norm": 0.4324609388852876, "learning_rate": 9.022150179721316e-06, "loss": 0.3936, "step": 7387 }, { "epoch": 0.22643128601201423, "grad_norm": 0.4652316075488059, "learning_rate": 9.021855323583082e-06, "loss": 0.4376, "step": 7388 }, { "epoch": 0.22646193453475544, "grad_norm": 1.2558817295092828, "learning_rate": 9.02156042781648e-06, "loss": 0.6875, "step": 7389 }, { "epoch": 0.22649258305749662, "grad_norm": 1.4319586734041332, "learning_rate": 9.021265492424412e-06, "loss": 0.707, "step": 7390 }, { "epoch": 0.22652323158023782, "grad_norm": 1.272057656243861, "learning_rate": 9.020970517409786e-06, "loss": 0.7633, "step": 7391 }, { "epoch": 0.22655388010297903, "grad_norm": 0.5010613560246308, "learning_rate": 9.020675502775511e-06, "loss": 0.4626, "step": 7392 }, { "epoch": 0.22658452862572023, "grad_norm": 0.4677294566775631, "learning_rate": 9.020380448524489e-06, "loss": 0.4396, "step": 7393 }, { "epoch": 0.22661517714846144, "grad_norm": 0.46337935868944385, "learning_rate": 9.020085354659631e-06, "loss": 0.4298, "step": 7394 }, { "epoch": 0.22664582567120264, "grad_norm": 1.0724859599359102, "learning_rate": 9.019790221183844e-06, "loss": 0.5695, "step": 7395 }, { "epoch": 0.22667647419394385, "grad_norm": 1.226640553779396, "learning_rate": 9.019495048100035e-06, "loss": 0.7149, "step": 7396 }, { "epoch": 0.22670712271668506, "grad_norm": 1.3652226004594386, "learning_rate": 9.019199835411112e-06, "loss": 0.7164, "step": 7397 }, { "epoch": 0.22673777123942626, "grad_norm": 1.4535738840734371, "learning_rate": 9.018904583119987e-06, "loss": 0.818, "step": 7398 }, { "epoch": 0.22676841976216747, "grad_norm": 1.3549333806640564, "learning_rate": 9.018609291229565e-06, "loss": 0.7908, "step": 7399 }, { "epoch": 0.22679906828490867, "grad_norm": 1.2072995252614491, "learning_rate": 9.018313959742756e-06, "loss": 0.6897, "step": 7400 }, { "epoch": 0.22682971680764988, "grad_norm": 1.3893272287553098, "learning_rate": 9.018018588662474e-06, "loss": 0.7041, "step": 7401 }, { "epoch": 0.22686036533039108, "grad_norm": 1.265804077669454, "learning_rate": 9.017723177991627e-06, "loss": 0.6635, "step": 7402 }, { "epoch": 0.2268910138531323, "grad_norm": 1.3817001672268745, "learning_rate": 9.017427727733124e-06, "loss": 0.7862, "step": 7403 }, { "epoch": 0.2269216623758735, "grad_norm": 1.3468870497733632, "learning_rate": 9.017132237889877e-06, "loss": 0.7382, "step": 7404 }, { "epoch": 0.22695231089861467, "grad_norm": 1.357916093711436, "learning_rate": 9.0168367084648e-06, "loss": 0.5422, "step": 7405 }, { "epoch": 0.22698295942135588, "grad_norm": 1.393820995946278, "learning_rate": 9.016541139460803e-06, "loss": 0.7231, "step": 7406 }, { "epoch": 0.22701360794409708, "grad_norm": 1.3598131366779096, "learning_rate": 9.016245530880798e-06, "loss": 0.7641, "step": 7407 }, { "epoch": 0.2270442564668383, "grad_norm": 1.2756031595626314, "learning_rate": 9.015949882727697e-06, "loss": 0.72, "step": 7408 }, { "epoch": 0.2270749049895795, "grad_norm": 1.2054477246736148, "learning_rate": 9.015654195004416e-06, "loss": 0.6577, "step": 7409 }, { "epoch": 0.2271055535123207, "grad_norm": 1.3188762955677658, "learning_rate": 9.015358467713865e-06, "loss": 0.7536, "step": 7410 }, { "epoch": 0.2271362020350619, "grad_norm": 1.3819316867766873, "learning_rate": 9.015062700858963e-06, "loss": 0.7742, "step": 7411 }, { "epoch": 0.2271668505578031, "grad_norm": 1.3611176039468864, "learning_rate": 9.014766894442619e-06, "loss": 0.7183, "step": 7412 }, { "epoch": 0.22719749908054432, "grad_norm": 1.3982872855017365, "learning_rate": 9.01447104846775e-06, "loss": 0.6975, "step": 7413 }, { "epoch": 0.22722814760328552, "grad_norm": 1.2193509079979927, "learning_rate": 9.01417516293727e-06, "loss": 0.6704, "step": 7414 }, { "epoch": 0.22725879612602673, "grad_norm": 1.2799268015230518, "learning_rate": 9.013879237854095e-06, "loss": 0.6968, "step": 7415 }, { "epoch": 0.22728944464876794, "grad_norm": 0.8025389795486866, "learning_rate": 9.013583273221141e-06, "loss": 0.4474, "step": 7416 }, { "epoch": 0.22732009317150914, "grad_norm": 1.2597669838146273, "learning_rate": 9.013287269041322e-06, "loss": 0.7286, "step": 7417 }, { "epoch": 0.22735074169425035, "grad_norm": 0.5901270791817984, "learning_rate": 9.01299122531756e-06, "loss": 0.4739, "step": 7418 }, { "epoch": 0.22738139021699155, "grad_norm": 1.1699325963302878, "learning_rate": 9.012695142052767e-06, "loss": 0.7072, "step": 7419 }, { "epoch": 0.22741203873973276, "grad_norm": 1.4774003231803983, "learning_rate": 9.012399019249863e-06, "loss": 0.8395, "step": 7420 }, { "epoch": 0.22744268726247394, "grad_norm": 1.4339238934115024, "learning_rate": 9.012102856911764e-06, "loss": 0.7529, "step": 7421 }, { "epoch": 0.22747333578521514, "grad_norm": 1.181893377229819, "learning_rate": 9.011806655041389e-06, "loss": 0.646, "step": 7422 }, { "epoch": 0.22750398430795635, "grad_norm": 1.228638119657645, "learning_rate": 9.011510413641658e-06, "loss": 0.7156, "step": 7423 }, { "epoch": 0.22753463283069755, "grad_norm": 0.9741544964301063, "learning_rate": 9.011214132715486e-06, "loss": 0.4728, "step": 7424 }, { "epoch": 0.22756528135343876, "grad_norm": 1.2872909088930136, "learning_rate": 9.010917812265796e-06, "loss": 0.761, "step": 7425 }, { "epoch": 0.22759592987617996, "grad_norm": 0.5829569909044598, "learning_rate": 9.010621452295508e-06, "loss": 0.4531, "step": 7426 }, { "epoch": 0.22762657839892117, "grad_norm": 1.38142884265503, "learning_rate": 9.010325052807538e-06, "loss": 0.6894, "step": 7427 }, { "epoch": 0.22765722692166238, "grad_norm": 1.433793197444086, "learning_rate": 9.01002861380481e-06, "loss": 0.6448, "step": 7428 }, { "epoch": 0.22768787544440358, "grad_norm": 1.4438398370161394, "learning_rate": 9.009732135290246e-06, "loss": 0.8278, "step": 7429 }, { "epoch": 0.2277185239671448, "grad_norm": 1.299492090326007, "learning_rate": 9.009435617266764e-06, "loss": 0.7221, "step": 7430 }, { "epoch": 0.227749172489886, "grad_norm": 0.6457539246881182, "learning_rate": 9.009139059737286e-06, "loss": 0.4359, "step": 7431 }, { "epoch": 0.2277798210126272, "grad_norm": 1.3717122329789617, "learning_rate": 9.008842462704737e-06, "loss": 0.7238, "step": 7432 }, { "epoch": 0.2278104695353684, "grad_norm": 1.3386939720615862, "learning_rate": 9.008545826172037e-06, "loss": 0.7036, "step": 7433 }, { "epoch": 0.2278411180581096, "grad_norm": 1.1048870654075678, "learning_rate": 9.00824915014211e-06, "loss": 0.7202, "step": 7434 }, { "epoch": 0.22787176658085082, "grad_norm": 1.2705764336431251, "learning_rate": 9.007952434617877e-06, "loss": 0.7094, "step": 7435 }, { "epoch": 0.227902415103592, "grad_norm": 1.2844771395685939, "learning_rate": 9.007655679602262e-06, "loss": 0.6589, "step": 7436 }, { "epoch": 0.2279330636263332, "grad_norm": 1.2398595374898933, "learning_rate": 9.007358885098192e-06, "loss": 0.7348, "step": 7437 }, { "epoch": 0.2279637121490744, "grad_norm": 1.4050018458805762, "learning_rate": 9.00706205110859e-06, "loss": 0.8216, "step": 7438 }, { "epoch": 0.2279943606718156, "grad_norm": 1.2220123691527038, "learning_rate": 9.00676517763638e-06, "loss": 0.7131, "step": 7439 }, { "epoch": 0.22802500919455682, "grad_norm": 1.4017627856932149, "learning_rate": 9.006468264684487e-06, "loss": 0.7239, "step": 7440 }, { "epoch": 0.22805565771729802, "grad_norm": 0.5611253714579267, "learning_rate": 9.006171312255837e-06, "loss": 0.4256, "step": 7441 }, { "epoch": 0.22808630624003923, "grad_norm": 1.4578806695992275, "learning_rate": 9.005874320353356e-06, "loss": 0.7275, "step": 7442 }, { "epoch": 0.22811695476278043, "grad_norm": 1.314296365668288, "learning_rate": 9.005577288979972e-06, "loss": 0.695, "step": 7443 }, { "epoch": 0.22814760328552164, "grad_norm": 1.4386210990834354, "learning_rate": 9.00528021813861e-06, "loss": 0.7018, "step": 7444 }, { "epoch": 0.22817825180826284, "grad_norm": 1.309271829237161, "learning_rate": 9.004983107832195e-06, "loss": 0.7344, "step": 7445 }, { "epoch": 0.22820890033100405, "grad_norm": 0.504537595013964, "learning_rate": 9.004685958063657e-06, "loss": 0.4577, "step": 7446 }, { "epoch": 0.22823954885374526, "grad_norm": 1.3979449118418017, "learning_rate": 9.004388768835926e-06, "loss": 0.7144, "step": 7447 }, { "epoch": 0.22827019737648646, "grad_norm": 1.2941905775210076, "learning_rate": 9.004091540151926e-06, "loss": 0.7798, "step": 7448 }, { "epoch": 0.22830084589922767, "grad_norm": 1.3821639056761215, "learning_rate": 9.003794272014587e-06, "loss": 0.7612, "step": 7449 }, { "epoch": 0.22833149442196887, "grad_norm": 1.1660443087131465, "learning_rate": 9.003496964426842e-06, "loss": 0.7454, "step": 7450 }, { "epoch": 0.22836214294471008, "grad_norm": 0.5142719773985188, "learning_rate": 9.003199617391613e-06, "loss": 0.4637, "step": 7451 }, { "epoch": 0.22839279146745126, "grad_norm": 1.2205920367872796, "learning_rate": 9.002902230911836e-06, "loss": 0.6914, "step": 7452 }, { "epoch": 0.22842343999019246, "grad_norm": 1.2980300303219003, "learning_rate": 9.002604804990438e-06, "loss": 0.6799, "step": 7453 }, { "epoch": 0.22845408851293367, "grad_norm": 1.6956530158943062, "learning_rate": 9.002307339630352e-06, "loss": 0.5957, "step": 7454 }, { "epoch": 0.22848473703567487, "grad_norm": 1.4434550802045458, "learning_rate": 9.002009834834506e-06, "loss": 0.737, "step": 7455 }, { "epoch": 0.22851538555841608, "grad_norm": 1.153527491602727, "learning_rate": 9.001712290605835e-06, "loss": 0.7034, "step": 7456 }, { "epoch": 0.22854603408115728, "grad_norm": 1.3987792343929095, "learning_rate": 9.001414706947269e-06, "loss": 0.7097, "step": 7457 }, { "epoch": 0.2285766826038985, "grad_norm": 1.4338213929477033, "learning_rate": 9.00111708386174e-06, "loss": 0.6449, "step": 7458 }, { "epoch": 0.2286073311266397, "grad_norm": 1.2748115216718865, "learning_rate": 9.000819421352178e-06, "loss": 0.7481, "step": 7459 }, { "epoch": 0.2286379796493809, "grad_norm": 1.3394363795814546, "learning_rate": 9.000521719421522e-06, "loss": 0.8625, "step": 7460 }, { "epoch": 0.2286686281721221, "grad_norm": 1.1219840467068305, "learning_rate": 9.0002239780727e-06, "loss": 0.7669, "step": 7461 }, { "epoch": 0.2286992766948633, "grad_norm": 0.5459304743428071, "learning_rate": 8.999926197308649e-06, "loss": 0.4502, "step": 7462 }, { "epoch": 0.22872992521760452, "grad_norm": 0.5148107971950255, "learning_rate": 8.999628377132298e-06, "loss": 0.4564, "step": 7463 }, { "epoch": 0.22876057374034572, "grad_norm": 1.3061227057724185, "learning_rate": 8.99933051754659e-06, "loss": 0.6564, "step": 7464 }, { "epoch": 0.22879122226308693, "grad_norm": 1.2456985787825965, "learning_rate": 8.999032618554453e-06, "loss": 0.7089, "step": 7465 }, { "epoch": 0.22882187078582814, "grad_norm": 1.283606497522142, "learning_rate": 8.998734680158824e-06, "loss": 0.702, "step": 7466 }, { "epoch": 0.2288525193085693, "grad_norm": 1.2536996095265596, "learning_rate": 8.99843670236264e-06, "loss": 0.7465, "step": 7467 }, { "epoch": 0.22888316783131052, "grad_norm": 0.4784094564967611, "learning_rate": 8.998138685168836e-06, "loss": 0.4257, "step": 7468 }, { "epoch": 0.22891381635405172, "grad_norm": 1.3493553033621537, "learning_rate": 8.997840628580348e-06, "loss": 0.6966, "step": 7469 }, { "epoch": 0.22894446487679293, "grad_norm": 1.3252727083359104, "learning_rate": 8.997542532600114e-06, "loss": 0.7448, "step": 7470 }, { "epoch": 0.22897511339953414, "grad_norm": 1.2760747761304658, "learning_rate": 8.99724439723107e-06, "loss": 0.6791, "step": 7471 }, { "epoch": 0.22900576192227534, "grad_norm": 1.2790156486317095, "learning_rate": 8.996946222476156e-06, "loss": 0.6803, "step": 7472 }, { "epoch": 0.22903641044501655, "grad_norm": 0.5139341349548082, "learning_rate": 8.996648008338307e-06, "loss": 0.4463, "step": 7473 }, { "epoch": 0.22906705896775775, "grad_norm": 1.2561050339447908, "learning_rate": 8.996349754820461e-06, "loss": 0.6868, "step": 7474 }, { "epoch": 0.22909770749049896, "grad_norm": 1.579952845360554, "learning_rate": 8.996051461925562e-06, "loss": 0.7374, "step": 7475 }, { "epoch": 0.22912835601324016, "grad_norm": 1.32792025921486, "learning_rate": 8.995753129656542e-06, "loss": 0.6876, "step": 7476 }, { "epoch": 0.22915900453598137, "grad_norm": 0.46284518879683045, "learning_rate": 8.995454758016345e-06, "loss": 0.4372, "step": 7477 }, { "epoch": 0.22918965305872258, "grad_norm": 1.3840995351522862, "learning_rate": 8.99515634700791e-06, "loss": 0.6994, "step": 7478 }, { "epoch": 0.22922030158146378, "grad_norm": 1.3550197718442771, "learning_rate": 8.994857896634178e-06, "loss": 0.6135, "step": 7479 }, { "epoch": 0.229250950104205, "grad_norm": 1.301761530679448, "learning_rate": 8.994559406898088e-06, "loss": 0.7096, "step": 7480 }, { "epoch": 0.2292815986269462, "grad_norm": 1.3948894494334956, "learning_rate": 8.994260877802585e-06, "loss": 0.7246, "step": 7481 }, { "epoch": 0.2293122471496874, "grad_norm": 1.2079921627132135, "learning_rate": 8.993962309350605e-06, "loss": 0.6988, "step": 7482 }, { "epoch": 0.22934289567242858, "grad_norm": 1.2809043598700625, "learning_rate": 8.993663701545091e-06, "loss": 0.6584, "step": 7483 }, { "epoch": 0.22937354419516978, "grad_norm": 1.2353436193920835, "learning_rate": 8.993365054388989e-06, "loss": 0.7345, "step": 7484 }, { "epoch": 0.229404192717911, "grad_norm": 1.3934919391764233, "learning_rate": 8.99306636788524e-06, "loss": 0.7165, "step": 7485 }, { "epoch": 0.2294348412406522, "grad_norm": 1.160008009442718, "learning_rate": 8.992767642036786e-06, "loss": 0.7371, "step": 7486 }, { "epoch": 0.2294654897633934, "grad_norm": 1.406486782392055, "learning_rate": 8.992468876846569e-06, "loss": 0.7539, "step": 7487 }, { "epoch": 0.2294961382861346, "grad_norm": 1.2938917386567137, "learning_rate": 8.992170072317536e-06, "loss": 0.7645, "step": 7488 }, { "epoch": 0.2295267868088758, "grad_norm": 0.4869515838134801, "learning_rate": 8.99187122845263e-06, "loss": 0.4487, "step": 7489 }, { "epoch": 0.22955743533161702, "grad_norm": 1.2374749782005579, "learning_rate": 8.991572345254796e-06, "loss": 0.7322, "step": 7490 }, { "epoch": 0.22958808385435822, "grad_norm": 1.2927811712558726, "learning_rate": 8.991273422726975e-06, "loss": 0.7291, "step": 7491 }, { "epoch": 0.22961873237709943, "grad_norm": 1.173949242258385, "learning_rate": 8.990974460872119e-06, "loss": 0.6639, "step": 7492 }, { "epoch": 0.22964938089984063, "grad_norm": 1.3476120322754699, "learning_rate": 8.99067545969317e-06, "loss": 0.6876, "step": 7493 }, { "epoch": 0.22968002942258184, "grad_norm": 1.3650603793899092, "learning_rate": 8.990376419193074e-06, "loss": 0.7126, "step": 7494 }, { "epoch": 0.22971067794532304, "grad_norm": 1.435180121940794, "learning_rate": 8.990077339374778e-06, "loss": 0.6839, "step": 7495 }, { "epoch": 0.22974132646806425, "grad_norm": 1.2340369137573526, "learning_rate": 8.98977822024123e-06, "loss": 0.7685, "step": 7496 }, { "epoch": 0.22977197499080546, "grad_norm": 1.2445326488716513, "learning_rate": 8.989479061795377e-06, "loss": 0.7183, "step": 7497 }, { "epoch": 0.22980262351354663, "grad_norm": 1.222656873310766, "learning_rate": 8.989179864040166e-06, "loss": 0.7462, "step": 7498 }, { "epoch": 0.22983327203628784, "grad_norm": 1.2078441330121967, "learning_rate": 8.988880626978543e-06, "loss": 0.6663, "step": 7499 }, { "epoch": 0.22986392055902904, "grad_norm": 1.1838814516991925, "learning_rate": 8.98858135061346e-06, "loss": 0.6672, "step": 7500 }, { "epoch": 0.22989456908177025, "grad_norm": 1.33735019472711, "learning_rate": 8.988282034947864e-06, "loss": 0.76, "step": 7501 }, { "epoch": 0.22992521760451146, "grad_norm": 0.5170302540031291, "learning_rate": 8.987982679984704e-06, "loss": 0.4584, "step": 7502 }, { "epoch": 0.22995586612725266, "grad_norm": 0.47353927058002904, "learning_rate": 8.987683285726931e-06, "loss": 0.4315, "step": 7503 }, { "epoch": 0.22998651464999387, "grad_norm": 1.1798377969763747, "learning_rate": 8.987383852177497e-06, "loss": 0.6849, "step": 7504 }, { "epoch": 0.23001716317273507, "grad_norm": 1.309096996165273, "learning_rate": 8.987084379339345e-06, "loss": 0.6528, "step": 7505 }, { "epoch": 0.23004781169547628, "grad_norm": 1.2107835781129572, "learning_rate": 8.986784867215433e-06, "loss": 0.72, "step": 7506 }, { "epoch": 0.23007846021821748, "grad_norm": 1.3027417160476802, "learning_rate": 8.98648531580871e-06, "loss": 0.6949, "step": 7507 }, { "epoch": 0.2301091087409587, "grad_norm": 1.6367130572172532, "learning_rate": 8.986185725122125e-06, "loss": 0.7461, "step": 7508 }, { "epoch": 0.2301397572636999, "grad_norm": 1.1484422190059715, "learning_rate": 8.985886095158634e-06, "loss": 0.7038, "step": 7509 }, { "epoch": 0.2301704057864411, "grad_norm": 1.2640496528749208, "learning_rate": 8.985586425921187e-06, "loss": 0.7092, "step": 7510 }, { "epoch": 0.2302010543091823, "grad_norm": 1.2339780479210594, "learning_rate": 8.985286717412737e-06, "loss": 0.662, "step": 7511 }, { "epoch": 0.2302317028319235, "grad_norm": 1.3436770680788335, "learning_rate": 8.984986969636238e-06, "loss": 0.627, "step": 7512 }, { "epoch": 0.23026235135466472, "grad_norm": 1.4413129862626652, "learning_rate": 8.984687182594642e-06, "loss": 0.7471, "step": 7513 }, { "epoch": 0.2302929998774059, "grad_norm": 1.166884795575826, "learning_rate": 8.984387356290905e-06, "loss": 0.7537, "step": 7514 }, { "epoch": 0.2303236484001471, "grad_norm": 1.36367570715011, "learning_rate": 8.984087490727978e-06, "loss": 0.6528, "step": 7515 }, { "epoch": 0.2303542969228883, "grad_norm": 1.2887648589674279, "learning_rate": 8.983787585908819e-06, "loss": 0.6403, "step": 7516 }, { "epoch": 0.2303849454456295, "grad_norm": 1.3374290289153963, "learning_rate": 8.98348764183638e-06, "loss": 0.8432, "step": 7517 }, { "epoch": 0.23041559396837072, "grad_norm": 1.3568445533880953, "learning_rate": 8.983187658513618e-06, "loss": 0.7575, "step": 7518 }, { "epoch": 0.23044624249111192, "grad_norm": 1.1592858712900072, "learning_rate": 8.982887635943492e-06, "loss": 0.6734, "step": 7519 }, { "epoch": 0.23047689101385313, "grad_norm": 1.5342337861409858, "learning_rate": 8.982587574128953e-06, "loss": 0.6679, "step": 7520 }, { "epoch": 0.23050753953659434, "grad_norm": 3.421205910633264, "learning_rate": 8.98228747307296e-06, "loss": 0.6337, "step": 7521 }, { "epoch": 0.23053818805933554, "grad_norm": 0.6468751359280138, "learning_rate": 8.981987332778468e-06, "loss": 0.4516, "step": 7522 }, { "epoch": 0.23056883658207675, "grad_norm": 1.3249515976351827, "learning_rate": 8.981687153248438e-06, "loss": 0.6951, "step": 7523 }, { "epoch": 0.23059948510481795, "grad_norm": 1.1614718990213693, "learning_rate": 8.981386934485825e-06, "loss": 0.7036, "step": 7524 }, { "epoch": 0.23063013362755916, "grad_norm": 1.3720592075513447, "learning_rate": 8.98108667649359e-06, "loss": 0.7456, "step": 7525 }, { "epoch": 0.23066078215030036, "grad_norm": 1.2992560998659344, "learning_rate": 8.980786379274685e-06, "loss": 0.7432, "step": 7526 }, { "epoch": 0.23069143067304157, "grad_norm": 1.2688940918081644, "learning_rate": 8.980486042832076e-06, "loss": 0.6356, "step": 7527 }, { "epoch": 0.23072207919578278, "grad_norm": 1.2916955029613937, "learning_rate": 8.98018566716872e-06, "loss": 0.6657, "step": 7528 }, { "epoch": 0.23075272771852395, "grad_norm": 1.256514362228521, "learning_rate": 8.979885252287575e-06, "loss": 0.6648, "step": 7529 }, { "epoch": 0.23078337624126516, "grad_norm": 1.276212915708054, "learning_rate": 8.9795847981916e-06, "loss": 0.7349, "step": 7530 }, { "epoch": 0.23081402476400636, "grad_norm": 1.3896934112409067, "learning_rate": 8.979284304883762e-06, "loss": 0.6909, "step": 7531 }, { "epoch": 0.23084467328674757, "grad_norm": 0.6140998519237886, "learning_rate": 8.978983772367015e-06, "loss": 0.4159, "step": 7532 }, { "epoch": 0.23087532180948878, "grad_norm": 1.161310507733596, "learning_rate": 8.978683200644325e-06, "loss": 0.6527, "step": 7533 }, { "epoch": 0.23090597033222998, "grad_norm": 1.5371437181680823, "learning_rate": 8.97838258971865e-06, "loss": 0.6972, "step": 7534 }, { "epoch": 0.2309366188549712, "grad_norm": 1.3584202317535388, "learning_rate": 8.978081939592953e-06, "loss": 0.762, "step": 7535 }, { "epoch": 0.2309672673777124, "grad_norm": 1.3068048239030474, "learning_rate": 8.9777812502702e-06, "loss": 0.7163, "step": 7536 }, { "epoch": 0.2309979159004536, "grad_norm": 1.1750691077993267, "learning_rate": 8.977480521753346e-06, "loss": 0.7259, "step": 7537 }, { "epoch": 0.2310285644231948, "grad_norm": 1.2238544506920763, "learning_rate": 8.977179754045362e-06, "loss": 0.7394, "step": 7538 }, { "epoch": 0.231059212945936, "grad_norm": 1.3436642084715078, "learning_rate": 8.976878947149206e-06, "loss": 0.7004, "step": 7539 }, { "epoch": 0.23108986146867722, "grad_norm": 1.2543374242187635, "learning_rate": 8.976578101067845e-06, "loss": 0.6521, "step": 7540 }, { "epoch": 0.23112050999141842, "grad_norm": 1.419643923853905, "learning_rate": 8.976277215804243e-06, "loss": 0.6113, "step": 7541 }, { "epoch": 0.23115115851415963, "grad_norm": 1.171541409831118, "learning_rate": 8.975976291361364e-06, "loss": 0.6832, "step": 7542 }, { "epoch": 0.23118180703690083, "grad_norm": 1.3206272041363851, "learning_rate": 8.975675327742173e-06, "loss": 0.6705, "step": 7543 }, { "epoch": 0.23121245555964204, "grad_norm": 1.973707914862169, "learning_rate": 8.975374324949638e-06, "loss": 0.6014, "step": 7544 }, { "epoch": 0.23124310408238322, "grad_norm": 1.3135450286172752, "learning_rate": 8.975073282986719e-06, "loss": 0.7122, "step": 7545 }, { "epoch": 0.23127375260512442, "grad_norm": 1.349458647803026, "learning_rate": 8.974772201856387e-06, "loss": 0.7349, "step": 7546 }, { "epoch": 0.23130440112786563, "grad_norm": 1.4500301990830011, "learning_rate": 8.974471081561608e-06, "loss": 0.756, "step": 7547 }, { "epoch": 0.23133504965060683, "grad_norm": 1.2857757072096918, "learning_rate": 8.97416992210535e-06, "loss": 0.7754, "step": 7548 }, { "epoch": 0.23136569817334804, "grad_norm": 0.6423798286185878, "learning_rate": 8.973868723490578e-06, "loss": 0.4407, "step": 7549 }, { "epoch": 0.23139634669608924, "grad_norm": 1.3287337976722284, "learning_rate": 8.97356748572026e-06, "loss": 0.7322, "step": 7550 }, { "epoch": 0.23142699521883045, "grad_norm": 0.46188465144268676, "learning_rate": 8.973266208797365e-06, "loss": 0.4299, "step": 7551 }, { "epoch": 0.23145764374157166, "grad_norm": 1.3101923024408868, "learning_rate": 8.972964892724862e-06, "loss": 0.6908, "step": 7552 }, { "epoch": 0.23148829226431286, "grad_norm": 1.4428769815605917, "learning_rate": 8.97266353750572e-06, "loss": 0.7598, "step": 7553 }, { "epoch": 0.23151894078705407, "grad_norm": 1.2727866378534867, "learning_rate": 8.972362143142905e-06, "loss": 0.7102, "step": 7554 }, { "epoch": 0.23154958930979527, "grad_norm": 0.5632452567151962, "learning_rate": 8.972060709639393e-06, "loss": 0.4496, "step": 7555 }, { "epoch": 0.23158023783253648, "grad_norm": 1.543712265851809, "learning_rate": 8.971759236998147e-06, "loss": 0.7211, "step": 7556 }, { "epoch": 0.23161088635527768, "grad_norm": 1.1880892404113013, "learning_rate": 8.971457725222143e-06, "loss": 0.6383, "step": 7557 }, { "epoch": 0.2316415348780189, "grad_norm": 1.1997422412289231, "learning_rate": 8.971156174314349e-06, "loss": 0.6779, "step": 7558 }, { "epoch": 0.2316721834007601, "grad_norm": 0.5449096407677076, "learning_rate": 8.970854584277738e-06, "loss": 0.448, "step": 7559 }, { "epoch": 0.23170283192350127, "grad_norm": 1.3617863056623563, "learning_rate": 8.970552955115282e-06, "loss": 0.6767, "step": 7560 }, { "epoch": 0.23173348044624248, "grad_norm": 1.3003129982096018, "learning_rate": 8.970251286829949e-06, "loss": 0.7111, "step": 7561 }, { "epoch": 0.23176412896898368, "grad_norm": 1.3293717319599239, "learning_rate": 8.969949579424715e-06, "loss": 0.6537, "step": 7562 }, { "epoch": 0.2317947774917249, "grad_norm": 1.194351713553034, "learning_rate": 8.969647832902552e-06, "loss": 0.6231, "step": 7563 }, { "epoch": 0.2318254260144661, "grad_norm": 1.2117110262568023, "learning_rate": 8.969346047266436e-06, "loss": 0.7125, "step": 7564 }, { "epoch": 0.2318560745372073, "grad_norm": 1.2688427460457878, "learning_rate": 8.969044222519333e-06, "loss": 0.6702, "step": 7565 }, { "epoch": 0.2318867230599485, "grad_norm": 1.2298643014068678, "learning_rate": 8.968742358664227e-06, "loss": 0.675, "step": 7566 }, { "epoch": 0.2319173715826897, "grad_norm": 1.2010545466661202, "learning_rate": 8.968440455704085e-06, "loss": 0.7021, "step": 7567 }, { "epoch": 0.23194802010543092, "grad_norm": 1.1366508712038224, "learning_rate": 8.968138513641882e-06, "loss": 0.6505, "step": 7568 }, { "epoch": 0.23197866862817212, "grad_norm": 0.6204960272892824, "learning_rate": 8.967836532480595e-06, "loss": 0.4662, "step": 7569 }, { "epoch": 0.23200931715091333, "grad_norm": 1.278764551311432, "learning_rate": 8.967534512223202e-06, "loss": 0.7248, "step": 7570 }, { "epoch": 0.23203996567365454, "grad_norm": 1.2156743347335737, "learning_rate": 8.967232452872676e-06, "loss": 0.687, "step": 7571 }, { "epoch": 0.23207061419639574, "grad_norm": 1.3717244500371506, "learning_rate": 8.966930354431991e-06, "loss": 0.7773, "step": 7572 }, { "epoch": 0.23210126271913695, "grad_norm": 1.3984592103803484, "learning_rate": 8.966628216904128e-06, "loss": 0.707, "step": 7573 }, { "epoch": 0.23213191124187815, "grad_norm": 1.1926637362261165, "learning_rate": 8.966326040292062e-06, "loss": 0.6854, "step": 7574 }, { "epoch": 0.23216255976461936, "grad_norm": 0.4487121834010065, "learning_rate": 8.966023824598771e-06, "loss": 0.4598, "step": 7575 }, { "epoch": 0.23219320828736054, "grad_norm": 0.4882869295566622, "learning_rate": 8.965721569827233e-06, "loss": 0.4492, "step": 7576 }, { "epoch": 0.23222385681010174, "grad_norm": 1.173744907008067, "learning_rate": 8.965419275980425e-06, "loss": 0.6942, "step": 7577 }, { "epoch": 0.23225450533284295, "grad_norm": 1.194399872346848, "learning_rate": 8.965116943061325e-06, "loss": 0.6954, "step": 7578 }, { "epoch": 0.23228515385558415, "grad_norm": 1.3083044769205872, "learning_rate": 8.964814571072916e-06, "loss": 0.7329, "step": 7579 }, { "epoch": 0.23231580237832536, "grad_norm": 1.197589734299377, "learning_rate": 8.964512160018173e-06, "loss": 0.6667, "step": 7580 }, { "epoch": 0.23234645090106656, "grad_norm": 1.1784136590848961, "learning_rate": 8.964209709900078e-06, "loss": 0.7245, "step": 7581 }, { "epoch": 0.23237709942380777, "grad_norm": 1.2699377227169726, "learning_rate": 8.963907220721609e-06, "loss": 0.7275, "step": 7582 }, { "epoch": 0.23240774794654898, "grad_norm": 1.1033894097805532, "learning_rate": 8.963604692485748e-06, "loss": 0.6666, "step": 7583 }, { "epoch": 0.23243839646929018, "grad_norm": 1.2487016577762116, "learning_rate": 8.963302125195476e-06, "loss": 0.6296, "step": 7584 }, { "epoch": 0.2324690449920314, "grad_norm": 1.168012700756869, "learning_rate": 8.962999518853775e-06, "loss": 0.717, "step": 7585 }, { "epoch": 0.2324996935147726, "grad_norm": 1.2790726944516635, "learning_rate": 8.962696873463625e-06, "loss": 0.7556, "step": 7586 }, { "epoch": 0.2325303420375138, "grad_norm": 1.1527772154519909, "learning_rate": 8.96239418902801e-06, "loss": 0.7359, "step": 7587 }, { "epoch": 0.232560990560255, "grad_norm": 1.2451378457835214, "learning_rate": 8.962091465549912e-06, "loss": 0.6574, "step": 7588 }, { "epoch": 0.2325916390829962, "grad_norm": 0.5620930565648026, "learning_rate": 8.96178870303231e-06, "loss": 0.4732, "step": 7589 }, { "epoch": 0.23262228760573742, "grad_norm": 1.4310134193387833, "learning_rate": 8.961485901478193e-06, "loss": 0.7495, "step": 7590 }, { "epoch": 0.2326529361284786, "grad_norm": 1.2347869902102613, "learning_rate": 8.96118306089054e-06, "loss": 0.7096, "step": 7591 }, { "epoch": 0.2326835846512198, "grad_norm": 1.4079587302875667, "learning_rate": 8.960880181272338e-06, "loss": 0.7685, "step": 7592 }, { "epoch": 0.232714233173961, "grad_norm": 1.1778880527283142, "learning_rate": 8.960577262626569e-06, "loss": 0.6761, "step": 7593 }, { "epoch": 0.2327448816967022, "grad_norm": 1.2588447092085229, "learning_rate": 8.96027430495622e-06, "loss": 0.7008, "step": 7594 }, { "epoch": 0.23277553021944342, "grad_norm": 1.2097825724857354, "learning_rate": 8.959971308264275e-06, "loss": 0.6548, "step": 7595 }, { "epoch": 0.23280617874218462, "grad_norm": 1.1930418339544293, "learning_rate": 8.959668272553717e-06, "loss": 0.6366, "step": 7596 }, { "epoch": 0.23283682726492583, "grad_norm": 1.2514129599831962, "learning_rate": 8.959365197827537e-06, "loss": 0.661, "step": 7597 }, { "epoch": 0.23286747578766703, "grad_norm": 0.517831807273158, "learning_rate": 8.95906208408872e-06, "loss": 0.4303, "step": 7598 }, { "epoch": 0.23289812431040824, "grad_norm": 1.3707969671670361, "learning_rate": 8.958758931340247e-06, "loss": 0.7648, "step": 7599 }, { "epoch": 0.23292877283314944, "grad_norm": 1.2501459993521982, "learning_rate": 8.958455739585113e-06, "loss": 0.6208, "step": 7600 }, { "epoch": 0.23295942135589065, "grad_norm": 1.2766148414309737, "learning_rate": 8.958152508826299e-06, "loss": 0.7296, "step": 7601 }, { "epoch": 0.23299006987863186, "grad_norm": 1.2526854947713826, "learning_rate": 8.957849239066797e-06, "loss": 0.6297, "step": 7602 }, { "epoch": 0.23302071840137306, "grad_norm": 1.1929136068929451, "learning_rate": 8.957545930309595e-06, "loss": 0.733, "step": 7603 }, { "epoch": 0.23305136692411427, "grad_norm": 1.1698687278895823, "learning_rate": 8.95724258255768e-06, "loss": 0.6796, "step": 7604 }, { "epoch": 0.23308201544685547, "grad_norm": 1.3234751051219233, "learning_rate": 8.95693919581404e-06, "loss": 0.6954, "step": 7605 }, { "epoch": 0.23311266396959668, "grad_norm": 1.234869034079375, "learning_rate": 8.956635770081665e-06, "loss": 0.7016, "step": 7606 }, { "epoch": 0.23314331249233786, "grad_norm": 1.304567967725099, "learning_rate": 8.956332305363546e-06, "loss": 0.7842, "step": 7607 }, { "epoch": 0.23317396101507906, "grad_norm": 1.3990676085723184, "learning_rate": 8.956028801662675e-06, "loss": 0.8179, "step": 7608 }, { "epoch": 0.23320460953782027, "grad_norm": 0.4994440083381749, "learning_rate": 8.955725258982038e-06, "loss": 0.4474, "step": 7609 }, { "epoch": 0.23323525806056147, "grad_norm": 1.1929646869657737, "learning_rate": 8.955421677324628e-06, "loss": 0.7031, "step": 7610 }, { "epoch": 0.23326590658330268, "grad_norm": 1.54397591890012, "learning_rate": 8.955118056693436e-06, "loss": 0.7228, "step": 7611 }, { "epoch": 0.23329655510604388, "grad_norm": 1.157626583724836, "learning_rate": 8.954814397091454e-06, "loss": 0.7256, "step": 7612 }, { "epoch": 0.2333272036287851, "grad_norm": 1.223387873117667, "learning_rate": 8.954510698521674e-06, "loss": 0.6787, "step": 7613 }, { "epoch": 0.2333578521515263, "grad_norm": 1.4026295450011994, "learning_rate": 8.954206960987088e-06, "loss": 0.7057, "step": 7614 }, { "epoch": 0.2333885006742675, "grad_norm": 0.48553424424037306, "learning_rate": 8.953903184490688e-06, "loss": 0.4495, "step": 7615 }, { "epoch": 0.2334191491970087, "grad_norm": 1.3576487997673832, "learning_rate": 8.953599369035471e-06, "loss": 0.8001, "step": 7616 }, { "epoch": 0.2334497977197499, "grad_norm": 1.498300195406316, "learning_rate": 8.953295514624428e-06, "loss": 0.6251, "step": 7617 }, { "epoch": 0.23348044624249112, "grad_norm": 0.45587630436940973, "learning_rate": 8.95299162126055e-06, "loss": 0.4418, "step": 7618 }, { "epoch": 0.23351109476523232, "grad_norm": 1.3727187807197156, "learning_rate": 8.952687688946836e-06, "loss": 0.7655, "step": 7619 }, { "epoch": 0.23354174328797353, "grad_norm": 1.2719730826452609, "learning_rate": 8.952383717686277e-06, "loss": 0.6704, "step": 7620 }, { "epoch": 0.23357239181071474, "grad_norm": 1.270244067257774, "learning_rate": 8.952079707481872e-06, "loss": 0.6004, "step": 7621 }, { "epoch": 0.2336030403334559, "grad_norm": 1.2547702555944011, "learning_rate": 8.951775658336612e-06, "loss": 0.6338, "step": 7622 }, { "epoch": 0.23363368885619712, "grad_norm": 1.2573038214240742, "learning_rate": 8.951471570253498e-06, "loss": 0.6785, "step": 7623 }, { "epoch": 0.23366433737893832, "grad_norm": 1.1751247711677124, "learning_rate": 8.951167443235522e-06, "loss": 0.6936, "step": 7624 }, { "epoch": 0.23369498590167953, "grad_norm": 1.2983459804210673, "learning_rate": 8.950863277285683e-06, "loss": 0.7008, "step": 7625 }, { "epoch": 0.23372563442442074, "grad_norm": 1.1988219062840064, "learning_rate": 8.950559072406977e-06, "loss": 0.6871, "step": 7626 }, { "epoch": 0.23375628294716194, "grad_norm": 1.2915815015603054, "learning_rate": 8.950254828602402e-06, "loss": 0.6611, "step": 7627 }, { "epoch": 0.23378693146990315, "grad_norm": 1.2013275846353122, "learning_rate": 8.949950545874954e-06, "loss": 0.6316, "step": 7628 }, { "epoch": 0.23381757999264435, "grad_norm": 1.1979625664176081, "learning_rate": 8.949646224227635e-06, "loss": 0.685, "step": 7629 }, { "epoch": 0.23384822851538556, "grad_norm": 1.165812545089733, "learning_rate": 8.94934186366344e-06, "loss": 0.6914, "step": 7630 }, { "epoch": 0.23387887703812676, "grad_norm": 1.270672099551615, "learning_rate": 8.94903746418537e-06, "loss": 0.7687, "step": 7631 }, { "epoch": 0.23390952556086797, "grad_norm": 1.4300390747369331, "learning_rate": 8.94873302579642e-06, "loss": 0.693, "step": 7632 }, { "epoch": 0.23394017408360918, "grad_norm": 1.3251205538071829, "learning_rate": 8.948428548499597e-06, "loss": 0.7269, "step": 7633 }, { "epoch": 0.23397082260635038, "grad_norm": 1.289684591570354, "learning_rate": 8.948124032297897e-06, "loss": 0.7083, "step": 7634 }, { "epoch": 0.2340014711290916, "grad_norm": 1.2904322101737888, "learning_rate": 8.94781947719432e-06, "loss": 0.6374, "step": 7635 }, { "epoch": 0.2340321196518328, "grad_norm": 1.3394474754254917, "learning_rate": 8.947514883191868e-06, "loss": 0.5992, "step": 7636 }, { "epoch": 0.234062768174574, "grad_norm": 1.2637264691625922, "learning_rate": 8.94721025029354e-06, "loss": 0.6315, "step": 7637 }, { "epoch": 0.23409341669731518, "grad_norm": 1.4663912049639352, "learning_rate": 8.94690557850234e-06, "loss": 0.7918, "step": 7638 }, { "epoch": 0.23412406522005638, "grad_norm": 1.282905276077117, "learning_rate": 8.946600867821272e-06, "loss": 0.7974, "step": 7639 }, { "epoch": 0.2341547137427976, "grad_norm": 0.6288703220889443, "learning_rate": 8.946296118253333e-06, "loss": 0.4819, "step": 7640 }, { "epoch": 0.2341853622655388, "grad_norm": 1.3251509463635194, "learning_rate": 8.945991329801528e-06, "loss": 0.8112, "step": 7641 }, { "epoch": 0.23421601078828, "grad_norm": 1.407471529303757, "learning_rate": 8.945686502468865e-06, "loss": 0.7582, "step": 7642 }, { "epoch": 0.2342466593110212, "grad_norm": 1.1489288077385624, "learning_rate": 8.94538163625834e-06, "loss": 0.6976, "step": 7643 }, { "epoch": 0.2342773078337624, "grad_norm": 1.448036340511755, "learning_rate": 8.945076731172961e-06, "loss": 0.6258, "step": 7644 }, { "epoch": 0.23430795635650362, "grad_norm": 1.3603212462359089, "learning_rate": 8.944771787215731e-06, "loss": 0.6872, "step": 7645 }, { "epoch": 0.23433860487924482, "grad_norm": 1.6388090182878416, "learning_rate": 8.944466804389657e-06, "loss": 0.7342, "step": 7646 }, { "epoch": 0.23436925340198603, "grad_norm": 1.2086999096641446, "learning_rate": 8.94416178269774e-06, "loss": 0.7377, "step": 7647 }, { "epoch": 0.23439990192472723, "grad_norm": 1.4183222512560227, "learning_rate": 8.94385672214299e-06, "loss": 0.7231, "step": 7648 }, { "epoch": 0.23443055044746844, "grad_norm": 1.365330315461527, "learning_rate": 8.94355162272841e-06, "loss": 0.7192, "step": 7649 }, { "epoch": 0.23446119897020964, "grad_norm": 1.2598637267437813, "learning_rate": 8.943246484457006e-06, "loss": 0.6945, "step": 7650 }, { "epoch": 0.23449184749295085, "grad_norm": 1.2439342703833027, "learning_rate": 8.942941307331786e-06, "loss": 0.542, "step": 7651 }, { "epoch": 0.23452249601569206, "grad_norm": 1.3084670768513305, "learning_rate": 8.942636091355756e-06, "loss": 0.681, "step": 7652 }, { "epoch": 0.23455314453843326, "grad_norm": 1.2869846358451964, "learning_rate": 8.942330836531925e-06, "loss": 0.7147, "step": 7653 }, { "epoch": 0.23458379306117444, "grad_norm": 1.3031473966335354, "learning_rate": 8.9420255428633e-06, "loss": 0.636, "step": 7654 }, { "epoch": 0.23461444158391564, "grad_norm": 1.3611700711486034, "learning_rate": 8.941720210352886e-06, "loss": 0.6319, "step": 7655 }, { "epoch": 0.23464509010665685, "grad_norm": 1.3458204826352202, "learning_rate": 8.941414839003695e-06, "loss": 0.6977, "step": 7656 }, { "epoch": 0.23467573862939806, "grad_norm": 1.2958464122549318, "learning_rate": 8.941109428818737e-06, "loss": 0.7405, "step": 7657 }, { "epoch": 0.23470638715213926, "grad_norm": 1.3585054426264236, "learning_rate": 8.940803979801019e-06, "loss": 0.6844, "step": 7658 }, { "epoch": 0.23473703567488047, "grad_norm": 1.2382354331598813, "learning_rate": 8.940498491953549e-06, "loss": 0.7508, "step": 7659 }, { "epoch": 0.23476768419762167, "grad_norm": 1.3508850813388553, "learning_rate": 8.940192965279342e-06, "loss": 0.6502, "step": 7660 }, { "epoch": 0.23479833272036288, "grad_norm": 1.3876416466125734, "learning_rate": 8.939887399781404e-06, "loss": 0.6785, "step": 7661 }, { "epoch": 0.23482898124310408, "grad_norm": 1.1518160842531031, "learning_rate": 8.939581795462747e-06, "loss": 0.7017, "step": 7662 }, { "epoch": 0.2348596297658453, "grad_norm": 1.4224357822403273, "learning_rate": 8.939276152326384e-06, "loss": 0.6748, "step": 7663 }, { "epoch": 0.2348902782885865, "grad_norm": 0.6999846183392081, "learning_rate": 8.938970470375324e-06, "loss": 0.4865, "step": 7664 }, { "epoch": 0.2349209268113277, "grad_norm": 1.3593279955650115, "learning_rate": 8.93866474961258e-06, "loss": 0.7318, "step": 7665 }, { "epoch": 0.2349515753340689, "grad_norm": 1.3766284647780547, "learning_rate": 8.938358990041164e-06, "loss": 0.693, "step": 7666 }, { "epoch": 0.2349822238568101, "grad_norm": 1.158877105056088, "learning_rate": 8.938053191664091e-06, "loss": 0.5306, "step": 7667 }, { "epoch": 0.23501287237955132, "grad_norm": 0.445715173603036, "learning_rate": 8.937747354484372e-06, "loss": 0.4531, "step": 7668 }, { "epoch": 0.2350435209022925, "grad_norm": 1.339519375362678, "learning_rate": 8.93744147850502e-06, "loss": 0.6963, "step": 7669 }, { "epoch": 0.2350741694250337, "grad_norm": 0.5203708589595863, "learning_rate": 8.93713556372905e-06, "loss": 0.4537, "step": 7670 }, { "epoch": 0.2351048179477749, "grad_norm": 0.5045900882865355, "learning_rate": 8.936829610159477e-06, "loss": 0.4737, "step": 7671 }, { "epoch": 0.2351354664705161, "grad_norm": 0.4824817040294328, "learning_rate": 8.936523617799312e-06, "loss": 0.4475, "step": 7672 }, { "epoch": 0.23516611499325732, "grad_norm": 1.2259728567756785, "learning_rate": 8.936217586651574e-06, "loss": 0.7297, "step": 7673 }, { "epoch": 0.23519676351599852, "grad_norm": 1.1275717205055473, "learning_rate": 8.935911516719278e-06, "loss": 0.6595, "step": 7674 }, { "epoch": 0.23522741203873973, "grad_norm": 1.3255932492378872, "learning_rate": 8.935605408005437e-06, "loss": 0.6931, "step": 7675 }, { "epoch": 0.23525806056148094, "grad_norm": 1.2162577163204435, "learning_rate": 8.93529926051307e-06, "loss": 0.7296, "step": 7676 }, { "epoch": 0.23528870908422214, "grad_norm": 1.2117761687578008, "learning_rate": 8.934993074245193e-06, "loss": 0.6205, "step": 7677 }, { "epoch": 0.23531935760696335, "grad_norm": 1.4551573927968422, "learning_rate": 8.93468684920482e-06, "loss": 0.6882, "step": 7678 }, { "epoch": 0.23535000612970455, "grad_norm": 1.414074077577746, "learning_rate": 8.934380585394972e-06, "loss": 0.756, "step": 7679 }, { "epoch": 0.23538065465244576, "grad_norm": 1.4860689466319634, "learning_rate": 8.934074282818667e-06, "loss": 0.7866, "step": 7680 }, { "epoch": 0.23541130317518696, "grad_norm": 1.2369339866975102, "learning_rate": 8.93376794147892e-06, "loss": 0.6341, "step": 7681 }, { "epoch": 0.23544195169792817, "grad_norm": 1.2097811415096746, "learning_rate": 8.933461561378752e-06, "loss": 0.5995, "step": 7682 }, { "epoch": 0.23547260022066938, "grad_norm": 1.3700186022476453, "learning_rate": 8.933155142521179e-06, "loss": 0.7892, "step": 7683 }, { "epoch": 0.23550324874341058, "grad_norm": 1.1891766803628308, "learning_rate": 8.932848684909223e-06, "loss": 0.5746, "step": 7684 }, { "epoch": 0.23553389726615176, "grad_norm": 1.2985828009666731, "learning_rate": 8.932542188545903e-06, "loss": 0.684, "step": 7685 }, { "epoch": 0.23556454578889297, "grad_norm": 1.164722569417507, "learning_rate": 8.93223565343424e-06, "loss": 0.6501, "step": 7686 }, { "epoch": 0.23559519431163417, "grad_norm": 1.0700863530862768, "learning_rate": 8.93192907957725e-06, "loss": 0.5811, "step": 7687 }, { "epoch": 0.23562584283437538, "grad_norm": 1.3708118005991559, "learning_rate": 8.931622466977959e-06, "loss": 0.7525, "step": 7688 }, { "epoch": 0.23565649135711658, "grad_norm": 1.1718558683336797, "learning_rate": 8.931315815639385e-06, "loss": 0.7042, "step": 7689 }, { "epoch": 0.2356871398798578, "grad_norm": 1.2522938511623043, "learning_rate": 8.93100912556455e-06, "loss": 0.7288, "step": 7690 }, { "epoch": 0.235717788402599, "grad_norm": 0.6810709366835764, "learning_rate": 8.930702396756476e-06, "loss": 0.4363, "step": 7691 }, { "epoch": 0.2357484369253402, "grad_norm": 1.245115925787663, "learning_rate": 8.930395629218187e-06, "loss": 0.8302, "step": 7692 }, { "epoch": 0.2357790854480814, "grad_norm": 0.5215711535999927, "learning_rate": 8.930088822952703e-06, "loss": 0.4705, "step": 7693 }, { "epoch": 0.2358097339708226, "grad_norm": 1.330298466793888, "learning_rate": 8.92978197796305e-06, "loss": 0.7399, "step": 7694 }, { "epoch": 0.23584038249356382, "grad_norm": 1.3095162349891654, "learning_rate": 8.92947509425225e-06, "loss": 0.6275, "step": 7695 }, { "epoch": 0.23587103101630502, "grad_norm": 0.52431308192457, "learning_rate": 8.929168171823323e-06, "loss": 0.423, "step": 7696 }, { "epoch": 0.23590167953904623, "grad_norm": 1.4226721494586954, "learning_rate": 8.928861210679298e-06, "loss": 0.6789, "step": 7697 }, { "epoch": 0.23593232806178743, "grad_norm": 1.131192050123007, "learning_rate": 8.928554210823201e-06, "loss": 0.7451, "step": 7698 }, { "epoch": 0.23596297658452864, "grad_norm": 1.3763834729921567, "learning_rate": 8.92824717225805e-06, "loss": 0.8346, "step": 7699 }, { "epoch": 0.23599362510726982, "grad_norm": 1.3024948062710857, "learning_rate": 8.927940094986879e-06, "loss": 0.6825, "step": 7700 }, { "epoch": 0.23602427363001102, "grad_norm": 1.316086344719439, "learning_rate": 8.927632979012707e-06, "loss": 0.7305, "step": 7701 }, { "epoch": 0.23605492215275223, "grad_norm": 0.5632622784899162, "learning_rate": 8.927325824338561e-06, "loss": 0.443, "step": 7702 }, { "epoch": 0.23608557067549343, "grad_norm": 0.5763488247798635, "learning_rate": 8.92701863096747e-06, "loss": 0.4594, "step": 7703 }, { "epoch": 0.23611621919823464, "grad_norm": 1.2190077567812767, "learning_rate": 8.92671139890246e-06, "loss": 0.6586, "step": 7704 }, { "epoch": 0.23614686772097584, "grad_norm": 1.3319244757236484, "learning_rate": 8.926404128146558e-06, "loss": 0.6813, "step": 7705 }, { "epoch": 0.23617751624371705, "grad_norm": 1.4318600068251963, "learning_rate": 8.92609681870279e-06, "loss": 0.6978, "step": 7706 }, { "epoch": 0.23620816476645826, "grad_norm": 1.3171347209763018, "learning_rate": 8.925789470574187e-06, "loss": 0.6983, "step": 7707 }, { "epoch": 0.23623881328919946, "grad_norm": 1.411931025519601, "learning_rate": 8.925482083763776e-06, "loss": 0.7565, "step": 7708 }, { "epoch": 0.23626946181194067, "grad_norm": 1.332450779387462, "learning_rate": 8.925174658274585e-06, "loss": 0.619, "step": 7709 }, { "epoch": 0.23630011033468187, "grad_norm": 1.3915959344576212, "learning_rate": 8.924867194109643e-06, "loss": 0.6363, "step": 7710 }, { "epoch": 0.23633075885742308, "grad_norm": 1.3600451052688125, "learning_rate": 8.924559691271983e-06, "loss": 0.7339, "step": 7711 }, { "epoch": 0.23636140738016428, "grad_norm": 1.384836033343521, "learning_rate": 8.92425214976463e-06, "loss": 0.725, "step": 7712 }, { "epoch": 0.2363920559029055, "grad_norm": 0.7185451136071236, "learning_rate": 8.923944569590617e-06, "loss": 0.4201, "step": 7713 }, { "epoch": 0.2364227044256467, "grad_norm": 1.4156830034892893, "learning_rate": 8.923636950752974e-06, "loss": 0.5949, "step": 7714 }, { "epoch": 0.2364533529483879, "grad_norm": 1.3703695784294374, "learning_rate": 8.923329293254732e-06, "loss": 0.7657, "step": 7715 }, { "epoch": 0.23648400147112908, "grad_norm": 1.4015512374449766, "learning_rate": 8.923021597098924e-06, "loss": 0.6981, "step": 7716 }, { "epoch": 0.23651464999387029, "grad_norm": 1.4662167029042732, "learning_rate": 8.922713862288579e-06, "loss": 0.7634, "step": 7717 }, { "epoch": 0.2365452985166115, "grad_norm": 1.3600437799649703, "learning_rate": 8.922406088826732e-06, "loss": 0.6461, "step": 7718 }, { "epoch": 0.2365759470393527, "grad_norm": 0.47035583452600377, "learning_rate": 8.922098276716413e-06, "loss": 0.4416, "step": 7719 }, { "epoch": 0.2366065955620939, "grad_norm": 1.2813613904720988, "learning_rate": 8.921790425960658e-06, "loss": 0.6535, "step": 7720 }, { "epoch": 0.2366372440848351, "grad_norm": 1.1977374731741555, "learning_rate": 8.921482536562495e-06, "loss": 0.6763, "step": 7721 }, { "epoch": 0.2366678926075763, "grad_norm": 0.5041282058867963, "learning_rate": 8.921174608524964e-06, "loss": 0.4444, "step": 7722 }, { "epoch": 0.23669854113031752, "grad_norm": 0.5259953038519035, "learning_rate": 8.920866641851094e-06, "loss": 0.456, "step": 7723 }, { "epoch": 0.23672918965305872, "grad_norm": 1.160287815325525, "learning_rate": 8.920558636543924e-06, "loss": 0.692, "step": 7724 }, { "epoch": 0.23675983817579993, "grad_norm": 1.2505695328509145, "learning_rate": 8.920250592606486e-06, "loss": 0.6965, "step": 7725 }, { "epoch": 0.23679048669854114, "grad_norm": 1.3530909012550487, "learning_rate": 8.919942510041817e-06, "loss": 0.6423, "step": 7726 }, { "epoch": 0.23682113522128234, "grad_norm": 1.449851933640323, "learning_rate": 8.91963438885295e-06, "loss": 0.8162, "step": 7727 }, { "epoch": 0.23685178374402355, "grad_norm": 1.4440026534933246, "learning_rate": 8.919326229042922e-06, "loss": 0.6931, "step": 7728 }, { "epoch": 0.23688243226676475, "grad_norm": 1.2476245697322756, "learning_rate": 8.91901803061477e-06, "loss": 0.7309, "step": 7729 }, { "epoch": 0.23691308078950596, "grad_norm": 0.4879285137024275, "learning_rate": 8.918709793571532e-06, "loss": 0.4594, "step": 7730 }, { "epoch": 0.23694372931224714, "grad_norm": 1.2187451000170197, "learning_rate": 8.918401517916243e-06, "loss": 0.6419, "step": 7731 }, { "epoch": 0.23697437783498834, "grad_norm": 1.3486119452569063, "learning_rate": 8.918093203651941e-06, "loss": 0.7616, "step": 7732 }, { "epoch": 0.23700502635772955, "grad_norm": 1.3071912493729918, "learning_rate": 8.917784850781665e-06, "loss": 0.704, "step": 7733 }, { "epoch": 0.23703567488047075, "grad_norm": 1.3400326892436465, "learning_rate": 8.917476459308452e-06, "loss": 0.7215, "step": 7734 }, { "epoch": 0.23706632340321196, "grad_norm": 1.447871957200679, "learning_rate": 8.917168029235341e-06, "loss": 0.7215, "step": 7735 }, { "epoch": 0.23709697192595316, "grad_norm": 1.4232688541718639, "learning_rate": 8.916859560565372e-06, "loss": 0.697, "step": 7736 }, { "epoch": 0.23712762044869437, "grad_norm": 1.2818521330090662, "learning_rate": 8.916551053301582e-06, "loss": 0.6938, "step": 7737 }, { "epoch": 0.23715826897143558, "grad_norm": 1.4671214360439693, "learning_rate": 8.916242507447013e-06, "loss": 0.6481, "step": 7738 }, { "epoch": 0.23718891749417678, "grad_norm": 1.1886651680347757, "learning_rate": 8.915933923004705e-06, "loss": 0.7564, "step": 7739 }, { "epoch": 0.237219566016918, "grad_norm": 1.3616605166335134, "learning_rate": 8.915625299977699e-06, "loss": 0.7632, "step": 7740 }, { "epoch": 0.2372502145396592, "grad_norm": 1.2965872912709375, "learning_rate": 8.915316638369033e-06, "loss": 0.6593, "step": 7741 }, { "epoch": 0.2372808630624004, "grad_norm": 0.5341700069246564, "learning_rate": 8.915007938181752e-06, "loss": 0.441, "step": 7742 }, { "epoch": 0.2373115115851416, "grad_norm": 1.158253544274013, "learning_rate": 8.914699199418895e-06, "loss": 0.8067, "step": 7743 }, { "epoch": 0.2373421601078828, "grad_norm": 0.46630981343508765, "learning_rate": 8.914390422083506e-06, "loss": 0.4671, "step": 7744 }, { "epoch": 0.23737280863062402, "grad_norm": 0.47138905901332356, "learning_rate": 8.914081606178627e-06, "loss": 0.4693, "step": 7745 }, { "epoch": 0.23740345715336522, "grad_norm": 1.2229812043032204, "learning_rate": 8.9137727517073e-06, "loss": 0.6439, "step": 7746 }, { "epoch": 0.2374341056761064, "grad_norm": 1.2940789711308935, "learning_rate": 8.913463858672566e-06, "loss": 0.816, "step": 7747 }, { "epoch": 0.2374647541988476, "grad_norm": 1.3800250219847043, "learning_rate": 8.913154927077475e-06, "loss": 0.6338, "step": 7748 }, { "epoch": 0.2374954027215888, "grad_norm": 1.2684308955758663, "learning_rate": 8.912845956925064e-06, "loss": 0.6285, "step": 7749 }, { "epoch": 0.23752605124433002, "grad_norm": 1.3021481845395686, "learning_rate": 8.912536948218385e-06, "loss": 0.722, "step": 7750 }, { "epoch": 0.23755669976707122, "grad_norm": 0.5104465476595866, "learning_rate": 8.912227900960475e-06, "loss": 0.4368, "step": 7751 }, { "epoch": 0.23758734828981243, "grad_norm": 0.44901843040828593, "learning_rate": 8.911918815154384e-06, "loss": 0.4301, "step": 7752 }, { "epoch": 0.23761799681255363, "grad_norm": 1.2913386828643634, "learning_rate": 8.911609690803154e-06, "loss": 0.5925, "step": 7753 }, { "epoch": 0.23764864533529484, "grad_norm": 1.4147906806803643, "learning_rate": 8.911300527909836e-06, "loss": 0.8058, "step": 7754 }, { "epoch": 0.23767929385803604, "grad_norm": 1.2422908843429827, "learning_rate": 8.91099132647747e-06, "loss": 0.7729, "step": 7755 }, { "epoch": 0.23770994238077725, "grad_norm": 0.4916712989754226, "learning_rate": 8.910682086509108e-06, "loss": 0.4446, "step": 7756 }, { "epoch": 0.23774059090351846, "grad_norm": 1.25613515316028, "learning_rate": 8.910372808007795e-06, "loss": 0.7327, "step": 7757 }, { "epoch": 0.23777123942625966, "grad_norm": 1.2791051568690421, "learning_rate": 8.910063490976576e-06, "loss": 0.6671, "step": 7758 }, { "epoch": 0.23780188794900087, "grad_norm": 1.2858330980594075, "learning_rate": 8.909754135418503e-06, "loss": 0.7175, "step": 7759 }, { "epoch": 0.23783253647174207, "grad_norm": 1.4134771402011364, "learning_rate": 8.909444741336622e-06, "loss": 0.6814, "step": 7760 }, { "epoch": 0.23786318499448328, "grad_norm": 1.2401777559331775, "learning_rate": 8.90913530873398e-06, "loss": 0.6597, "step": 7761 }, { "epoch": 0.23789383351722446, "grad_norm": 1.1570670223047077, "learning_rate": 8.90882583761363e-06, "loss": 0.5854, "step": 7762 }, { "epoch": 0.23792448203996566, "grad_norm": 1.5332400421871115, "learning_rate": 8.908516327978618e-06, "loss": 0.6499, "step": 7763 }, { "epoch": 0.23795513056270687, "grad_norm": 0.4810392750825773, "learning_rate": 8.908206779831995e-06, "loss": 0.4575, "step": 7764 }, { "epoch": 0.23798577908544807, "grad_norm": 1.2705002919710833, "learning_rate": 8.907897193176809e-06, "loss": 0.7322, "step": 7765 }, { "epoch": 0.23801642760818928, "grad_norm": 1.1480896135864311, "learning_rate": 8.907587568016112e-06, "loss": 0.6566, "step": 7766 }, { "epoch": 0.23804707613093049, "grad_norm": 1.1534075226889684, "learning_rate": 8.907277904352955e-06, "loss": 0.7054, "step": 7767 }, { "epoch": 0.2380777246536717, "grad_norm": 1.1340478312145592, "learning_rate": 8.906968202190392e-06, "loss": 0.6406, "step": 7768 }, { "epoch": 0.2381083731764129, "grad_norm": 1.562240274877685, "learning_rate": 8.906658461531469e-06, "loss": 0.7716, "step": 7769 }, { "epoch": 0.2381390216991541, "grad_norm": 1.220668260985703, "learning_rate": 8.90634868237924e-06, "loss": 0.6819, "step": 7770 }, { "epoch": 0.2381696702218953, "grad_norm": 1.286641348294744, "learning_rate": 8.90603886473676e-06, "loss": 0.7585, "step": 7771 }, { "epoch": 0.2382003187446365, "grad_norm": 0.46861727606592707, "learning_rate": 8.905729008607079e-06, "loss": 0.4604, "step": 7772 }, { "epoch": 0.23823096726737772, "grad_norm": 1.092639943366214, "learning_rate": 8.905419113993252e-06, "loss": 0.7497, "step": 7773 }, { "epoch": 0.23826161579011892, "grad_norm": 1.2520673051964586, "learning_rate": 8.905109180898328e-06, "loss": 0.6378, "step": 7774 }, { "epoch": 0.23829226431286013, "grad_norm": 1.1921858904287232, "learning_rate": 8.904799209325367e-06, "loss": 0.7815, "step": 7775 }, { "epoch": 0.23832291283560134, "grad_norm": 1.4293875393636595, "learning_rate": 8.904489199277419e-06, "loss": 0.6739, "step": 7776 }, { "epoch": 0.23835356135834254, "grad_norm": 0.450692172733619, "learning_rate": 8.904179150757539e-06, "loss": 0.4229, "step": 7777 }, { "epoch": 0.23838420988108372, "grad_norm": 1.1363145195815227, "learning_rate": 8.903869063768784e-06, "loss": 0.7015, "step": 7778 }, { "epoch": 0.23841485840382493, "grad_norm": 1.3836985605969856, "learning_rate": 8.903558938314209e-06, "loss": 0.6826, "step": 7779 }, { "epoch": 0.23844550692656613, "grad_norm": 1.3979871186805601, "learning_rate": 8.90324877439687e-06, "loss": 0.7573, "step": 7780 }, { "epoch": 0.23847615544930734, "grad_norm": 1.201048522391076, "learning_rate": 8.90293857201982e-06, "loss": 0.6308, "step": 7781 }, { "epoch": 0.23850680397204854, "grad_norm": 0.4589160192492112, "learning_rate": 8.902628331186117e-06, "loss": 0.4419, "step": 7782 }, { "epoch": 0.23853745249478975, "grad_norm": 1.3589090755211028, "learning_rate": 8.902318051898819e-06, "loss": 0.7385, "step": 7783 }, { "epoch": 0.23856810101753095, "grad_norm": 0.4603362757617026, "learning_rate": 8.902007734160985e-06, "loss": 0.463, "step": 7784 }, { "epoch": 0.23859874954027216, "grad_norm": 0.4698070628742183, "learning_rate": 8.90169737797567e-06, "loss": 0.4784, "step": 7785 }, { "epoch": 0.23862939806301336, "grad_norm": 1.178001004172067, "learning_rate": 8.90138698334593e-06, "loss": 0.6978, "step": 7786 }, { "epoch": 0.23866004658575457, "grad_norm": 1.8481597871844266, "learning_rate": 8.901076550274827e-06, "loss": 0.6593, "step": 7787 }, { "epoch": 0.23869069510849578, "grad_norm": 1.2107785406108966, "learning_rate": 8.900766078765417e-06, "loss": 0.7513, "step": 7788 }, { "epoch": 0.23872134363123698, "grad_norm": 1.2791791661171537, "learning_rate": 8.900455568820763e-06, "loss": 0.7085, "step": 7789 }, { "epoch": 0.2387519921539782, "grad_norm": 1.173533801261428, "learning_rate": 8.900145020443922e-06, "loss": 0.7264, "step": 7790 }, { "epoch": 0.2387826406767194, "grad_norm": 1.229860930942602, "learning_rate": 8.899834433637955e-06, "loss": 0.7141, "step": 7791 }, { "epoch": 0.2388132891994606, "grad_norm": 1.3442432444418129, "learning_rate": 8.89952380840592e-06, "loss": 0.7303, "step": 7792 }, { "epoch": 0.23884393772220178, "grad_norm": 1.2449316609638759, "learning_rate": 8.89921314475088e-06, "loss": 0.7395, "step": 7793 }, { "epoch": 0.23887458624494298, "grad_norm": 1.2469518047295702, "learning_rate": 8.898902442675894e-06, "loss": 0.6846, "step": 7794 }, { "epoch": 0.2389052347676842, "grad_norm": 1.1888803074108323, "learning_rate": 8.898591702184027e-06, "loss": 0.7155, "step": 7795 }, { "epoch": 0.2389358832904254, "grad_norm": 1.239630199608031, "learning_rate": 8.898280923278336e-06, "loss": 0.7258, "step": 7796 }, { "epoch": 0.2389665318131666, "grad_norm": 0.5833407535738309, "learning_rate": 8.897970105961887e-06, "loss": 0.459, "step": 7797 }, { "epoch": 0.2389971803359078, "grad_norm": 1.3616718174554898, "learning_rate": 8.897659250237742e-06, "loss": 0.7024, "step": 7798 }, { "epoch": 0.239027828858649, "grad_norm": 0.5345372722766115, "learning_rate": 8.897348356108961e-06, "loss": 0.4494, "step": 7799 }, { "epoch": 0.23905847738139022, "grad_norm": 0.5152562969521814, "learning_rate": 8.897037423578611e-06, "loss": 0.4509, "step": 7800 }, { "epoch": 0.23908912590413142, "grad_norm": 1.335357626731276, "learning_rate": 8.896726452649754e-06, "loss": 0.6981, "step": 7801 }, { "epoch": 0.23911977442687263, "grad_norm": 1.3298260373466446, "learning_rate": 8.896415443325453e-06, "loss": 0.8267, "step": 7802 }, { "epoch": 0.23915042294961383, "grad_norm": 0.5223486370042676, "learning_rate": 8.896104395608775e-06, "loss": 0.4351, "step": 7803 }, { "epoch": 0.23918107147235504, "grad_norm": 1.349040391798197, "learning_rate": 8.895793309502782e-06, "loss": 0.728, "step": 7804 }, { "epoch": 0.23921171999509624, "grad_norm": 1.3651624594524656, "learning_rate": 8.895482185010543e-06, "loss": 0.7036, "step": 7805 }, { "epoch": 0.23924236851783745, "grad_norm": 1.2859120127840267, "learning_rate": 8.89517102213512e-06, "loss": 0.6724, "step": 7806 }, { "epoch": 0.23927301704057866, "grad_norm": 1.2384766970317125, "learning_rate": 8.89485982087958e-06, "loss": 0.6017, "step": 7807 }, { "epoch": 0.23930366556331986, "grad_norm": 1.2545704696877809, "learning_rate": 8.89454858124699e-06, "loss": 0.5917, "step": 7808 }, { "epoch": 0.23933431408606104, "grad_norm": 1.3530887514382721, "learning_rate": 8.894237303240417e-06, "loss": 0.6705, "step": 7809 }, { "epoch": 0.23936496260880225, "grad_norm": 1.2334013599824925, "learning_rate": 8.893925986862928e-06, "loss": 0.8043, "step": 7810 }, { "epoch": 0.23939561113154345, "grad_norm": 1.3163071642669475, "learning_rate": 8.893614632117589e-06, "loss": 0.6596, "step": 7811 }, { "epoch": 0.23942625965428466, "grad_norm": 1.2622232622941467, "learning_rate": 8.893303239007468e-06, "loss": 0.6741, "step": 7812 }, { "epoch": 0.23945690817702586, "grad_norm": 0.7163232506564281, "learning_rate": 8.892991807535635e-06, "loss": 0.45, "step": 7813 }, { "epoch": 0.23948755669976707, "grad_norm": 1.3086936841250918, "learning_rate": 8.892680337705157e-06, "loss": 0.6129, "step": 7814 }, { "epoch": 0.23951820522250827, "grad_norm": 1.2361122067421682, "learning_rate": 8.892368829519105e-06, "loss": 0.6995, "step": 7815 }, { "epoch": 0.23954885374524948, "grad_norm": 1.1789550986939537, "learning_rate": 8.892057282980545e-06, "loss": 0.6679, "step": 7816 }, { "epoch": 0.23957950226799068, "grad_norm": 1.2744345043490966, "learning_rate": 8.89174569809255e-06, "loss": 0.7336, "step": 7817 }, { "epoch": 0.2396101507907319, "grad_norm": 1.1804101562820108, "learning_rate": 8.891434074858189e-06, "loss": 0.6432, "step": 7818 }, { "epoch": 0.2396407993134731, "grad_norm": 1.1964673793585061, "learning_rate": 8.891122413280533e-06, "loss": 0.6793, "step": 7819 }, { "epoch": 0.2396714478362143, "grad_norm": 1.3319300597529218, "learning_rate": 8.890810713362651e-06, "loss": 0.7573, "step": 7820 }, { "epoch": 0.2397020963589555, "grad_norm": 1.2788375971324786, "learning_rate": 8.890498975107616e-06, "loss": 0.7695, "step": 7821 }, { "epoch": 0.2397327448816967, "grad_norm": 1.2187834478718897, "learning_rate": 8.890187198518498e-06, "loss": 0.6934, "step": 7822 }, { "epoch": 0.23976339340443792, "grad_norm": 1.1020386546627787, "learning_rate": 8.889875383598372e-06, "loss": 0.6764, "step": 7823 }, { "epoch": 0.2397940419271791, "grad_norm": 1.3902066887789468, "learning_rate": 8.889563530350307e-06, "loss": 0.7159, "step": 7824 }, { "epoch": 0.2398246904499203, "grad_norm": 1.446330246181229, "learning_rate": 8.88925163877738e-06, "loss": 0.6237, "step": 7825 }, { "epoch": 0.2398553389726615, "grad_norm": 1.2458064402724733, "learning_rate": 8.888939708882658e-06, "loss": 0.68, "step": 7826 }, { "epoch": 0.2398859874954027, "grad_norm": 1.1639899004578773, "learning_rate": 8.888627740669221e-06, "loss": 0.7225, "step": 7827 }, { "epoch": 0.23991663601814392, "grad_norm": 1.2001777306857737, "learning_rate": 8.888315734140139e-06, "loss": 0.6375, "step": 7828 }, { "epoch": 0.23994728454088513, "grad_norm": 1.5719638844813755, "learning_rate": 8.888003689298487e-06, "loss": 0.7152, "step": 7829 }, { "epoch": 0.23997793306362633, "grad_norm": 1.5045986422370055, "learning_rate": 8.88769160614734e-06, "loss": 0.7648, "step": 7830 }, { "epoch": 0.24000858158636754, "grad_norm": 1.1791181141153206, "learning_rate": 8.887379484689772e-06, "loss": 0.6645, "step": 7831 }, { "epoch": 0.24003923010910874, "grad_norm": 1.1972870762959913, "learning_rate": 8.88706732492886e-06, "loss": 0.7141, "step": 7832 }, { "epoch": 0.24006987863184995, "grad_norm": 1.2687355259237503, "learning_rate": 8.88675512686768e-06, "loss": 0.6601, "step": 7833 }, { "epoch": 0.24010052715459115, "grad_norm": 1.7422393843911477, "learning_rate": 8.886442890509305e-06, "loss": 0.7154, "step": 7834 }, { "epoch": 0.24013117567733236, "grad_norm": 1.3868074392087977, "learning_rate": 8.886130615856815e-06, "loss": 0.7332, "step": 7835 }, { "epoch": 0.24016182420007356, "grad_norm": 1.243344786626792, "learning_rate": 8.885818302913286e-06, "loss": 0.7891, "step": 7836 }, { "epoch": 0.24019247272281477, "grad_norm": 1.3941531455419427, "learning_rate": 8.885505951681795e-06, "loss": 0.7513, "step": 7837 }, { "epoch": 0.24022312124555598, "grad_norm": 1.3157441988154912, "learning_rate": 8.88519356216542e-06, "loss": 0.7187, "step": 7838 }, { "epoch": 0.24025376976829718, "grad_norm": 0.6424343472816535, "learning_rate": 8.884881134367239e-06, "loss": 0.4841, "step": 7839 }, { "epoch": 0.24028441829103836, "grad_norm": 0.5600703654893232, "learning_rate": 8.884568668290329e-06, "loss": 0.4668, "step": 7840 }, { "epoch": 0.24031506681377957, "grad_norm": 1.1064086855124682, "learning_rate": 8.88425616393777e-06, "loss": 0.679, "step": 7841 }, { "epoch": 0.24034571533652077, "grad_norm": 1.2097960090173738, "learning_rate": 8.883943621312644e-06, "loss": 0.6369, "step": 7842 }, { "epoch": 0.24037636385926198, "grad_norm": 0.4793000054129831, "learning_rate": 8.883631040418024e-06, "loss": 0.4637, "step": 7843 }, { "epoch": 0.24040701238200318, "grad_norm": 1.3558050062708802, "learning_rate": 8.883318421256994e-06, "loss": 0.7066, "step": 7844 }, { "epoch": 0.2404376609047444, "grad_norm": 1.582586036731752, "learning_rate": 8.883005763832636e-06, "loss": 0.7142, "step": 7845 }, { "epoch": 0.2404683094274856, "grad_norm": 1.2040880621918875, "learning_rate": 8.882693068148027e-06, "loss": 0.6153, "step": 7846 }, { "epoch": 0.2404989579502268, "grad_norm": 0.6275948652075112, "learning_rate": 8.882380334206252e-06, "loss": 0.4509, "step": 7847 }, { "epoch": 0.240529606472968, "grad_norm": 1.3886554019002948, "learning_rate": 8.882067562010388e-06, "loss": 0.6837, "step": 7848 }, { "epoch": 0.2405602549957092, "grad_norm": 1.172356024245857, "learning_rate": 8.881754751563521e-06, "loss": 0.6171, "step": 7849 }, { "epoch": 0.24059090351845042, "grad_norm": 1.2926115423207682, "learning_rate": 8.88144190286873e-06, "loss": 0.6974, "step": 7850 }, { "epoch": 0.24062155204119162, "grad_norm": 0.500262800393738, "learning_rate": 8.881129015929098e-06, "loss": 0.4672, "step": 7851 }, { "epoch": 0.24065220056393283, "grad_norm": 0.47687651435689976, "learning_rate": 8.88081609074771e-06, "loss": 0.4614, "step": 7852 }, { "epoch": 0.24068284908667403, "grad_norm": 1.362064431453778, "learning_rate": 8.880503127327648e-06, "loss": 0.6897, "step": 7853 }, { "epoch": 0.24071349760941524, "grad_norm": 1.265728205928107, "learning_rate": 8.880190125671998e-06, "loss": 0.6517, "step": 7854 }, { "epoch": 0.24074414613215642, "grad_norm": 1.3056963151101408, "learning_rate": 8.879877085783838e-06, "loss": 0.6273, "step": 7855 }, { "epoch": 0.24077479465489762, "grad_norm": 1.2009831416908698, "learning_rate": 8.879564007666257e-06, "loss": 0.6671, "step": 7856 }, { "epoch": 0.24080544317763883, "grad_norm": 1.3449752536901711, "learning_rate": 8.879250891322341e-06, "loss": 0.7638, "step": 7857 }, { "epoch": 0.24083609170038003, "grad_norm": 1.3641225663907794, "learning_rate": 8.878937736755172e-06, "loss": 0.6487, "step": 7858 }, { "epoch": 0.24086674022312124, "grad_norm": 1.4679983074996783, "learning_rate": 8.878624543967837e-06, "loss": 0.6933, "step": 7859 }, { "epoch": 0.24089738874586245, "grad_norm": 0.600594959760205, "learning_rate": 8.878311312963423e-06, "loss": 0.4638, "step": 7860 }, { "epoch": 0.24092803726860365, "grad_norm": 1.4837646935696043, "learning_rate": 8.877998043745015e-06, "loss": 0.774, "step": 7861 }, { "epoch": 0.24095868579134486, "grad_norm": 1.391120762395841, "learning_rate": 8.8776847363157e-06, "loss": 0.7555, "step": 7862 }, { "epoch": 0.24098933431408606, "grad_norm": 1.3928320989275296, "learning_rate": 8.877371390678565e-06, "loss": 0.6432, "step": 7863 }, { "epoch": 0.24101998283682727, "grad_norm": 1.7246098355678423, "learning_rate": 8.877058006836698e-06, "loss": 0.7684, "step": 7864 }, { "epoch": 0.24105063135956847, "grad_norm": 0.49177634789184277, "learning_rate": 8.876744584793186e-06, "loss": 0.4518, "step": 7865 }, { "epoch": 0.24108127988230968, "grad_norm": 1.3098398459653313, "learning_rate": 8.876431124551118e-06, "loss": 0.7524, "step": 7866 }, { "epoch": 0.24111192840505088, "grad_norm": 1.2010336214878061, "learning_rate": 8.876117626113583e-06, "loss": 0.6912, "step": 7867 }, { "epoch": 0.2411425769277921, "grad_norm": 1.2380542871508213, "learning_rate": 8.875804089483669e-06, "loss": 0.7458, "step": 7868 }, { "epoch": 0.2411732254505333, "grad_norm": 1.172610830564587, "learning_rate": 8.875490514664464e-06, "loss": 0.7327, "step": 7869 }, { "epoch": 0.2412038739732745, "grad_norm": 1.2322216743034529, "learning_rate": 8.875176901659061e-06, "loss": 0.6675, "step": 7870 }, { "epoch": 0.24123452249601568, "grad_norm": 0.548278335038863, "learning_rate": 8.874863250470547e-06, "loss": 0.4383, "step": 7871 }, { "epoch": 0.24126517101875689, "grad_norm": 0.5357333997480839, "learning_rate": 8.874549561102014e-06, "loss": 0.4529, "step": 7872 }, { "epoch": 0.2412958195414981, "grad_norm": 0.49352047859332515, "learning_rate": 8.874235833556554e-06, "loss": 0.4529, "step": 7873 }, { "epoch": 0.2413264680642393, "grad_norm": 0.4656404766665517, "learning_rate": 8.873922067837258e-06, "loss": 0.4625, "step": 7874 }, { "epoch": 0.2413571165869805, "grad_norm": 1.271526934478275, "learning_rate": 8.873608263947216e-06, "loss": 0.6538, "step": 7875 }, { "epoch": 0.2413877651097217, "grad_norm": 1.3020746875307454, "learning_rate": 8.87329442188952e-06, "loss": 0.7109, "step": 7876 }, { "epoch": 0.2414184136324629, "grad_norm": 1.3137091391525861, "learning_rate": 8.872980541667261e-06, "loss": 0.6217, "step": 7877 }, { "epoch": 0.24144906215520412, "grad_norm": 1.1965607732620658, "learning_rate": 8.872666623283539e-06, "loss": 0.6983, "step": 7878 }, { "epoch": 0.24147971067794533, "grad_norm": 1.4319722879982901, "learning_rate": 8.872352666741438e-06, "loss": 0.7335, "step": 7879 }, { "epoch": 0.24151035920068653, "grad_norm": 1.1239372635911, "learning_rate": 8.872038672044056e-06, "loss": 0.6995, "step": 7880 }, { "epoch": 0.24154100772342774, "grad_norm": 1.1647525579720885, "learning_rate": 8.871724639194487e-06, "loss": 0.6722, "step": 7881 }, { "epoch": 0.24157165624616894, "grad_norm": 1.2601070371669938, "learning_rate": 8.871410568195824e-06, "loss": 0.7089, "step": 7882 }, { "epoch": 0.24160230476891015, "grad_norm": 0.7978647568231448, "learning_rate": 8.871096459051162e-06, "loss": 0.4496, "step": 7883 }, { "epoch": 0.24163295329165135, "grad_norm": 1.1489345802465032, "learning_rate": 8.870782311763596e-06, "loss": 0.7005, "step": 7884 }, { "epoch": 0.24166360181439256, "grad_norm": 1.364909623022985, "learning_rate": 8.870468126336221e-06, "loss": 0.8032, "step": 7885 }, { "epoch": 0.24169425033713374, "grad_norm": 1.109365205699053, "learning_rate": 8.870153902772133e-06, "loss": 0.604, "step": 7886 }, { "epoch": 0.24172489885987494, "grad_norm": 0.5298890046715208, "learning_rate": 8.86983964107443e-06, "loss": 0.4544, "step": 7887 }, { "epoch": 0.24175554738261615, "grad_norm": 1.6950511468048832, "learning_rate": 8.869525341246209e-06, "loss": 0.6822, "step": 7888 }, { "epoch": 0.24178619590535735, "grad_norm": 1.4252151065319647, "learning_rate": 8.86921100329056e-06, "loss": 0.6067, "step": 7889 }, { "epoch": 0.24181684442809856, "grad_norm": 1.416059252335951, "learning_rate": 8.868896627210587e-06, "loss": 0.705, "step": 7890 }, { "epoch": 0.24184749295083977, "grad_norm": 1.2191980405813083, "learning_rate": 8.868582213009387e-06, "loss": 0.7089, "step": 7891 }, { "epoch": 0.24187814147358097, "grad_norm": 1.3955641835239927, "learning_rate": 8.868267760690055e-06, "loss": 0.6983, "step": 7892 }, { "epoch": 0.24190878999632218, "grad_norm": 1.1640310314897635, "learning_rate": 8.867953270255691e-06, "loss": 0.6792, "step": 7893 }, { "epoch": 0.24193943851906338, "grad_norm": 1.3036356689092032, "learning_rate": 8.867638741709395e-06, "loss": 0.6817, "step": 7894 }, { "epoch": 0.2419700870418046, "grad_norm": 1.441129907730236, "learning_rate": 8.867324175054264e-06, "loss": 0.8101, "step": 7895 }, { "epoch": 0.2420007355645458, "grad_norm": 1.1772755245023503, "learning_rate": 8.867009570293398e-06, "loss": 0.6521, "step": 7896 }, { "epoch": 0.242031384087287, "grad_norm": 1.1287383982494636, "learning_rate": 8.866694927429897e-06, "loss": 0.6618, "step": 7897 }, { "epoch": 0.2420620326100282, "grad_norm": 1.4142939406796236, "learning_rate": 8.866380246466863e-06, "loss": 0.707, "step": 7898 }, { "epoch": 0.2420926811327694, "grad_norm": 1.2523729306729614, "learning_rate": 8.866065527407393e-06, "loss": 0.7532, "step": 7899 }, { "epoch": 0.24212332965551062, "grad_norm": 1.3098068545165933, "learning_rate": 8.865750770254593e-06, "loss": 0.7702, "step": 7900 }, { "epoch": 0.24215397817825182, "grad_norm": 1.2239480235397773, "learning_rate": 8.865435975011559e-06, "loss": 0.7186, "step": 7901 }, { "epoch": 0.242184626700993, "grad_norm": 1.123840762464414, "learning_rate": 8.865121141681397e-06, "loss": 0.6535, "step": 7902 }, { "epoch": 0.2422152752237342, "grad_norm": 0.8768824242379366, "learning_rate": 8.864806270267207e-06, "loss": 0.4418, "step": 7903 }, { "epoch": 0.2422459237464754, "grad_norm": 1.2106604461957808, "learning_rate": 8.864491360772091e-06, "loss": 0.7444, "step": 7904 }, { "epoch": 0.24227657226921662, "grad_norm": 1.327190708557682, "learning_rate": 8.864176413199155e-06, "loss": 0.6712, "step": 7905 }, { "epoch": 0.24230722079195782, "grad_norm": 1.3879807684386887, "learning_rate": 8.8638614275515e-06, "loss": 0.721, "step": 7906 }, { "epoch": 0.24233786931469903, "grad_norm": 1.265889463905895, "learning_rate": 8.863546403832227e-06, "loss": 0.7205, "step": 7907 }, { "epoch": 0.24236851783744023, "grad_norm": 0.4883609195703639, "learning_rate": 8.863231342044445e-06, "loss": 0.4608, "step": 7908 }, { "epoch": 0.24239916636018144, "grad_norm": 0.5267525693006313, "learning_rate": 8.862916242191255e-06, "loss": 0.4486, "step": 7909 }, { "epoch": 0.24242981488292265, "grad_norm": 1.2956656379251088, "learning_rate": 8.862601104275763e-06, "loss": 0.6809, "step": 7910 }, { "epoch": 0.24246046340566385, "grad_norm": 1.327871249281275, "learning_rate": 8.862285928301075e-06, "loss": 0.6764, "step": 7911 }, { "epoch": 0.24249111192840506, "grad_norm": 1.324363255023116, "learning_rate": 8.861970714270294e-06, "loss": 0.7621, "step": 7912 }, { "epoch": 0.24252176045114626, "grad_norm": 1.4308704234360863, "learning_rate": 8.86165546218653e-06, "loss": 0.6818, "step": 7913 }, { "epoch": 0.24255240897388747, "grad_norm": 1.2739930640619765, "learning_rate": 8.861340172052883e-06, "loss": 0.7398, "step": 7914 }, { "epoch": 0.24258305749662867, "grad_norm": 0.49926446782216477, "learning_rate": 8.861024843872465e-06, "loss": 0.4551, "step": 7915 }, { "epoch": 0.24261370601936988, "grad_norm": 1.3527136265138753, "learning_rate": 8.860709477648383e-06, "loss": 0.731, "step": 7916 }, { "epoch": 0.24264435454211106, "grad_norm": 1.3643146421497523, "learning_rate": 8.86039407338374e-06, "loss": 0.7512, "step": 7917 }, { "epoch": 0.24267500306485226, "grad_norm": 1.2329126368577472, "learning_rate": 8.86007863108165e-06, "loss": 0.7099, "step": 7918 }, { "epoch": 0.24270565158759347, "grad_norm": 1.1491770882210355, "learning_rate": 8.859763150745215e-06, "loss": 0.6454, "step": 7919 }, { "epoch": 0.24273630011033467, "grad_norm": 1.2288496252023815, "learning_rate": 8.859447632377547e-06, "loss": 0.6605, "step": 7920 }, { "epoch": 0.24276694863307588, "grad_norm": 1.2033149596859112, "learning_rate": 8.859132075981753e-06, "loss": 0.7453, "step": 7921 }, { "epoch": 0.24279759715581709, "grad_norm": 1.3130034713567753, "learning_rate": 8.858816481560944e-06, "loss": 0.7856, "step": 7922 }, { "epoch": 0.2428282456785583, "grad_norm": 1.2570733099058342, "learning_rate": 8.85850084911823e-06, "loss": 0.6751, "step": 7923 }, { "epoch": 0.2428588942012995, "grad_norm": 0.5266061051275653, "learning_rate": 8.858185178656718e-06, "loss": 0.4428, "step": 7924 }, { "epoch": 0.2428895427240407, "grad_norm": 1.2729622351043977, "learning_rate": 8.857869470179521e-06, "loss": 0.7238, "step": 7925 }, { "epoch": 0.2429201912467819, "grad_norm": 1.2314948740473162, "learning_rate": 8.857553723689749e-06, "loss": 0.7217, "step": 7926 }, { "epoch": 0.2429508397695231, "grad_norm": 1.430754501355756, "learning_rate": 8.857237939190513e-06, "loss": 0.654, "step": 7927 }, { "epoch": 0.24298148829226432, "grad_norm": 1.1672236951245931, "learning_rate": 8.856922116684924e-06, "loss": 0.7564, "step": 7928 }, { "epoch": 0.24301213681500552, "grad_norm": 1.3037923919404744, "learning_rate": 8.856606256176096e-06, "loss": 0.7527, "step": 7929 }, { "epoch": 0.24304278533774673, "grad_norm": 1.2004794571137674, "learning_rate": 8.856290357667139e-06, "loss": 0.6632, "step": 7930 }, { "epoch": 0.24307343386048794, "grad_norm": 1.2412725714935422, "learning_rate": 8.855974421161167e-06, "loss": 0.7089, "step": 7931 }, { "epoch": 0.24310408238322914, "grad_norm": 1.2035517103225215, "learning_rate": 8.855658446661292e-06, "loss": 0.6764, "step": 7932 }, { "epoch": 0.24313473090597032, "grad_norm": 1.241460182178478, "learning_rate": 8.855342434170628e-06, "loss": 0.7666, "step": 7933 }, { "epoch": 0.24316537942871153, "grad_norm": 1.1611386860407773, "learning_rate": 8.855026383692288e-06, "loss": 0.6935, "step": 7934 }, { "epoch": 0.24319602795145273, "grad_norm": 1.3520042741232456, "learning_rate": 8.854710295229387e-06, "loss": 0.685, "step": 7935 }, { "epoch": 0.24322667647419394, "grad_norm": 1.381805175052228, "learning_rate": 8.854394168785038e-06, "loss": 0.626, "step": 7936 }, { "epoch": 0.24325732499693514, "grad_norm": 1.3102649103812414, "learning_rate": 8.85407800436236e-06, "loss": 0.6068, "step": 7937 }, { "epoch": 0.24328797351967635, "grad_norm": 1.1142002181659811, "learning_rate": 8.853761801964462e-06, "loss": 0.5981, "step": 7938 }, { "epoch": 0.24331862204241755, "grad_norm": 1.2907033080702086, "learning_rate": 8.853445561594466e-06, "loss": 0.8038, "step": 7939 }, { "epoch": 0.24334927056515876, "grad_norm": 1.421484438712077, "learning_rate": 8.853129283255484e-06, "loss": 0.7462, "step": 7940 }, { "epoch": 0.24337991908789997, "grad_norm": 1.2555486712612558, "learning_rate": 8.852812966950633e-06, "loss": 0.6784, "step": 7941 }, { "epoch": 0.24341056761064117, "grad_norm": 1.30605827282274, "learning_rate": 8.852496612683028e-06, "loss": 0.7573, "step": 7942 }, { "epoch": 0.24344121613338238, "grad_norm": 1.257194957523387, "learning_rate": 8.852180220455791e-06, "loss": 0.6517, "step": 7943 }, { "epoch": 0.24347186465612358, "grad_norm": 0.5963035447495413, "learning_rate": 8.851863790272036e-06, "loss": 0.4307, "step": 7944 }, { "epoch": 0.2435025131788648, "grad_norm": 1.2253883968512354, "learning_rate": 8.851547322134882e-06, "loss": 0.727, "step": 7945 }, { "epoch": 0.243533161701606, "grad_norm": 1.163992585015797, "learning_rate": 8.851230816047445e-06, "loss": 0.6237, "step": 7946 }, { "epoch": 0.2435638102243472, "grad_norm": 1.2013089888450925, "learning_rate": 8.850914272012846e-06, "loss": 0.6513, "step": 7947 }, { "epoch": 0.24359445874708838, "grad_norm": 1.2989283114706534, "learning_rate": 8.850597690034204e-06, "loss": 0.7154, "step": 7948 }, { "epoch": 0.24362510726982958, "grad_norm": 1.3805469188192663, "learning_rate": 8.850281070114637e-06, "loss": 0.6778, "step": 7949 }, { "epoch": 0.2436557557925708, "grad_norm": 1.2762202732023773, "learning_rate": 8.849964412257268e-06, "loss": 0.6915, "step": 7950 }, { "epoch": 0.243686404315312, "grad_norm": 1.286408417294142, "learning_rate": 8.84964771646521e-06, "loss": 0.6843, "step": 7951 }, { "epoch": 0.2437170528380532, "grad_norm": 1.368206525602666, "learning_rate": 8.849330982741594e-06, "loss": 0.6813, "step": 7952 }, { "epoch": 0.2437477013607944, "grad_norm": 1.1247434843827675, "learning_rate": 8.849014211089529e-06, "loss": 0.7066, "step": 7953 }, { "epoch": 0.2437783498835356, "grad_norm": 1.2040099338882313, "learning_rate": 8.848697401512146e-06, "loss": 0.7224, "step": 7954 }, { "epoch": 0.24380899840627682, "grad_norm": 0.5012901499649965, "learning_rate": 8.848380554012562e-06, "loss": 0.4569, "step": 7955 }, { "epoch": 0.24383964692901802, "grad_norm": 1.3059669511745589, "learning_rate": 8.848063668593898e-06, "loss": 0.6057, "step": 7956 }, { "epoch": 0.24387029545175923, "grad_norm": 1.1833468849401303, "learning_rate": 8.84774674525928e-06, "loss": 0.6908, "step": 7957 }, { "epoch": 0.24390094397450043, "grad_norm": 1.271444408691137, "learning_rate": 8.847429784011828e-06, "loss": 0.6387, "step": 7958 }, { "epoch": 0.24393159249724164, "grad_norm": 1.2781686468254552, "learning_rate": 8.847112784854666e-06, "loss": 0.695, "step": 7959 }, { "epoch": 0.24396224101998285, "grad_norm": 1.3367885129774484, "learning_rate": 8.846795747790918e-06, "loss": 0.8504, "step": 7960 }, { "epoch": 0.24399288954272405, "grad_norm": 1.3351638357049629, "learning_rate": 8.846478672823707e-06, "loss": 0.6339, "step": 7961 }, { "epoch": 0.24402353806546526, "grad_norm": 1.407147696496177, "learning_rate": 8.846161559956156e-06, "loss": 0.6809, "step": 7962 }, { "epoch": 0.24405418658820646, "grad_norm": 0.4612511632779192, "learning_rate": 8.845844409191393e-06, "loss": 0.4504, "step": 7963 }, { "epoch": 0.24408483511094764, "grad_norm": 0.46476603336090583, "learning_rate": 8.84552722053254e-06, "loss": 0.4429, "step": 7964 }, { "epoch": 0.24411548363368885, "grad_norm": 1.541191029758836, "learning_rate": 8.845209993982724e-06, "loss": 0.8066, "step": 7965 }, { "epoch": 0.24414613215643005, "grad_norm": 1.2281281425578667, "learning_rate": 8.84489272954507e-06, "loss": 0.704, "step": 7966 }, { "epoch": 0.24417678067917126, "grad_norm": 0.4358018115815876, "learning_rate": 8.844575427222703e-06, "loss": 0.4332, "step": 7967 }, { "epoch": 0.24420742920191246, "grad_norm": 1.2335932870718058, "learning_rate": 8.844258087018754e-06, "loss": 0.6251, "step": 7968 }, { "epoch": 0.24423807772465367, "grad_norm": 1.227723733004178, "learning_rate": 8.843940708936342e-06, "loss": 0.6957, "step": 7969 }, { "epoch": 0.24426872624739487, "grad_norm": 1.577662169006196, "learning_rate": 8.843623292978602e-06, "loss": 0.7577, "step": 7970 }, { "epoch": 0.24429937477013608, "grad_norm": 1.6569227818043277, "learning_rate": 8.843305839148657e-06, "loss": 0.6593, "step": 7971 }, { "epoch": 0.24433002329287729, "grad_norm": 1.5279540628119233, "learning_rate": 8.842988347449636e-06, "loss": 0.7975, "step": 7972 }, { "epoch": 0.2443606718156185, "grad_norm": 1.3320136180258186, "learning_rate": 8.842670817884669e-06, "loss": 0.7506, "step": 7973 }, { "epoch": 0.2443913203383597, "grad_norm": 1.3224491057387124, "learning_rate": 8.842353250456882e-06, "loss": 0.7119, "step": 7974 }, { "epoch": 0.2444219688611009, "grad_norm": 1.3006549123610303, "learning_rate": 8.842035645169404e-06, "loss": 0.7071, "step": 7975 }, { "epoch": 0.2444526173838421, "grad_norm": 1.1145189636738726, "learning_rate": 8.841718002025367e-06, "loss": 0.6751, "step": 7976 }, { "epoch": 0.2444832659065833, "grad_norm": 1.1930795226242328, "learning_rate": 8.841400321027899e-06, "loss": 0.6848, "step": 7977 }, { "epoch": 0.24451391442932452, "grad_norm": 1.153458504391778, "learning_rate": 8.841082602180134e-06, "loss": 0.6272, "step": 7978 }, { "epoch": 0.2445445629520657, "grad_norm": 0.5279931271685526, "learning_rate": 8.840764845485196e-06, "loss": 0.4556, "step": 7979 }, { "epoch": 0.2445752114748069, "grad_norm": 1.29793912097803, "learning_rate": 8.840447050946219e-06, "loss": 0.746, "step": 7980 }, { "epoch": 0.2446058599975481, "grad_norm": 1.195955928778277, "learning_rate": 8.840129218566335e-06, "loss": 0.7536, "step": 7981 }, { "epoch": 0.24463650852028931, "grad_norm": 0.49940920705216507, "learning_rate": 8.839811348348677e-06, "loss": 0.4499, "step": 7982 }, { "epoch": 0.24466715704303052, "grad_norm": 0.4970727041097686, "learning_rate": 8.839493440296376e-06, "loss": 0.4443, "step": 7983 }, { "epoch": 0.24469780556577173, "grad_norm": 1.2404854667899294, "learning_rate": 8.83917549441256e-06, "loss": 0.6752, "step": 7984 }, { "epoch": 0.24472845408851293, "grad_norm": 0.4884013362090122, "learning_rate": 8.838857510700369e-06, "loss": 0.4645, "step": 7985 }, { "epoch": 0.24475910261125414, "grad_norm": 1.3077514067734948, "learning_rate": 8.83853948916293e-06, "loss": 0.6695, "step": 7986 }, { "epoch": 0.24478975113399534, "grad_norm": 1.4683621165653566, "learning_rate": 8.838221429803381e-06, "loss": 0.7177, "step": 7987 }, { "epoch": 0.24482039965673655, "grad_norm": 1.176578331797678, "learning_rate": 8.837903332624855e-06, "loss": 0.6641, "step": 7988 }, { "epoch": 0.24485104817947775, "grad_norm": 1.3173763400642022, "learning_rate": 8.837585197630483e-06, "loss": 0.7401, "step": 7989 }, { "epoch": 0.24488169670221896, "grad_norm": 1.5472416841660317, "learning_rate": 8.837267024823404e-06, "loss": 0.7035, "step": 7990 }, { "epoch": 0.24491234522496017, "grad_norm": 1.3839328975291263, "learning_rate": 8.836948814206751e-06, "loss": 0.6518, "step": 7991 }, { "epoch": 0.24494299374770137, "grad_norm": 1.3337438976981495, "learning_rate": 8.83663056578366e-06, "loss": 0.7065, "step": 7992 }, { "epoch": 0.24497364227044258, "grad_norm": 1.1963142469424821, "learning_rate": 8.836312279557264e-06, "loss": 0.6466, "step": 7993 }, { "epoch": 0.24500429079318378, "grad_norm": 1.3622703792152306, "learning_rate": 8.835993955530704e-06, "loss": 0.7129, "step": 7994 }, { "epoch": 0.24503493931592496, "grad_norm": 1.104488704193292, "learning_rate": 8.835675593707113e-06, "loss": 0.6957, "step": 7995 }, { "epoch": 0.24506558783866617, "grad_norm": 1.2911481061059757, "learning_rate": 8.83535719408963e-06, "loss": 0.7317, "step": 7996 }, { "epoch": 0.24509623636140737, "grad_norm": 1.3780220796369538, "learning_rate": 8.83503875668139e-06, "loss": 0.6733, "step": 7997 }, { "epoch": 0.24512688488414858, "grad_norm": 1.4684048221605426, "learning_rate": 8.834720281485533e-06, "loss": 0.7337, "step": 7998 }, { "epoch": 0.24515753340688978, "grad_norm": 1.2630917779751152, "learning_rate": 8.834401768505194e-06, "loss": 0.6977, "step": 7999 }, { "epoch": 0.245188181929631, "grad_norm": 1.2430490717873688, "learning_rate": 8.834083217743516e-06, "loss": 0.6417, "step": 8000 }, { "epoch": 0.2452188304523722, "grad_norm": 1.2817570891379881, "learning_rate": 8.833764629203634e-06, "loss": 0.712, "step": 8001 }, { "epoch": 0.2452494789751134, "grad_norm": 1.2656103455783396, "learning_rate": 8.833446002888689e-06, "loss": 0.6417, "step": 8002 }, { "epoch": 0.2452801274978546, "grad_norm": 1.3276923859529763, "learning_rate": 8.833127338801818e-06, "loss": 0.7694, "step": 8003 }, { "epoch": 0.2453107760205958, "grad_norm": 1.3080801056171811, "learning_rate": 8.832808636946162e-06, "loss": 0.6435, "step": 8004 }, { "epoch": 0.24534142454333702, "grad_norm": 1.1565589541172314, "learning_rate": 8.832489897324863e-06, "loss": 0.6566, "step": 8005 }, { "epoch": 0.24537207306607822, "grad_norm": 1.3592960910413638, "learning_rate": 8.83217111994106e-06, "loss": 0.7754, "step": 8006 }, { "epoch": 0.24540272158881943, "grad_norm": 1.1011959478606328, "learning_rate": 8.831852304797896e-06, "loss": 0.5312, "step": 8007 }, { "epoch": 0.24543337011156063, "grad_norm": 1.212869742462904, "learning_rate": 8.831533451898508e-06, "loss": 0.7056, "step": 8008 }, { "epoch": 0.24546401863430184, "grad_norm": 1.3826475854376625, "learning_rate": 8.831214561246044e-06, "loss": 0.7086, "step": 8009 }, { "epoch": 0.24549466715704302, "grad_norm": 1.3354437915187163, "learning_rate": 8.830895632843641e-06, "loss": 0.6866, "step": 8010 }, { "epoch": 0.24552531567978422, "grad_norm": 1.1092811267764882, "learning_rate": 8.830576666694443e-06, "loss": 0.6445, "step": 8011 }, { "epoch": 0.24555596420252543, "grad_norm": 1.30496014694277, "learning_rate": 8.830257662801594e-06, "loss": 0.6818, "step": 8012 }, { "epoch": 0.24558661272526663, "grad_norm": 1.5269754850421549, "learning_rate": 8.829938621168234e-06, "loss": 0.7631, "step": 8013 }, { "epoch": 0.24561726124800784, "grad_norm": 0.7228279176561347, "learning_rate": 8.829619541797511e-06, "loss": 0.4541, "step": 8014 }, { "epoch": 0.24564790977074905, "grad_norm": 1.1972009556007428, "learning_rate": 8.829300424692566e-06, "loss": 0.7222, "step": 8015 }, { "epoch": 0.24567855829349025, "grad_norm": 1.0798059119281027, "learning_rate": 8.828981269856543e-06, "loss": 0.6467, "step": 8016 }, { "epoch": 0.24570920681623146, "grad_norm": 1.2846506830714288, "learning_rate": 8.828662077292588e-06, "loss": 0.7083, "step": 8017 }, { "epoch": 0.24573985533897266, "grad_norm": 1.2271137086339643, "learning_rate": 8.828342847003848e-06, "loss": 0.7805, "step": 8018 }, { "epoch": 0.24577050386171387, "grad_norm": 1.316726118101314, "learning_rate": 8.828023578993466e-06, "loss": 0.6922, "step": 8019 }, { "epoch": 0.24580115238445507, "grad_norm": 1.3719429689962306, "learning_rate": 8.827704273264588e-06, "loss": 0.6643, "step": 8020 }, { "epoch": 0.24583180090719628, "grad_norm": 1.2373553890700617, "learning_rate": 8.82738492982036e-06, "loss": 0.637, "step": 8021 }, { "epoch": 0.24586244942993749, "grad_norm": 1.2742680707313716, "learning_rate": 8.82706554866393e-06, "loss": 0.6984, "step": 8022 }, { "epoch": 0.2458930979526787, "grad_norm": 1.121418377656205, "learning_rate": 8.826746129798442e-06, "loss": 0.643, "step": 8023 }, { "epoch": 0.2459237464754199, "grad_norm": 1.279061000746834, "learning_rate": 8.826426673227047e-06, "loss": 0.717, "step": 8024 }, { "epoch": 0.2459543949981611, "grad_norm": 1.1430914032646882, "learning_rate": 8.826107178952889e-06, "loss": 0.7133, "step": 8025 }, { "epoch": 0.24598504352090228, "grad_norm": 1.3494931420545897, "learning_rate": 8.825787646979119e-06, "loss": 0.7012, "step": 8026 }, { "epoch": 0.24601569204364349, "grad_norm": 1.2600582996433825, "learning_rate": 8.825468077308885e-06, "loss": 0.7492, "step": 8027 }, { "epoch": 0.2460463405663847, "grad_norm": 0.5300308869165489, "learning_rate": 8.825148469945335e-06, "loss": 0.4416, "step": 8028 }, { "epoch": 0.2460769890891259, "grad_norm": 1.5702616237627034, "learning_rate": 8.824828824891618e-06, "loss": 0.7202, "step": 8029 }, { "epoch": 0.2461076376118671, "grad_norm": 1.275455520378543, "learning_rate": 8.824509142150885e-06, "loss": 0.6945, "step": 8030 }, { "epoch": 0.2461382861346083, "grad_norm": 1.401044571174049, "learning_rate": 8.824189421726284e-06, "loss": 0.6989, "step": 8031 }, { "epoch": 0.24616893465734951, "grad_norm": 1.2800600040570005, "learning_rate": 8.823869663620967e-06, "loss": 0.7313, "step": 8032 }, { "epoch": 0.24619958318009072, "grad_norm": 1.1653988016849963, "learning_rate": 8.823549867838082e-06, "loss": 0.6917, "step": 8033 }, { "epoch": 0.24623023170283193, "grad_norm": 1.151577789350426, "learning_rate": 8.823230034380784e-06, "loss": 0.6052, "step": 8034 }, { "epoch": 0.24626088022557313, "grad_norm": 1.3434962828204748, "learning_rate": 8.822910163252221e-06, "loss": 0.7126, "step": 8035 }, { "epoch": 0.24629152874831434, "grad_norm": 1.2951956875722395, "learning_rate": 8.822590254455547e-06, "loss": 0.7961, "step": 8036 }, { "epoch": 0.24632217727105554, "grad_norm": 1.2732330521429926, "learning_rate": 8.822270307993912e-06, "loss": 0.5876, "step": 8037 }, { "epoch": 0.24635282579379675, "grad_norm": 1.3810716130710232, "learning_rate": 8.82195032387047e-06, "loss": 0.7561, "step": 8038 }, { "epoch": 0.24638347431653795, "grad_norm": 1.2104610060971666, "learning_rate": 8.821630302088374e-06, "loss": 0.6158, "step": 8039 }, { "epoch": 0.24641412283927916, "grad_norm": 1.1746623396461253, "learning_rate": 8.821310242650776e-06, "loss": 0.6472, "step": 8040 }, { "epoch": 0.24644477136202034, "grad_norm": 1.2104794394649696, "learning_rate": 8.82099014556083e-06, "loss": 0.6956, "step": 8041 }, { "epoch": 0.24647541988476154, "grad_norm": 1.3611184121045687, "learning_rate": 8.820670010821693e-06, "loss": 0.713, "step": 8042 }, { "epoch": 0.24650606840750275, "grad_norm": 1.2887434905359894, "learning_rate": 8.820349838436515e-06, "loss": 0.7379, "step": 8043 }, { "epoch": 0.24653671693024395, "grad_norm": 0.5126144469307656, "learning_rate": 8.820029628408453e-06, "loss": 0.4461, "step": 8044 }, { "epoch": 0.24656736545298516, "grad_norm": 1.404320669116368, "learning_rate": 8.819709380740662e-06, "loss": 0.709, "step": 8045 }, { "epoch": 0.24659801397572637, "grad_norm": 1.4503993779053397, "learning_rate": 8.819389095436295e-06, "loss": 0.7158, "step": 8046 }, { "epoch": 0.24662866249846757, "grad_norm": 1.2735906635981877, "learning_rate": 8.819068772498514e-06, "loss": 0.6929, "step": 8047 }, { "epoch": 0.24665931102120878, "grad_norm": 1.1774715853006812, "learning_rate": 8.818748411930468e-06, "loss": 0.6823, "step": 8048 }, { "epoch": 0.24668995954394998, "grad_norm": 1.388558068150557, "learning_rate": 8.818428013735319e-06, "loss": 0.7561, "step": 8049 }, { "epoch": 0.2467206080666912, "grad_norm": 1.2205639053917308, "learning_rate": 8.81810757791622e-06, "loss": 0.7077, "step": 8050 }, { "epoch": 0.2467512565894324, "grad_norm": 1.2474427802375718, "learning_rate": 8.81778710447633e-06, "loss": 0.7088, "step": 8051 }, { "epoch": 0.2467819051121736, "grad_norm": 1.2637539899939598, "learning_rate": 8.817466593418808e-06, "loss": 0.6066, "step": 8052 }, { "epoch": 0.2468125536349148, "grad_norm": 1.4129915642369393, "learning_rate": 8.817146044746809e-06, "loss": 0.7007, "step": 8053 }, { "epoch": 0.246843202157656, "grad_norm": 0.49527465886493205, "learning_rate": 8.816825458463496e-06, "loss": 0.4375, "step": 8054 }, { "epoch": 0.24687385068039722, "grad_norm": 1.2532218221368987, "learning_rate": 8.816504834572024e-06, "loss": 0.7171, "step": 8055 }, { "epoch": 0.24690449920313842, "grad_norm": 0.4946139448799034, "learning_rate": 8.816184173075553e-06, "loss": 0.4593, "step": 8056 }, { "epoch": 0.2469351477258796, "grad_norm": 1.296145882627794, "learning_rate": 8.815863473977244e-06, "loss": 0.6848, "step": 8057 }, { "epoch": 0.2469657962486208, "grad_norm": 1.3528771786516156, "learning_rate": 8.815542737280254e-06, "loss": 0.8254, "step": 8058 }, { "epoch": 0.246996444771362, "grad_norm": 1.3391499604265822, "learning_rate": 8.815221962987747e-06, "loss": 0.7209, "step": 8059 }, { "epoch": 0.24702709329410322, "grad_norm": 1.3292593399902426, "learning_rate": 8.814901151102882e-06, "loss": 0.6851, "step": 8060 }, { "epoch": 0.24705774181684442, "grad_norm": 1.2878000606767441, "learning_rate": 8.814580301628818e-06, "loss": 0.7123, "step": 8061 }, { "epoch": 0.24708839033958563, "grad_norm": 1.2526843540787223, "learning_rate": 8.81425941456872e-06, "loss": 0.7396, "step": 8062 }, { "epoch": 0.24711903886232683, "grad_norm": 0.5397623333196585, "learning_rate": 8.813938489925747e-06, "loss": 0.4488, "step": 8063 }, { "epoch": 0.24714968738506804, "grad_norm": 1.3220268563712183, "learning_rate": 8.813617527703062e-06, "loss": 0.6277, "step": 8064 }, { "epoch": 0.24718033590780925, "grad_norm": 1.4035364668964598, "learning_rate": 8.813296527903828e-06, "loss": 0.7582, "step": 8065 }, { "epoch": 0.24721098443055045, "grad_norm": 1.2912359549633652, "learning_rate": 8.812975490531208e-06, "loss": 0.6352, "step": 8066 }, { "epoch": 0.24724163295329166, "grad_norm": 1.7139679576140097, "learning_rate": 8.812654415588366e-06, "loss": 0.7022, "step": 8067 }, { "epoch": 0.24727228147603286, "grad_norm": 1.2719290994719, "learning_rate": 8.812333303078462e-06, "loss": 0.6236, "step": 8068 }, { "epoch": 0.24730292999877407, "grad_norm": 1.336515240312529, "learning_rate": 8.812012153004665e-06, "loss": 0.7216, "step": 8069 }, { "epoch": 0.24733357852151527, "grad_norm": 1.34279776586217, "learning_rate": 8.811690965370135e-06, "loss": 0.7961, "step": 8070 }, { "epoch": 0.24736422704425648, "grad_norm": 0.4865862987090786, "learning_rate": 8.81136974017804e-06, "loss": 0.4665, "step": 8071 }, { "epoch": 0.24739487556699766, "grad_norm": 0.48291127268575534, "learning_rate": 8.811048477431543e-06, "loss": 0.4426, "step": 8072 }, { "epoch": 0.24742552408973886, "grad_norm": 1.3587881768941705, "learning_rate": 8.810727177133811e-06, "loss": 0.7832, "step": 8073 }, { "epoch": 0.24745617261248007, "grad_norm": 1.38056847114867, "learning_rate": 8.810405839288008e-06, "loss": 0.6355, "step": 8074 }, { "epoch": 0.24748682113522127, "grad_norm": 1.2795143264878879, "learning_rate": 8.810084463897302e-06, "loss": 0.672, "step": 8075 }, { "epoch": 0.24751746965796248, "grad_norm": 1.285256042013173, "learning_rate": 8.80976305096486e-06, "loss": 0.6695, "step": 8076 }, { "epoch": 0.24754811818070369, "grad_norm": 1.2488545583390107, "learning_rate": 8.809441600493846e-06, "loss": 0.6778, "step": 8077 }, { "epoch": 0.2475787667034449, "grad_norm": 0.5011843602202062, "learning_rate": 8.80912011248743e-06, "loss": 0.46, "step": 8078 }, { "epoch": 0.2476094152261861, "grad_norm": 1.2297172419985452, "learning_rate": 8.80879858694878e-06, "loss": 0.6106, "step": 8079 }, { "epoch": 0.2476400637489273, "grad_norm": 1.395404034290315, "learning_rate": 8.808477023881061e-06, "loss": 0.6607, "step": 8080 }, { "epoch": 0.2476707122716685, "grad_norm": 0.4572825724319096, "learning_rate": 8.808155423287444e-06, "loss": 0.4349, "step": 8081 }, { "epoch": 0.24770136079440971, "grad_norm": 1.3558110301818718, "learning_rate": 8.807833785171098e-06, "loss": 0.7278, "step": 8082 }, { "epoch": 0.24773200931715092, "grad_norm": 0.4829899591422972, "learning_rate": 8.807512109535192e-06, "loss": 0.4178, "step": 8083 }, { "epoch": 0.24776265783989213, "grad_norm": 1.2047798513899195, "learning_rate": 8.807190396382893e-06, "loss": 0.6494, "step": 8084 }, { "epoch": 0.24779330636263333, "grad_norm": 1.451720263097646, "learning_rate": 8.806868645717374e-06, "loss": 0.6712, "step": 8085 }, { "epoch": 0.24782395488537454, "grad_norm": 1.2666064816305809, "learning_rate": 8.806546857541804e-06, "loss": 0.7011, "step": 8086 }, { "epoch": 0.24785460340811574, "grad_norm": 1.3762043061013212, "learning_rate": 8.806225031859354e-06, "loss": 0.7391, "step": 8087 }, { "epoch": 0.24788525193085692, "grad_norm": 1.2350515236403115, "learning_rate": 8.805903168673196e-06, "loss": 0.7156, "step": 8088 }, { "epoch": 0.24791590045359813, "grad_norm": 1.2124222917254313, "learning_rate": 8.805581267986499e-06, "loss": 0.6873, "step": 8089 }, { "epoch": 0.24794654897633933, "grad_norm": 1.1970785311415293, "learning_rate": 8.805259329802435e-06, "loss": 0.6616, "step": 8090 }, { "epoch": 0.24797719749908054, "grad_norm": 1.2307808292458084, "learning_rate": 8.80493735412418e-06, "loss": 0.69, "step": 8091 }, { "epoch": 0.24800784602182174, "grad_norm": 1.4645532757705941, "learning_rate": 8.804615340954901e-06, "loss": 0.7654, "step": 8092 }, { "epoch": 0.24803849454456295, "grad_norm": 1.2256406887536018, "learning_rate": 8.804293290297777e-06, "loss": 0.6897, "step": 8093 }, { "epoch": 0.24806914306730415, "grad_norm": 1.2403848220977194, "learning_rate": 8.803971202155975e-06, "loss": 0.6452, "step": 8094 }, { "epoch": 0.24809979159004536, "grad_norm": 1.3145135677225754, "learning_rate": 8.803649076532672e-06, "loss": 0.7529, "step": 8095 }, { "epoch": 0.24813044011278657, "grad_norm": 1.2384312244125464, "learning_rate": 8.80332691343104e-06, "loss": 0.687, "step": 8096 }, { "epoch": 0.24816108863552777, "grad_norm": 1.4259420646062773, "learning_rate": 8.803004712854258e-06, "loss": 0.6428, "step": 8097 }, { "epoch": 0.24819173715826898, "grad_norm": 1.3216968060011463, "learning_rate": 8.802682474805495e-06, "loss": 0.7084, "step": 8098 }, { "epoch": 0.24822238568101018, "grad_norm": 1.2884484678131967, "learning_rate": 8.80236019928793e-06, "loss": 0.746, "step": 8099 }, { "epoch": 0.2482530342037514, "grad_norm": 1.3651077460309105, "learning_rate": 8.802037886304736e-06, "loss": 0.6262, "step": 8100 }, { "epoch": 0.2482836827264926, "grad_norm": 1.2619189893205967, "learning_rate": 8.80171553585909e-06, "loss": 0.6611, "step": 8101 }, { "epoch": 0.2483143312492338, "grad_norm": 1.2341649557647696, "learning_rate": 8.80139314795417e-06, "loss": 0.7359, "step": 8102 }, { "epoch": 0.24834497977197498, "grad_norm": 1.1597356702934203, "learning_rate": 8.801070722593147e-06, "loss": 0.7299, "step": 8103 }, { "epoch": 0.24837562829471618, "grad_norm": 1.4127720017208485, "learning_rate": 8.800748259779206e-06, "loss": 0.6481, "step": 8104 }, { "epoch": 0.2484062768174574, "grad_norm": 1.2324272361102198, "learning_rate": 8.800425759515517e-06, "loss": 0.7634, "step": 8105 }, { "epoch": 0.2484369253401986, "grad_norm": 1.2548509615856882, "learning_rate": 8.800103221805261e-06, "loss": 0.6719, "step": 8106 }, { "epoch": 0.2484675738629398, "grad_norm": 0.5112618575173368, "learning_rate": 8.799780646651617e-06, "loss": 0.4308, "step": 8107 }, { "epoch": 0.248498222385681, "grad_norm": 1.577290766396426, "learning_rate": 8.799458034057761e-06, "loss": 0.7929, "step": 8108 }, { "epoch": 0.2485288709084222, "grad_norm": 1.381217659388618, "learning_rate": 8.799135384026874e-06, "loss": 0.6952, "step": 8109 }, { "epoch": 0.24855951943116342, "grad_norm": 1.2063464719634305, "learning_rate": 8.798812696562132e-06, "loss": 0.6723, "step": 8110 }, { "epoch": 0.24859016795390462, "grad_norm": 1.3205224426259106, "learning_rate": 8.798489971666717e-06, "loss": 0.7131, "step": 8111 }, { "epoch": 0.24862081647664583, "grad_norm": 1.3464659738944065, "learning_rate": 8.798167209343811e-06, "loss": 0.7125, "step": 8112 }, { "epoch": 0.24865146499938703, "grad_norm": 1.3239431022710884, "learning_rate": 8.79784440959659e-06, "loss": 0.6679, "step": 8113 }, { "epoch": 0.24868211352212824, "grad_norm": 1.4159180576575694, "learning_rate": 8.797521572428234e-06, "loss": 0.7013, "step": 8114 }, { "epoch": 0.24871276204486945, "grad_norm": 1.230771364100069, "learning_rate": 8.79719869784193e-06, "loss": 0.6539, "step": 8115 }, { "epoch": 0.24874341056761065, "grad_norm": 1.125792132701479, "learning_rate": 8.796875785840853e-06, "loss": 0.6613, "step": 8116 }, { "epoch": 0.24877405909035186, "grad_norm": 1.302302561920032, "learning_rate": 8.796552836428188e-06, "loss": 0.65, "step": 8117 }, { "epoch": 0.24880470761309306, "grad_norm": 1.1178956202834645, "learning_rate": 8.796229849607116e-06, "loss": 0.674, "step": 8118 }, { "epoch": 0.24883535613583424, "grad_norm": 1.185041854167544, "learning_rate": 8.795906825380821e-06, "loss": 0.6059, "step": 8119 }, { "epoch": 0.24886600465857545, "grad_norm": 1.2627126809830023, "learning_rate": 8.795583763752486e-06, "loss": 0.6995, "step": 8120 }, { "epoch": 0.24889665318131665, "grad_norm": 1.236720836861893, "learning_rate": 8.795260664725291e-06, "loss": 0.6472, "step": 8121 }, { "epoch": 0.24892730170405786, "grad_norm": 1.1977387936277188, "learning_rate": 8.794937528302422e-06, "loss": 0.6678, "step": 8122 }, { "epoch": 0.24895795022679906, "grad_norm": 1.2172130131323324, "learning_rate": 8.794614354487063e-06, "loss": 0.632, "step": 8123 }, { "epoch": 0.24898859874954027, "grad_norm": 1.3147203887397603, "learning_rate": 8.794291143282398e-06, "loss": 0.6587, "step": 8124 }, { "epoch": 0.24901924727228147, "grad_norm": 1.326672277797781, "learning_rate": 8.793967894691612e-06, "loss": 0.6831, "step": 8125 }, { "epoch": 0.24904989579502268, "grad_norm": 1.352452192298668, "learning_rate": 8.793644608717888e-06, "loss": 0.6179, "step": 8126 }, { "epoch": 0.24908054431776389, "grad_norm": 1.9392059473475598, "learning_rate": 8.793321285364416e-06, "loss": 0.683, "step": 8127 }, { "epoch": 0.2491111928405051, "grad_norm": 1.256196326040367, "learning_rate": 8.792997924634376e-06, "loss": 0.6202, "step": 8128 }, { "epoch": 0.2491418413632463, "grad_norm": 1.214864619091239, "learning_rate": 8.792674526530957e-06, "loss": 0.6231, "step": 8129 }, { "epoch": 0.2491724898859875, "grad_norm": 1.5509085635458917, "learning_rate": 8.792351091057348e-06, "loss": 0.8767, "step": 8130 }, { "epoch": 0.2492031384087287, "grad_norm": 1.2279694339016174, "learning_rate": 8.792027618216731e-06, "loss": 0.6422, "step": 8131 }, { "epoch": 0.24923378693146991, "grad_norm": 1.2500987613479604, "learning_rate": 8.791704108012295e-06, "loss": 0.7298, "step": 8132 }, { "epoch": 0.24926443545421112, "grad_norm": 1.2762347325952599, "learning_rate": 8.791380560447231e-06, "loss": 0.7268, "step": 8133 }, { "epoch": 0.2492950839769523, "grad_norm": 1.389197376058435, "learning_rate": 8.791056975524722e-06, "loss": 0.7455, "step": 8134 }, { "epoch": 0.2493257324996935, "grad_norm": 1.2891930103557403, "learning_rate": 8.79073335324796e-06, "loss": 0.7012, "step": 8135 }, { "epoch": 0.2493563810224347, "grad_norm": 0.5402473222735675, "learning_rate": 8.790409693620132e-06, "loss": 0.4379, "step": 8136 }, { "epoch": 0.24938702954517591, "grad_norm": 1.1523914825605026, "learning_rate": 8.790085996644426e-06, "loss": 0.7025, "step": 8137 }, { "epoch": 0.24941767806791712, "grad_norm": 1.444617874373537, "learning_rate": 8.789762262324035e-06, "loss": 0.7094, "step": 8138 }, { "epoch": 0.24944832659065833, "grad_norm": 1.3542254742272737, "learning_rate": 8.789438490662146e-06, "loss": 0.6674, "step": 8139 }, { "epoch": 0.24947897511339953, "grad_norm": 0.48085610562195596, "learning_rate": 8.78911468166195e-06, "loss": 0.4287, "step": 8140 }, { "epoch": 0.24950962363614074, "grad_norm": 0.45409285035307695, "learning_rate": 8.788790835326637e-06, "loss": 0.4412, "step": 8141 }, { "epoch": 0.24954027215888194, "grad_norm": 1.1922572503858013, "learning_rate": 8.7884669516594e-06, "loss": 0.6668, "step": 8142 }, { "epoch": 0.24957092068162315, "grad_norm": 1.18757211775353, "learning_rate": 8.788143030663427e-06, "loss": 0.7036, "step": 8143 }, { "epoch": 0.24960156920436435, "grad_norm": 1.1360254038056066, "learning_rate": 8.787819072341914e-06, "loss": 0.6361, "step": 8144 }, { "epoch": 0.24963221772710556, "grad_norm": 1.315095156533497, "learning_rate": 8.787495076698049e-06, "loss": 0.6655, "step": 8145 }, { "epoch": 0.24966286624984677, "grad_norm": 0.4930317195237841, "learning_rate": 8.787171043735025e-06, "loss": 0.4419, "step": 8146 }, { "epoch": 0.24969351477258797, "grad_norm": 1.3865591915093098, "learning_rate": 8.786846973456036e-06, "loss": 0.7453, "step": 8147 }, { "epoch": 0.24972416329532918, "grad_norm": 1.44101418025766, "learning_rate": 8.786522865864275e-06, "loss": 0.7794, "step": 8148 }, { "epoch": 0.24975481181807038, "grad_norm": 1.3773161646579897, "learning_rate": 8.786198720962937e-06, "loss": 0.737, "step": 8149 }, { "epoch": 0.24978546034081156, "grad_norm": 1.3111858276675359, "learning_rate": 8.785874538755212e-06, "loss": 0.7637, "step": 8150 }, { "epoch": 0.24981610886355277, "grad_norm": 1.1748078398570037, "learning_rate": 8.785550319244298e-06, "loss": 0.6296, "step": 8151 }, { "epoch": 0.24984675738629397, "grad_norm": 1.3798855962232888, "learning_rate": 8.785226062433387e-06, "loss": 0.6671, "step": 8152 }, { "epoch": 0.24987740590903518, "grad_norm": 1.3007524856077541, "learning_rate": 8.784901768325676e-06, "loss": 0.7665, "step": 8153 }, { "epoch": 0.24990805443177638, "grad_norm": 0.5022888232568519, "learning_rate": 8.784577436924359e-06, "loss": 0.4231, "step": 8154 }, { "epoch": 0.2499387029545176, "grad_norm": 1.4348592742597082, "learning_rate": 8.784253068232634e-06, "loss": 0.755, "step": 8155 }, { "epoch": 0.2499693514772588, "grad_norm": 1.2936625826255834, "learning_rate": 8.783928662253693e-06, "loss": 0.7232, "step": 8156 }, { "epoch": 0.25, "grad_norm": 1.326617425869348, "learning_rate": 8.783604218990735e-06, "loss": 0.7033, "step": 8157 }, { "epoch": 0.2500306485227412, "grad_norm": 1.1582553578138624, "learning_rate": 8.783279738446957e-06, "loss": 0.6417, "step": 8158 }, { "epoch": 0.2500612970454824, "grad_norm": 1.4519949473029536, "learning_rate": 8.782955220625556e-06, "loss": 0.7179, "step": 8159 }, { "epoch": 0.2500919455682236, "grad_norm": 1.2870638732296025, "learning_rate": 8.78263066552973e-06, "loss": 0.6949, "step": 8160 }, { "epoch": 0.2501225940909648, "grad_norm": 1.3454523443740254, "learning_rate": 8.782306073162674e-06, "loss": 0.6505, "step": 8161 }, { "epoch": 0.25015324261370603, "grad_norm": 1.2721490730700746, "learning_rate": 8.78198144352759e-06, "loss": 0.6756, "step": 8162 }, { "epoch": 0.25018389113644723, "grad_norm": 1.296755855189322, "learning_rate": 8.781656776627674e-06, "loss": 0.7258, "step": 8163 }, { "epoch": 0.25021453965918844, "grad_norm": 1.4920450165581842, "learning_rate": 8.78133207246613e-06, "loss": 0.8534, "step": 8164 }, { "epoch": 0.25024518818192965, "grad_norm": 1.1983206781848208, "learning_rate": 8.78100733104615e-06, "loss": 0.5958, "step": 8165 }, { "epoch": 0.25027583670467085, "grad_norm": 1.1901229498604324, "learning_rate": 8.780682552370937e-06, "loss": 0.7367, "step": 8166 }, { "epoch": 0.25030648522741206, "grad_norm": 0.4874821049910407, "learning_rate": 8.780357736443693e-06, "loss": 0.4615, "step": 8167 }, { "epoch": 0.25033713375015326, "grad_norm": 1.3043031942649972, "learning_rate": 8.780032883267617e-06, "loss": 0.7322, "step": 8168 }, { "epoch": 0.25036778227289447, "grad_norm": 0.46843046931918697, "learning_rate": 8.779707992845909e-06, "loss": 0.4361, "step": 8169 }, { "epoch": 0.2503984307956357, "grad_norm": 0.47225621850804494, "learning_rate": 8.779383065181772e-06, "loss": 0.461, "step": 8170 }, { "epoch": 0.2504290793183769, "grad_norm": 1.1780472002812554, "learning_rate": 8.779058100278407e-06, "loss": 0.6463, "step": 8171 }, { "epoch": 0.2504597278411181, "grad_norm": 1.3198305271585897, "learning_rate": 8.778733098139014e-06, "loss": 0.7595, "step": 8172 }, { "epoch": 0.25049037636385924, "grad_norm": 1.2207610275558054, "learning_rate": 8.778408058766796e-06, "loss": 0.6551, "step": 8173 }, { "epoch": 0.25052102488660044, "grad_norm": 0.48220281526838643, "learning_rate": 8.778082982164959e-06, "loss": 0.4318, "step": 8174 }, { "epoch": 0.25055167340934165, "grad_norm": 1.5629537716670976, "learning_rate": 8.777757868336703e-06, "loss": 0.7706, "step": 8175 }, { "epoch": 0.25058232193208285, "grad_norm": 1.3536014111308496, "learning_rate": 8.777432717285232e-06, "loss": 0.6685, "step": 8176 }, { "epoch": 0.25061297045482406, "grad_norm": 1.4296422503193942, "learning_rate": 8.777107529013751e-06, "loss": 0.7708, "step": 8177 }, { "epoch": 0.25064361897756526, "grad_norm": 1.3012244730710412, "learning_rate": 8.776782303525462e-06, "loss": 0.6434, "step": 8178 }, { "epoch": 0.25067426750030647, "grad_norm": 1.3481173575634944, "learning_rate": 8.776457040823572e-06, "loss": 0.6962, "step": 8179 }, { "epoch": 0.2507049160230477, "grad_norm": 1.4372289042813933, "learning_rate": 8.776131740911283e-06, "loss": 0.7645, "step": 8180 }, { "epoch": 0.2507355645457889, "grad_norm": 1.255103279421672, "learning_rate": 8.775806403791802e-06, "loss": 0.6599, "step": 8181 }, { "epoch": 0.2507662130685301, "grad_norm": 0.46348315798127604, "learning_rate": 8.775481029468334e-06, "loss": 0.4407, "step": 8182 }, { "epoch": 0.2507968615912713, "grad_norm": 1.3027203735993664, "learning_rate": 8.775155617944087e-06, "loss": 0.674, "step": 8183 }, { "epoch": 0.2508275101140125, "grad_norm": 1.3549206580742783, "learning_rate": 8.774830169222263e-06, "loss": 0.6894, "step": 8184 }, { "epoch": 0.2508581586367537, "grad_norm": 1.293961308118781, "learning_rate": 8.774504683306076e-06, "loss": 0.7026, "step": 8185 }, { "epoch": 0.2508888071594949, "grad_norm": 1.4983837780161011, "learning_rate": 8.774179160198725e-06, "loss": 0.7259, "step": 8186 }, { "epoch": 0.2509194556822361, "grad_norm": 1.105468439332893, "learning_rate": 8.773853599903422e-06, "loss": 0.7072, "step": 8187 }, { "epoch": 0.2509501042049773, "grad_norm": 0.5242026820319559, "learning_rate": 8.773528002423373e-06, "loss": 0.447, "step": 8188 }, { "epoch": 0.2509807527277185, "grad_norm": 1.1799045500537946, "learning_rate": 8.773202367761788e-06, "loss": 0.8162, "step": 8189 }, { "epoch": 0.25101140125045973, "grad_norm": 1.268698169714763, "learning_rate": 8.772876695921874e-06, "loss": 0.7921, "step": 8190 }, { "epoch": 0.25104204977320094, "grad_norm": 1.33608098607531, "learning_rate": 8.772550986906843e-06, "loss": 0.6353, "step": 8191 }, { "epoch": 0.25107269829594214, "grad_norm": 1.3585093442520302, "learning_rate": 8.7722252407199e-06, "loss": 0.6612, "step": 8192 }, { "epoch": 0.25110334681868335, "grad_norm": 0.4587892667773189, "learning_rate": 8.771899457364256e-06, "loss": 0.4384, "step": 8193 }, { "epoch": 0.25113399534142455, "grad_norm": 1.3451337628830937, "learning_rate": 8.771573636843123e-06, "loss": 0.7188, "step": 8194 }, { "epoch": 0.25116464386416576, "grad_norm": 1.2082449336029453, "learning_rate": 8.771247779159708e-06, "loss": 0.7214, "step": 8195 }, { "epoch": 0.25119529238690697, "grad_norm": 1.3534931429771626, "learning_rate": 8.770921884317225e-06, "loss": 0.7414, "step": 8196 }, { "epoch": 0.25122594090964817, "grad_norm": 1.3949244690788367, "learning_rate": 8.770595952318885e-06, "loss": 0.7327, "step": 8197 }, { "epoch": 0.2512565894323894, "grad_norm": 1.2908044291213119, "learning_rate": 8.770269983167896e-06, "loss": 0.7726, "step": 8198 }, { "epoch": 0.2512872379551306, "grad_norm": 1.3873569273311794, "learning_rate": 8.769943976867473e-06, "loss": 0.6546, "step": 8199 }, { "epoch": 0.2513178864778718, "grad_norm": 1.4605852013412235, "learning_rate": 8.76961793342083e-06, "loss": 0.7111, "step": 8200 }, { "epoch": 0.251348535000613, "grad_norm": 1.3941688094914169, "learning_rate": 8.769291852831172e-06, "loss": 0.7663, "step": 8201 }, { "epoch": 0.2513791835233542, "grad_norm": 0.554687536337352, "learning_rate": 8.76896573510172e-06, "loss": 0.4326, "step": 8202 }, { "epoch": 0.2514098320460954, "grad_norm": 0.5242181420040966, "learning_rate": 8.768639580235685e-06, "loss": 0.4471, "step": 8203 }, { "epoch": 0.25144048056883656, "grad_norm": 0.4733754697591507, "learning_rate": 8.768313388236278e-06, "loss": 0.4662, "step": 8204 }, { "epoch": 0.25147112909157776, "grad_norm": 1.4679082043597775, "learning_rate": 8.767987159106717e-06, "loss": 0.786, "step": 8205 }, { "epoch": 0.25150177761431897, "grad_norm": 1.3045437504362085, "learning_rate": 8.767660892850214e-06, "loss": 0.7763, "step": 8206 }, { "epoch": 0.25153242613706017, "grad_norm": 1.126196523770882, "learning_rate": 8.767334589469982e-06, "loss": 0.5546, "step": 8207 }, { "epoch": 0.2515630746598014, "grad_norm": 1.0766552215339371, "learning_rate": 8.76700824896924e-06, "loss": 0.6648, "step": 8208 }, { "epoch": 0.2515937231825426, "grad_norm": 1.355949744053413, "learning_rate": 8.766681871351202e-06, "loss": 0.7267, "step": 8209 }, { "epoch": 0.2516243717052838, "grad_norm": 1.2181090868019442, "learning_rate": 8.766355456619085e-06, "loss": 0.7127, "step": 8210 }, { "epoch": 0.251655020228025, "grad_norm": 1.2457689127529286, "learning_rate": 8.766029004776102e-06, "loss": 0.7097, "step": 8211 }, { "epoch": 0.2516856687507662, "grad_norm": 1.481295533383651, "learning_rate": 8.765702515825472e-06, "loss": 0.6328, "step": 8212 }, { "epoch": 0.2517163172735074, "grad_norm": 1.3169580285534324, "learning_rate": 8.765375989770412e-06, "loss": 0.6502, "step": 8213 }, { "epoch": 0.2517469657962486, "grad_norm": 0.8284188939621088, "learning_rate": 8.765049426614138e-06, "loss": 0.4378, "step": 8214 }, { "epoch": 0.2517776143189898, "grad_norm": 1.3763298877933083, "learning_rate": 8.764722826359871e-06, "loss": 0.6632, "step": 8215 }, { "epoch": 0.251808262841731, "grad_norm": 1.5181261910013402, "learning_rate": 8.764396189010824e-06, "loss": 0.7282, "step": 8216 }, { "epoch": 0.25183891136447223, "grad_norm": 1.3449994729091403, "learning_rate": 8.76406951457022e-06, "loss": 0.8098, "step": 8217 }, { "epoch": 0.25186955988721343, "grad_norm": 1.1822377458594557, "learning_rate": 8.763742803041275e-06, "loss": 0.6808, "step": 8218 }, { "epoch": 0.25190020840995464, "grad_norm": 1.1452583781435843, "learning_rate": 8.76341605442721e-06, "loss": 0.6682, "step": 8219 }, { "epoch": 0.25193085693269585, "grad_norm": 1.208402239545372, "learning_rate": 8.763089268731244e-06, "loss": 0.7571, "step": 8220 }, { "epoch": 0.25196150545543705, "grad_norm": 1.2605614918208767, "learning_rate": 8.762762445956595e-06, "loss": 0.6772, "step": 8221 }, { "epoch": 0.25199215397817826, "grad_norm": 1.2212796475777776, "learning_rate": 8.762435586106486e-06, "loss": 0.678, "step": 8222 }, { "epoch": 0.25202280250091946, "grad_norm": 1.2370819383622977, "learning_rate": 8.762108689184136e-06, "loss": 0.6928, "step": 8223 }, { "epoch": 0.25205345102366067, "grad_norm": 1.1584767865213388, "learning_rate": 8.761781755192767e-06, "loss": 0.589, "step": 8224 }, { "epoch": 0.2520840995464019, "grad_norm": 1.168341900940918, "learning_rate": 8.7614547841356e-06, "loss": 0.6755, "step": 8225 }, { "epoch": 0.2521147480691431, "grad_norm": 1.3329278469470274, "learning_rate": 8.761127776015857e-06, "loss": 0.6721, "step": 8226 }, { "epoch": 0.2521453965918843, "grad_norm": 1.2365464757637161, "learning_rate": 8.760800730836758e-06, "loss": 0.6439, "step": 8227 }, { "epoch": 0.2521760451146255, "grad_norm": 0.5444669859901451, "learning_rate": 8.760473648601528e-06, "loss": 0.442, "step": 8228 }, { "epoch": 0.2522066936373667, "grad_norm": 1.3161144445970232, "learning_rate": 8.76014652931339e-06, "loss": 0.6667, "step": 8229 }, { "epoch": 0.2522373421601079, "grad_norm": 0.5084482955693274, "learning_rate": 8.759819372975565e-06, "loss": 0.4553, "step": 8230 }, { "epoch": 0.2522679906828491, "grad_norm": 1.2362352898084645, "learning_rate": 8.759492179591278e-06, "loss": 0.7305, "step": 8231 }, { "epoch": 0.2522986392055903, "grad_norm": 1.3090263427259436, "learning_rate": 8.759164949163752e-06, "loss": 0.7379, "step": 8232 }, { "epoch": 0.2523292877283315, "grad_norm": 1.258178161211327, "learning_rate": 8.758837681696213e-06, "loss": 0.7004, "step": 8233 }, { "epoch": 0.2523599362510727, "grad_norm": 1.3954483568812732, "learning_rate": 8.758510377191884e-06, "loss": 0.6643, "step": 8234 }, { "epoch": 0.2523905847738139, "grad_norm": 1.3147244016543522, "learning_rate": 8.75818303565399e-06, "loss": 0.6913, "step": 8235 }, { "epoch": 0.2524212332965551, "grad_norm": 1.2729188364817439, "learning_rate": 8.757855657085758e-06, "loss": 0.7145, "step": 8236 }, { "epoch": 0.2524518818192963, "grad_norm": 1.1822818878342507, "learning_rate": 8.757528241490413e-06, "loss": 0.6634, "step": 8237 }, { "epoch": 0.2524825303420375, "grad_norm": 1.3055323882975023, "learning_rate": 8.75720078887118e-06, "loss": 0.695, "step": 8238 }, { "epoch": 0.2525131788647787, "grad_norm": 1.1481774349170044, "learning_rate": 8.756873299231287e-06, "loss": 0.667, "step": 8239 }, { "epoch": 0.2525438273875199, "grad_norm": 1.2750207195884522, "learning_rate": 8.756545772573962e-06, "loss": 0.8218, "step": 8240 }, { "epoch": 0.2525744759102611, "grad_norm": 1.255939575718055, "learning_rate": 8.756218208902426e-06, "loss": 0.6725, "step": 8241 }, { "epoch": 0.2526051244330023, "grad_norm": 1.3335032582277249, "learning_rate": 8.755890608219914e-06, "loss": 0.7181, "step": 8242 }, { "epoch": 0.2526357729557435, "grad_norm": 1.2704929324128063, "learning_rate": 8.75556297052965e-06, "loss": 0.6853, "step": 8243 }, { "epoch": 0.2526664214784847, "grad_norm": 1.3971104841623887, "learning_rate": 8.755235295834862e-06, "loss": 0.5906, "step": 8244 }, { "epoch": 0.25269707000122593, "grad_norm": 1.3970955828007507, "learning_rate": 8.754907584138781e-06, "loss": 0.6978, "step": 8245 }, { "epoch": 0.25272771852396714, "grad_norm": 1.3226296167439684, "learning_rate": 8.754579835444634e-06, "loss": 0.6296, "step": 8246 }, { "epoch": 0.25275836704670834, "grad_norm": 1.2749448934259513, "learning_rate": 8.754252049755654e-06, "loss": 0.5998, "step": 8247 }, { "epoch": 0.25278901556944955, "grad_norm": 1.1623400366795211, "learning_rate": 8.753924227075064e-06, "loss": 0.7045, "step": 8248 }, { "epoch": 0.25281966409219075, "grad_norm": 1.3697549299192295, "learning_rate": 8.7535963674061e-06, "loss": 0.6798, "step": 8249 }, { "epoch": 0.25285031261493196, "grad_norm": 1.242345089971489, "learning_rate": 8.753268470751991e-06, "loss": 0.6577, "step": 8250 }, { "epoch": 0.25288096113767317, "grad_norm": 1.235083866252244, "learning_rate": 8.752940537115969e-06, "loss": 0.6934, "step": 8251 }, { "epoch": 0.25291160966041437, "grad_norm": 1.2227507275646055, "learning_rate": 8.752612566501259e-06, "loss": 0.6383, "step": 8252 }, { "epoch": 0.2529422581831556, "grad_norm": 1.3073358174333227, "learning_rate": 8.752284558911101e-06, "loss": 0.7602, "step": 8253 }, { "epoch": 0.2529729067058968, "grad_norm": 1.2518523920731794, "learning_rate": 8.751956514348722e-06, "loss": 0.6822, "step": 8254 }, { "epoch": 0.253003555228638, "grad_norm": 1.2121824686364862, "learning_rate": 8.751628432817355e-06, "loss": 0.7644, "step": 8255 }, { "epoch": 0.2530342037513792, "grad_norm": 1.333616553702452, "learning_rate": 8.751300314320234e-06, "loss": 0.6685, "step": 8256 }, { "epoch": 0.2530648522741204, "grad_norm": 1.128551667209538, "learning_rate": 8.750972158860592e-06, "loss": 0.6991, "step": 8257 }, { "epoch": 0.2530955007968616, "grad_norm": 1.277713066683229, "learning_rate": 8.75064396644166e-06, "loss": 0.6358, "step": 8258 }, { "epoch": 0.2531261493196028, "grad_norm": 1.1671775896965082, "learning_rate": 8.750315737066674e-06, "loss": 0.7272, "step": 8259 }, { "epoch": 0.253156797842344, "grad_norm": 0.8068869169787676, "learning_rate": 8.749987470738867e-06, "loss": 0.4721, "step": 8260 }, { "epoch": 0.2531874463650852, "grad_norm": 1.2699667022014376, "learning_rate": 8.749659167461475e-06, "loss": 0.6695, "step": 8261 }, { "epoch": 0.25321809488782643, "grad_norm": 1.413823303054858, "learning_rate": 8.749330827237731e-06, "loss": 0.6672, "step": 8262 }, { "epoch": 0.25324874341056763, "grad_norm": 0.4872185350079838, "learning_rate": 8.749002450070871e-06, "loss": 0.4538, "step": 8263 }, { "epoch": 0.25327939193330884, "grad_norm": 1.3233518415790946, "learning_rate": 8.748674035964132e-06, "loss": 0.7773, "step": 8264 }, { "epoch": 0.25331004045605005, "grad_norm": 1.3913189296570003, "learning_rate": 8.748345584920748e-06, "loss": 0.7196, "step": 8265 }, { "epoch": 0.2533406889787912, "grad_norm": 1.1628586134599492, "learning_rate": 8.748017096943956e-06, "loss": 0.7134, "step": 8266 }, { "epoch": 0.2533713375015324, "grad_norm": 1.390666993176717, "learning_rate": 8.74768857203699e-06, "loss": 0.7036, "step": 8267 }, { "epoch": 0.2534019860242736, "grad_norm": 1.2984484728676293, "learning_rate": 8.747360010203092e-06, "loss": 0.6873, "step": 8268 }, { "epoch": 0.2534326345470148, "grad_norm": 1.5565542206475578, "learning_rate": 8.747031411445496e-06, "loss": 0.7096, "step": 8269 }, { "epoch": 0.253463283069756, "grad_norm": 1.2232531396533373, "learning_rate": 8.746702775767442e-06, "loss": 0.6506, "step": 8270 }, { "epoch": 0.2534939315924972, "grad_norm": 1.3821201676216344, "learning_rate": 8.746374103172166e-06, "loss": 0.7161, "step": 8271 }, { "epoch": 0.25352458011523843, "grad_norm": 0.6768472805959405, "learning_rate": 8.746045393662908e-06, "loss": 0.4604, "step": 8272 }, { "epoch": 0.25355522863797963, "grad_norm": 1.180669804485392, "learning_rate": 8.745716647242905e-06, "loss": 0.6583, "step": 8273 }, { "epoch": 0.25358587716072084, "grad_norm": 1.1192726434088314, "learning_rate": 8.7453878639154e-06, "loss": 0.6156, "step": 8274 }, { "epoch": 0.25361652568346205, "grad_norm": 1.260589064139743, "learning_rate": 8.745059043683629e-06, "loss": 0.7269, "step": 8275 }, { "epoch": 0.25364717420620325, "grad_norm": 1.2236625382028357, "learning_rate": 8.744730186550831e-06, "loss": 0.7147, "step": 8276 }, { "epoch": 0.25367782272894446, "grad_norm": 1.318126486664618, "learning_rate": 8.74440129252025e-06, "loss": 0.6745, "step": 8277 }, { "epoch": 0.25370847125168566, "grad_norm": 1.4901771869400882, "learning_rate": 8.744072361595124e-06, "loss": 0.7847, "step": 8278 }, { "epoch": 0.25373911977442687, "grad_norm": 1.3682812170307446, "learning_rate": 8.743743393778697e-06, "loss": 0.7152, "step": 8279 }, { "epoch": 0.2537697682971681, "grad_norm": 1.1856971007685657, "learning_rate": 8.743414389074208e-06, "loss": 0.6812, "step": 8280 }, { "epoch": 0.2538004168199093, "grad_norm": 1.2300351842793809, "learning_rate": 8.743085347484899e-06, "loss": 0.6496, "step": 8281 }, { "epoch": 0.2538310653426505, "grad_norm": 1.3348620901988046, "learning_rate": 8.742756269014012e-06, "loss": 0.7583, "step": 8282 }, { "epoch": 0.2538617138653917, "grad_norm": 1.1859907005067327, "learning_rate": 8.74242715366479e-06, "loss": 0.775, "step": 8283 }, { "epoch": 0.2538923623881329, "grad_norm": 1.4295882384224419, "learning_rate": 8.742098001440474e-06, "loss": 0.8363, "step": 8284 }, { "epoch": 0.2539230109108741, "grad_norm": 1.363531134613812, "learning_rate": 8.741768812344311e-06, "loss": 0.8137, "step": 8285 }, { "epoch": 0.2539536594336153, "grad_norm": 1.2386781401531688, "learning_rate": 8.741439586379543e-06, "loss": 0.6009, "step": 8286 }, { "epoch": 0.2539843079563565, "grad_norm": 1.3297432659795982, "learning_rate": 8.74111032354941e-06, "loss": 0.7652, "step": 8287 }, { "epoch": 0.2540149564790977, "grad_norm": 1.1263422442291013, "learning_rate": 8.740781023857163e-06, "loss": 0.5965, "step": 8288 }, { "epoch": 0.2540456050018389, "grad_norm": 1.3516745741343639, "learning_rate": 8.740451687306043e-06, "loss": 0.6582, "step": 8289 }, { "epoch": 0.25407625352458013, "grad_norm": 1.21717579195766, "learning_rate": 8.740122313899295e-06, "loss": 0.68, "step": 8290 }, { "epoch": 0.25410690204732134, "grad_norm": 1.1874585402023852, "learning_rate": 8.739792903640166e-06, "loss": 0.6727, "step": 8291 }, { "epoch": 0.25413755057006254, "grad_norm": 1.3324923899775802, "learning_rate": 8.7394634565319e-06, "loss": 0.6462, "step": 8292 }, { "epoch": 0.25416819909280375, "grad_norm": 1.354971424099426, "learning_rate": 8.739133972577744e-06, "loss": 0.7681, "step": 8293 }, { "epoch": 0.25419884761554495, "grad_norm": 1.2100682331338772, "learning_rate": 8.738804451780943e-06, "loss": 0.6098, "step": 8294 }, { "epoch": 0.25422949613828616, "grad_norm": 1.3267328185138052, "learning_rate": 8.738474894144747e-06, "loss": 0.6841, "step": 8295 }, { "epoch": 0.25426014466102737, "grad_norm": 1.1992451120199255, "learning_rate": 8.7381452996724e-06, "loss": 0.5352, "step": 8296 }, { "epoch": 0.2542907931837685, "grad_norm": 1.3132878523458513, "learning_rate": 8.737815668367152e-06, "loss": 0.7214, "step": 8297 }, { "epoch": 0.2543214417065097, "grad_norm": 1.2345118968070012, "learning_rate": 8.737486000232247e-06, "loss": 0.675, "step": 8298 }, { "epoch": 0.2543520902292509, "grad_norm": 1.2829356275705177, "learning_rate": 8.737156295270938e-06, "loss": 0.6086, "step": 8299 }, { "epoch": 0.25438273875199213, "grad_norm": 1.719515331706592, "learning_rate": 8.736826553486473e-06, "loss": 0.6724, "step": 8300 }, { "epoch": 0.25441338727473334, "grad_norm": 0.5708405508606438, "learning_rate": 8.736496774882099e-06, "loss": 0.4529, "step": 8301 }, { "epoch": 0.25444403579747454, "grad_norm": 1.2489383740460358, "learning_rate": 8.736166959461065e-06, "loss": 0.6754, "step": 8302 }, { "epoch": 0.25447468432021575, "grad_norm": 1.1131801047117547, "learning_rate": 8.735837107226624e-06, "loss": 0.7785, "step": 8303 }, { "epoch": 0.25450533284295696, "grad_norm": 1.3656951634583467, "learning_rate": 8.735507218182023e-06, "loss": 0.7828, "step": 8304 }, { "epoch": 0.25453598136569816, "grad_norm": 1.5171617648024844, "learning_rate": 8.735177292330514e-06, "loss": 0.7566, "step": 8305 }, { "epoch": 0.25456662988843937, "grad_norm": 0.4653866052876938, "learning_rate": 8.734847329675349e-06, "loss": 0.4371, "step": 8306 }, { "epoch": 0.25459727841118057, "grad_norm": 1.2604309679466528, "learning_rate": 8.734517330219775e-06, "loss": 0.7248, "step": 8307 }, { "epoch": 0.2546279269339218, "grad_norm": 1.3020294910778298, "learning_rate": 8.734187293967046e-06, "loss": 0.6176, "step": 8308 }, { "epoch": 0.254658575456663, "grad_norm": 0.45507074201336256, "learning_rate": 8.733857220920416e-06, "loss": 0.458, "step": 8309 }, { "epoch": 0.2546892239794042, "grad_norm": 1.3904388545875397, "learning_rate": 8.733527111083136e-06, "loss": 0.668, "step": 8310 }, { "epoch": 0.2547198725021454, "grad_norm": 1.3173775116117041, "learning_rate": 8.733196964458457e-06, "loss": 0.733, "step": 8311 }, { "epoch": 0.2547505210248866, "grad_norm": 1.4539973434461926, "learning_rate": 8.732866781049632e-06, "loss": 0.7058, "step": 8312 }, { "epoch": 0.2547811695476278, "grad_norm": 1.1771885812039022, "learning_rate": 8.732536560859917e-06, "loss": 0.7548, "step": 8313 }, { "epoch": 0.254811818070369, "grad_norm": 1.2425612219007902, "learning_rate": 8.732206303892564e-06, "loss": 0.7293, "step": 8314 }, { "epoch": 0.2548424665931102, "grad_norm": 1.3985889369394076, "learning_rate": 8.731876010150827e-06, "loss": 0.7817, "step": 8315 }, { "epoch": 0.2548731151158514, "grad_norm": 0.457988456423019, "learning_rate": 8.731545679637962e-06, "loss": 0.4269, "step": 8316 }, { "epoch": 0.25490376363859263, "grad_norm": 1.3692254305738558, "learning_rate": 8.731215312357221e-06, "loss": 0.7367, "step": 8317 }, { "epoch": 0.25493441216133383, "grad_norm": 1.4571580435099591, "learning_rate": 8.730884908311862e-06, "loss": 0.7898, "step": 8318 }, { "epoch": 0.25496506068407504, "grad_norm": 1.2651402089988835, "learning_rate": 8.730554467505139e-06, "loss": 0.6606, "step": 8319 }, { "epoch": 0.25499570920681625, "grad_norm": 1.1552377024621872, "learning_rate": 8.730223989940307e-06, "loss": 0.6077, "step": 8320 }, { "epoch": 0.25502635772955745, "grad_norm": 1.1982258055090855, "learning_rate": 8.729893475620626e-06, "loss": 0.7053, "step": 8321 }, { "epoch": 0.25505700625229866, "grad_norm": 1.0890247196526217, "learning_rate": 8.729562924549348e-06, "loss": 0.5874, "step": 8322 }, { "epoch": 0.25508765477503986, "grad_norm": 1.371642366984247, "learning_rate": 8.729232336729734e-06, "loss": 0.7401, "step": 8323 }, { "epoch": 0.25511830329778107, "grad_norm": 1.3300238572801437, "learning_rate": 8.728901712165039e-06, "loss": 0.7373, "step": 8324 }, { "epoch": 0.2551489518205223, "grad_norm": 1.321692108364458, "learning_rate": 8.728571050858522e-06, "loss": 0.7429, "step": 8325 }, { "epoch": 0.2551796003432635, "grad_norm": 1.2013998475027012, "learning_rate": 8.72824035281344e-06, "loss": 0.704, "step": 8326 }, { "epoch": 0.2552102488660047, "grad_norm": 1.215708143652265, "learning_rate": 8.727909618033051e-06, "loss": 0.6793, "step": 8327 }, { "epoch": 0.25524089738874584, "grad_norm": 0.4858839452632103, "learning_rate": 8.727578846520615e-06, "loss": 0.4578, "step": 8328 }, { "epoch": 0.25527154591148704, "grad_norm": 0.45269596781828514, "learning_rate": 8.727248038279392e-06, "loss": 0.4718, "step": 8329 }, { "epoch": 0.25530219443422825, "grad_norm": 1.5680061891084756, "learning_rate": 8.72691719331264e-06, "loss": 0.8061, "step": 8330 }, { "epoch": 0.25533284295696945, "grad_norm": 1.2938035463260875, "learning_rate": 8.72658631162362e-06, "loss": 0.7195, "step": 8331 }, { "epoch": 0.25536349147971066, "grad_norm": 1.2459818404343954, "learning_rate": 8.72625539321559e-06, "loss": 0.6925, "step": 8332 }, { "epoch": 0.25539414000245186, "grad_norm": 1.446472992066287, "learning_rate": 8.725924438091813e-06, "loss": 0.6894, "step": 8333 }, { "epoch": 0.25542478852519307, "grad_norm": 0.4695561926539483, "learning_rate": 8.72559344625555e-06, "loss": 0.4264, "step": 8334 }, { "epoch": 0.2554554370479343, "grad_norm": 1.205907382339073, "learning_rate": 8.72526241771006e-06, "loss": 0.6916, "step": 8335 }, { "epoch": 0.2554860855706755, "grad_norm": 1.5639598133032937, "learning_rate": 8.724931352458605e-06, "loss": 0.6584, "step": 8336 }, { "epoch": 0.2555167340934167, "grad_norm": 0.4634601257772118, "learning_rate": 8.72460025050445e-06, "loss": 0.4479, "step": 8337 }, { "epoch": 0.2555473826161579, "grad_norm": 1.4643301698020683, "learning_rate": 8.724269111850857e-06, "loss": 0.7339, "step": 8338 }, { "epoch": 0.2555780311388991, "grad_norm": 1.1195036283351472, "learning_rate": 8.723937936501086e-06, "loss": 0.6802, "step": 8339 }, { "epoch": 0.2556086796616403, "grad_norm": 1.3927362774093748, "learning_rate": 8.723606724458402e-06, "loss": 0.6318, "step": 8340 }, { "epoch": 0.2556393281843815, "grad_norm": 0.498743948598272, "learning_rate": 8.72327547572607e-06, "loss": 0.4453, "step": 8341 }, { "epoch": 0.2556699767071227, "grad_norm": 1.358310679719097, "learning_rate": 8.72294419030735e-06, "loss": 0.6959, "step": 8342 }, { "epoch": 0.2557006252298639, "grad_norm": 1.2249029670060316, "learning_rate": 8.72261286820551e-06, "loss": 0.6728, "step": 8343 }, { "epoch": 0.2557312737526051, "grad_norm": 1.4619923289984205, "learning_rate": 8.72228150942381e-06, "loss": 0.7565, "step": 8344 }, { "epoch": 0.25576192227534633, "grad_norm": 1.3075278380732984, "learning_rate": 8.72195011396552e-06, "loss": 0.7267, "step": 8345 }, { "epoch": 0.25579257079808754, "grad_norm": 1.3075486216043053, "learning_rate": 8.721618681833903e-06, "loss": 0.7621, "step": 8346 }, { "epoch": 0.25582321932082874, "grad_norm": 1.2208368797984699, "learning_rate": 8.721287213032225e-06, "loss": 0.7108, "step": 8347 }, { "epoch": 0.25585386784356995, "grad_norm": 1.4132642332814613, "learning_rate": 8.720955707563752e-06, "loss": 0.6987, "step": 8348 }, { "epoch": 0.25588451636631115, "grad_norm": 1.1648100632675817, "learning_rate": 8.72062416543175e-06, "loss": 0.6652, "step": 8349 }, { "epoch": 0.25591516488905236, "grad_norm": 1.1941145978275582, "learning_rate": 8.720292586639485e-06, "loss": 0.6387, "step": 8350 }, { "epoch": 0.25594581341179357, "grad_norm": 1.1708974695055132, "learning_rate": 8.719960971190227e-06, "loss": 0.7569, "step": 8351 }, { "epoch": 0.25597646193453477, "grad_norm": 1.2005903768230781, "learning_rate": 8.719629319087242e-06, "loss": 0.7227, "step": 8352 }, { "epoch": 0.256007110457276, "grad_norm": 1.272040143394137, "learning_rate": 8.719297630333796e-06, "loss": 0.7387, "step": 8353 }, { "epoch": 0.2560377589800172, "grad_norm": 1.1711887954141815, "learning_rate": 8.71896590493316e-06, "loss": 0.7417, "step": 8354 }, { "epoch": 0.2560684075027584, "grad_norm": 1.2019769329057746, "learning_rate": 8.718634142888601e-06, "loss": 0.7716, "step": 8355 }, { "epoch": 0.2560990560254996, "grad_norm": 1.2921233009367643, "learning_rate": 8.718302344203388e-06, "loss": 0.6721, "step": 8356 }, { "epoch": 0.2561297045482408, "grad_norm": 0.5211416165308232, "learning_rate": 8.717970508880791e-06, "loss": 0.4586, "step": 8357 }, { "epoch": 0.256160353070982, "grad_norm": 1.206737988059705, "learning_rate": 8.71763863692408e-06, "loss": 0.7002, "step": 8358 }, { "epoch": 0.25619100159372316, "grad_norm": 1.2545330305220606, "learning_rate": 8.717306728336523e-06, "loss": 0.6932, "step": 8359 }, { "epoch": 0.25622165011646436, "grad_norm": 1.419999632161286, "learning_rate": 8.716974783121393e-06, "loss": 0.729, "step": 8360 }, { "epoch": 0.25625229863920557, "grad_norm": 1.1352291962088679, "learning_rate": 8.716642801281959e-06, "loss": 0.8261, "step": 8361 }, { "epoch": 0.2562829471619468, "grad_norm": 1.1002949431700222, "learning_rate": 8.716310782821493e-06, "loss": 0.699, "step": 8362 }, { "epoch": 0.256313595684688, "grad_norm": 1.1710103097702407, "learning_rate": 8.715978727743263e-06, "loss": 0.6482, "step": 8363 }, { "epoch": 0.2563442442074292, "grad_norm": 1.4093924886412796, "learning_rate": 8.715646636050548e-06, "loss": 0.7623, "step": 8364 }, { "epoch": 0.2563748927301704, "grad_norm": 1.0824407675479804, "learning_rate": 8.715314507746613e-06, "loss": 0.6174, "step": 8365 }, { "epoch": 0.2564055412529116, "grad_norm": 1.0418115331005484, "learning_rate": 8.714982342834735e-06, "loss": 0.5848, "step": 8366 }, { "epoch": 0.2564361897756528, "grad_norm": 1.4080425539257222, "learning_rate": 8.714650141318185e-06, "loss": 0.7333, "step": 8367 }, { "epoch": 0.256466838298394, "grad_norm": 1.1873275408436794, "learning_rate": 8.714317903200238e-06, "loss": 0.7014, "step": 8368 }, { "epoch": 0.2564974868211352, "grad_norm": 1.170838781618224, "learning_rate": 8.713985628484165e-06, "loss": 0.7382, "step": 8369 }, { "epoch": 0.2565281353438764, "grad_norm": 0.5265697201617882, "learning_rate": 8.713653317173241e-06, "loss": 0.4434, "step": 8370 }, { "epoch": 0.2565587838666176, "grad_norm": 1.2183997287689694, "learning_rate": 8.713320969270742e-06, "loss": 0.6951, "step": 8371 }, { "epoch": 0.25658943238935883, "grad_norm": 1.2182837358040837, "learning_rate": 8.71298858477994e-06, "loss": 0.6177, "step": 8372 }, { "epoch": 0.25662008091210003, "grad_norm": 0.45312735964912726, "learning_rate": 8.712656163704111e-06, "loss": 0.4405, "step": 8373 }, { "epoch": 0.25665072943484124, "grad_norm": 1.2152524935383189, "learning_rate": 8.712323706046533e-06, "loss": 0.7229, "step": 8374 }, { "epoch": 0.25668137795758245, "grad_norm": 1.2350989268494688, "learning_rate": 8.71199121181048e-06, "loss": 0.7076, "step": 8375 }, { "epoch": 0.25671202648032365, "grad_norm": 1.259920593802326, "learning_rate": 8.711658680999226e-06, "loss": 0.6274, "step": 8376 }, { "epoch": 0.25674267500306486, "grad_norm": 1.2207373395363408, "learning_rate": 8.71132611361605e-06, "loss": 0.7671, "step": 8377 }, { "epoch": 0.25677332352580606, "grad_norm": 0.4954032829754, "learning_rate": 8.710993509664226e-06, "loss": 0.4653, "step": 8378 }, { "epoch": 0.25680397204854727, "grad_norm": 1.2994837650558464, "learning_rate": 8.710660869147038e-06, "loss": 0.7068, "step": 8379 }, { "epoch": 0.2568346205712885, "grad_norm": 1.2467534716181596, "learning_rate": 8.710328192067757e-06, "loss": 0.7133, "step": 8380 }, { "epoch": 0.2568652690940297, "grad_norm": 1.2577178900635309, "learning_rate": 8.709995478429661e-06, "loss": 0.7477, "step": 8381 }, { "epoch": 0.2568959176167709, "grad_norm": 1.2788120816651432, "learning_rate": 8.709662728236033e-06, "loss": 0.7119, "step": 8382 }, { "epoch": 0.2569265661395121, "grad_norm": 1.4606154802560607, "learning_rate": 8.709329941490147e-06, "loss": 0.7366, "step": 8383 }, { "epoch": 0.2569572146622533, "grad_norm": 1.377205828376606, "learning_rate": 8.708997118195287e-06, "loss": 0.6479, "step": 8384 }, { "epoch": 0.2569878631849945, "grad_norm": 1.432596504163811, "learning_rate": 8.708664258354727e-06, "loss": 0.7852, "step": 8385 }, { "epoch": 0.2570185117077357, "grad_norm": 1.273135122220157, "learning_rate": 8.708331361971748e-06, "loss": 0.732, "step": 8386 }, { "epoch": 0.2570491602304769, "grad_norm": 1.0888294364137363, "learning_rate": 8.707998429049633e-06, "loss": 0.6653, "step": 8387 }, { "epoch": 0.2570798087532181, "grad_norm": 1.267511263657185, "learning_rate": 8.707665459591662e-06, "loss": 0.694, "step": 8388 }, { "epoch": 0.2571104572759593, "grad_norm": 1.2069349276656987, "learning_rate": 8.707332453601112e-06, "loss": 0.6503, "step": 8389 }, { "epoch": 0.2571411057987005, "grad_norm": 0.4579007119598239, "learning_rate": 8.706999411081268e-06, "loss": 0.4329, "step": 8390 }, { "epoch": 0.2571717543214417, "grad_norm": 1.1628750852475078, "learning_rate": 8.706666332035409e-06, "loss": 0.7494, "step": 8391 }, { "epoch": 0.2572024028441829, "grad_norm": 1.3530797231207536, "learning_rate": 8.70633321646682e-06, "loss": 0.7879, "step": 8392 }, { "epoch": 0.2572330513669241, "grad_norm": 1.149099130272792, "learning_rate": 8.70600006437878e-06, "loss": 0.6797, "step": 8393 }, { "epoch": 0.2572636998896653, "grad_norm": 1.0885649424899821, "learning_rate": 8.705666875774575e-06, "loss": 0.6614, "step": 8394 }, { "epoch": 0.2572943484124065, "grad_norm": 1.2323141499525208, "learning_rate": 8.705333650657486e-06, "loss": 0.6934, "step": 8395 }, { "epoch": 0.2573249969351477, "grad_norm": 1.2341175183549566, "learning_rate": 8.705000389030795e-06, "loss": 0.6331, "step": 8396 }, { "epoch": 0.2573556454578889, "grad_norm": 1.2082560264037217, "learning_rate": 8.704667090897787e-06, "loss": 0.6875, "step": 8397 }, { "epoch": 0.2573862939806301, "grad_norm": 0.4857301951077717, "learning_rate": 8.704333756261748e-06, "loss": 0.4671, "step": 8398 }, { "epoch": 0.2574169425033713, "grad_norm": 1.2356011173551902, "learning_rate": 8.704000385125959e-06, "loss": 0.6885, "step": 8399 }, { "epoch": 0.25744759102611253, "grad_norm": 1.388250093802283, "learning_rate": 8.703666977493707e-06, "loss": 0.7603, "step": 8400 }, { "epoch": 0.25747823954885374, "grad_norm": 1.4006696526567057, "learning_rate": 8.703333533368279e-06, "loss": 0.7331, "step": 8401 }, { "epoch": 0.25750888807159494, "grad_norm": 1.2109218666135686, "learning_rate": 8.703000052752954e-06, "loss": 0.757, "step": 8402 }, { "epoch": 0.25753953659433615, "grad_norm": 1.215545285762381, "learning_rate": 8.702666535651026e-06, "loss": 0.6498, "step": 8403 }, { "epoch": 0.25757018511707735, "grad_norm": 1.261670618698873, "learning_rate": 8.702332982065775e-06, "loss": 0.6885, "step": 8404 }, { "epoch": 0.25760083363981856, "grad_norm": 1.4789070347365243, "learning_rate": 8.701999392000491e-06, "loss": 0.7992, "step": 8405 }, { "epoch": 0.25763148216255977, "grad_norm": 1.1925032318411246, "learning_rate": 8.701665765458458e-06, "loss": 0.6781, "step": 8406 }, { "epoch": 0.25766213068530097, "grad_norm": 1.1634352420147362, "learning_rate": 8.701332102442967e-06, "loss": 0.6692, "step": 8407 }, { "epoch": 0.2576927792080422, "grad_norm": 1.221825912149342, "learning_rate": 8.700998402957303e-06, "loss": 0.7068, "step": 8408 }, { "epoch": 0.2577234277307834, "grad_norm": 1.3818918370751068, "learning_rate": 8.700664667004754e-06, "loss": 0.662, "step": 8409 }, { "epoch": 0.2577540762535246, "grad_norm": 1.3783166879728785, "learning_rate": 8.700330894588612e-06, "loss": 0.6689, "step": 8410 }, { "epoch": 0.2577847247762658, "grad_norm": 1.3494829918026048, "learning_rate": 8.69999708571216e-06, "loss": 0.8073, "step": 8411 }, { "epoch": 0.257815373299007, "grad_norm": 1.2691328099846118, "learning_rate": 8.69966324037869e-06, "loss": 0.6475, "step": 8412 }, { "epoch": 0.2578460218217482, "grad_norm": 1.3775756815779177, "learning_rate": 8.699329358591492e-06, "loss": 0.6805, "step": 8413 }, { "epoch": 0.2578766703444894, "grad_norm": 0.4933718928172918, "learning_rate": 8.698995440353856e-06, "loss": 0.4427, "step": 8414 }, { "epoch": 0.2579073188672306, "grad_norm": 0.47269309996752945, "learning_rate": 8.698661485669072e-06, "loss": 0.4402, "step": 8415 }, { "epoch": 0.2579379673899718, "grad_norm": 0.45899492361082533, "learning_rate": 8.698327494540428e-06, "loss": 0.441, "step": 8416 }, { "epoch": 0.25796861591271303, "grad_norm": 0.4664143315019603, "learning_rate": 8.69799346697122e-06, "loss": 0.4594, "step": 8417 }, { "epoch": 0.25799926443545423, "grad_norm": 1.2860700334611495, "learning_rate": 8.697659402964733e-06, "loss": 0.727, "step": 8418 }, { "epoch": 0.25802991295819544, "grad_norm": 1.3338171305941786, "learning_rate": 8.697325302524264e-06, "loss": 0.775, "step": 8419 }, { "epoch": 0.25806056148093665, "grad_norm": 0.47528101295053454, "learning_rate": 8.696991165653102e-06, "loss": 0.4582, "step": 8420 }, { "epoch": 0.2580912100036778, "grad_norm": 1.1728443152096781, "learning_rate": 8.69665699235454e-06, "loss": 0.5628, "step": 8421 }, { "epoch": 0.258121858526419, "grad_norm": 1.2292744630778838, "learning_rate": 8.69632278263187e-06, "loss": 0.7012, "step": 8422 }, { "epoch": 0.2581525070491602, "grad_norm": 1.4328343565763644, "learning_rate": 8.695988536488387e-06, "loss": 0.7461, "step": 8423 }, { "epoch": 0.2581831555719014, "grad_norm": 1.2170923512204386, "learning_rate": 8.695654253927384e-06, "loss": 0.7401, "step": 8424 }, { "epoch": 0.2582138040946426, "grad_norm": 1.2075046567818122, "learning_rate": 8.695319934952152e-06, "loss": 0.5563, "step": 8425 }, { "epoch": 0.2582444526173838, "grad_norm": 1.3104316545888461, "learning_rate": 8.694985579565988e-06, "loss": 0.6682, "step": 8426 }, { "epoch": 0.25827510114012503, "grad_norm": 1.2501754168876316, "learning_rate": 8.694651187772187e-06, "loss": 0.6565, "step": 8427 }, { "epoch": 0.25830574966286624, "grad_norm": 1.268800691547941, "learning_rate": 8.694316759574042e-06, "loss": 0.5899, "step": 8428 }, { "epoch": 0.25833639818560744, "grad_norm": 1.193120365133155, "learning_rate": 8.693982294974847e-06, "loss": 0.7386, "step": 8429 }, { "epoch": 0.25836704670834865, "grad_norm": 1.284819569154729, "learning_rate": 8.693647793977901e-06, "loss": 0.6559, "step": 8430 }, { "epoch": 0.25839769523108985, "grad_norm": 1.342896200933586, "learning_rate": 8.693313256586498e-06, "loss": 0.6846, "step": 8431 }, { "epoch": 0.25842834375383106, "grad_norm": 1.3863278100994316, "learning_rate": 8.692978682803934e-06, "loss": 0.676, "step": 8432 }, { "epoch": 0.25845899227657226, "grad_norm": 1.365561663191343, "learning_rate": 8.692644072633506e-06, "loss": 0.7058, "step": 8433 }, { "epoch": 0.25848964079931347, "grad_norm": 1.1361092733669595, "learning_rate": 8.692309426078514e-06, "loss": 0.7183, "step": 8434 }, { "epoch": 0.2585202893220547, "grad_norm": 1.313436783371316, "learning_rate": 8.691974743142249e-06, "loss": 0.6676, "step": 8435 }, { "epoch": 0.2585509378447959, "grad_norm": 0.5343209773999, "learning_rate": 8.691640023828014e-06, "loss": 0.434, "step": 8436 }, { "epoch": 0.2585815863675371, "grad_norm": 1.3562058540956499, "learning_rate": 8.691305268139104e-06, "loss": 0.7272, "step": 8437 }, { "epoch": 0.2586122348902783, "grad_norm": 1.2355294956637755, "learning_rate": 8.69097047607882e-06, "loss": 0.669, "step": 8438 }, { "epoch": 0.2586428834130195, "grad_norm": 1.2270984654086523, "learning_rate": 8.690635647650458e-06, "loss": 0.7045, "step": 8439 }, { "epoch": 0.2586735319357607, "grad_norm": 1.127184766513557, "learning_rate": 8.690300782857319e-06, "loss": 0.6162, "step": 8440 }, { "epoch": 0.2587041804585019, "grad_norm": 1.1307168273158767, "learning_rate": 8.689965881702704e-06, "loss": 0.73, "step": 8441 }, { "epoch": 0.2587348289812431, "grad_norm": 1.2671495248924898, "learning_rate": 8.68963094418991e-06, "loss": 0.6844, "step": 8442 }, { "epoch": 0.2587654775039843, "grad_norm": 1.2354291746429267, "learning_rate": 8.689295970322238e-06, "loss": 0.6661, "step": 8443 }, { "epoch": 0.2587961260267255, "grad_norm": 1.27710060556738, "learning_rate": 8.688960960102987e-06, "loss": 0.6761, "step": 8444 }, { "epoch": 0.25882677454946673, "grad_norm": 1.535706307987367, "learning_rate": 8.688625913535464e-06, "loss": 0.6846, "step": 8445 }, { "epoch": 0.25885742307220794, "grad_norm": 1.4023615410319432, "learning_rate": 8.688290830622963e-06, "loss": 0.7512, "step": 8446 }, { "epoch": 0.25888807159494914, "grad_norm": 1.1603542073779975, "learning_rate": 8.68795571136879e-06, "loss": 0.6943, "step": 8447 }, { "epoch": 0.25891872011769035, "grad_norm": 0.5287462138468726, "learning_rate": 8.687620555776244e-06, "loss": 0.4514, "step": 8448 }, { "epoch": 0.25894936864043155, "grad_norm": 0.4925235758089211, "learning_rate": 8.687285363848631e-06, "loss": 0.4379, "step": 8449 }, { "epoch": 0.25898001716317276, "grad_norm": 1.2152151642387576, "learning_rate": 8.686950135589251e-06, "loss": 0.7355, "step": 8450 }, { "epoch": 0.25901066568591397, "grad_norm": 1.2938818094872815, "learning_rate": 8.686614871001409e-06, "loss": 0.6641, "step": 8451 }, { "epoch": 0.2590413142086551, "grad_norm": 1.2796275292677135, "learning_rate": 8.686279570088408e-06, "loss": 0.7801, "step": 8452 }, { "epoch": 0.2590719627313963, "grad_norm": 0.458272569709917, "learning_rate": 8.68594423285355e-06, "loss": 0.4375, "step": 8453 }, { "epoch": 0.2591026112541375, "grad_norm": 1.2363992641685586, "learning_rate": 8.68560885930014e-06, "loss": 0.7257, "step": 8454 }, { "epoch": 0.25913325977687873, "grad_norm": 1.2168202725760853, "learning_rate": 8.685273449431483e-06, "loss": 0.6173, "step": 8455 }, { "epoch": 0.25916390829961994, "grad_norm": 1.2653455905167454, "learning_rate": 8.684938003250885e-06, "loss": 0.7035, "step": 8456 }, { "epoch": 0.25919455682236114, "grad_norm": 1.1745233255854195, "learning_rate": 8.684602520761648e-06, "loss": 0.7985, "step": 8457 }, { "epoch": 0.25922520534510235, "grad_norm": 1.2589422003855062, "learning_rate": 8.684267001967082e-06, "loss": 0.6479, "step": 8458 }, { "epoch": 0.25925585386784356, "grad_norm": 1.2463852288205013, "learning_rate": 8.683931446870488e-06, "loss": 0.6684, "step": 8459 }, { "epoch": 0.25928650239058476, "grad_norm": 1.2510034206081575, "learning_rate": 8.683595855475176e-06, "loss": 0.7153, "step": 8460 }, { "epoch": 0.25931715091332597, "grad_norm": 1.2577140653810046, "learning_rate": 8.683260227784452e-06, "loss": 0.7255, "step": 8461 }, { "epoch": 0.2593477994360672, "grad_norm": 0.5598823006170848, "learning_rate": 8.682924563801622e-06, "loss": 0.4475, "step": 8462 }, { "epoch": 0.2593784479588084, "grad_norm": 0.4895122040849615, "learning_rate": 8.682588863529994e-06, "loss": 0.4574, "step": 8463 }, { "epoch": 0.2594090964815496, "grad_norm": 1.279928157838936, "learning_rate": 8.682253126972878e-06, "loss": 0.6925, "step": 8464 }, { "epoch": 0.2594397450042908, "grad_norm": 1.3030793056361043, "learning_rate": 8.681917354133576e-06, "loss": 0.7017, "step": 8465 }, { "epoch": 0.259470393527032, "grad_norm": 0.5000639459867694, "learning_rate": 8.681581545015403e-06, "loss": 0.4355, "step": 8466 }, { "epoch": 0.2595010420497732, "grad_norm": 1.2622414403556501, "learning_rate": 8.681245699621663e-06, "loss": 0.6917, "step": 8467 }, { "epoch": 0.2595316905725144, "grad_norm": 0.47540826001278025, "learning_rate": 8.68090981795567e-06, "loss": 0.4352, "step": 8468 }, { "epoch": 0.2595623390952556, "grad_norm": 1.2162357301375426, "learning_rate": 8.680573900020727e-06, "loss": 0.5721, "step": 8469 }, { "epoch": 0.2595929876179968, "grad_norm": 1.2476782787855523, "learning_rate": 8.680237945820152e-06, "loss": 0.6166, "step": 8470 }, { "epoch": 0.259623636140738, "grad_norm": 1.2986323386266272, "learning_rate": 8.679901955357247e-06, "loss": 0.6305, "step": 8471 }, { "epoch": 0.25965428466347923, "grad_norm": 1.2788671661359725, "learning_rate": 8.679565928635329e-06, "loss": 0.6485, "step": 8472 }, { "epoch": 0.25968493318622043, "grad_norm": 1.3573983622070527, "learning_rate": 8.679229865657705e-06, "loss": 0.6745, "step": 8473 }, { "epoch": 0.25971558170896164, "grad_norm": 1.2079831055876715, "learning_rate": 8.678893766427688e-06, "loss": 0.7818, "step": 8474 }, { "epoch": 0.25974623023170285, "grad_norm": 1.2629345307056936, "learning_rate": 8.67855763094859e-06, "loss": 0.6233, "step": 8475 }, { "epoch": 0.25977687875444405, "grad_norm": 1.4501403312745416, "learning_rate": 8.678221459223722e-06, "loss": 0.6714, "step": 8476 }, { "epoch": 0.25980752727718526, "grad_norm": 1.2675193742526807, "learning_rate": 8.677885251256397e-06, "loss": 0.6464, "step": 8477 }, { "epoch": 0.25983817579992646, "grad_norm": 1.3772374032757961, "learning_rate": 8.677549007049928e-06, "loss": 0.8149, "step": 8478 }, { "epoch": 0.25986882432266767, "grad_norm": 1.3351794905090453, "learning_rate": 8.677212726607627e-06, "loss": 0.6349, "step": 8479 }, { "epoch": 0.2598994728454089, "grad_norm": 1.1513942327987639, "learning_rate": 8.676876409932809e-06, "loss": 0.5981, "step": 8480 }, { "epoch": 0.2599301213681501, "grad_norm": 1.1764531173111026, "learning_rate": 8.676540057028785e-06, "loss": 0.671, "step": 8481 }, { "epoch": 0.2599607698908913, "grad_norm": 0.5806988695596604, "learning_rate": 8.676203667898872e-06, "loss": 0.4578, "step": 8482 }, { "epoch": 0.25999141841363244, "grad_norm": 1.78098118517363, "learning_rate": 8.675867242546386e-06, "loss": 0.7432, "step": 8483 }, { "epoch": 0.26002206693637364, "grad_norm": 1.3241227118075087, "learning_rate": 8.675530780974637e-06, "loss": 0.7598, "step": 8484 }, { "epoch": 0.26005271545911485, "grad_norm": 0.47538874225575967, "learning_rate": 8.675194283186944e-06, "loss": 0.4211, "step": 8485 }, { "epoch": 0.26008336398185605, "grad_norm": 1.3194935588797168, "learning_rate": 8.67485774918662e-06, "loss": 0.7828, "step": 8486 }, { "epoch": 0.26011401250459726, "grad_norm": 1.225274044906928, "learning_rate": 8.674521178976985e-06, "loss": 0.7284, "step": 8487 }, { "epoch": 0.26014466102733846, "grad_norm": 1.2007386668638582, "learning_rate": 8.67418457256135e-06, "loss": 0.6957, "step": 8488 }, { "epoch": 0.26017530955007967, "grad_norm": 1.2867022124335914, "learning_rate": 8.673847929943036e-06, "loss": 0.6884, "step": 8489 }, { "epoch": 0.2602059580728209, "grad_norm": 1.4028822503024967, "learning_rate": 8.673511251125358e-06, "loss": 0.5757, "step": 8490 }, { "epoch": 0.2602366065955621, "grad_norm": 1.261626846215402, "learning_rate": 8.673174536111632e-06, "loss": 0.783, "step": 8491 }, { "epoch": 0.2602672551183033, "grad_norm": 1.3225089379223194, "learning_rate": 8.672837784905178e-06, "loss": 0.7597, "step": 8492 }, { "epoch": 0.2602979036410445, "grad_norm": 1.2154579916871282, "learning_rate": 8.672500997509316e-06, "loss": 0.7789, "step": 8493 }, { "epoch": 0.2603285521637857, "grad_norm": 1.2871352957573041, "learning_rate": 8.67216417392736e-06, "loss": 0.7081, "step": 8494 }, { "epoch": 0.2603592006865269, "grad_norm": 1.3467934831738995, "learning_rate": 8.67182731416263e-06, "loss": 0.6976, "step": 8495 }, { "epoch": 0.2603898492092681, "grad_norm": 1.2772909852598975, "learning_rate": 8.67149041821845e-06, "loss": 0.6662, "step": 8496 }, { "epoch": 0.2604204977320093, "grad_norm": 1.3477097104173095, "learning_rate": 8.671153486098131e-06, "loss": 0.7372, "step": 8497 }, { "epoch": 0.2604511462547505, "grad_norm": 1.261449637486628, "learning_rate": 8.670816517805e-06, "loss": 0.6733, "step": 8498 }, { "epoch": 0.2604817947774917, "grad_norm": 1.2496898936494472, "learning_rate": 8.670479513342373e-06, "loss": 0.5453, "step": 8499 }, { "epoch": 0.26051244330023293, "grad_norm": 0.6312305285004131, "learning_rate": 8.670142472713574e-06, "loss": 0.4726, "step": 8500 }, { "epoch": 0.26054309182297414, "grad_norm": 1.1414708573793995, "learning_rate": 8.66980539592192e-06, "loss": 0.7843, "step": 8501 }, { "epoch": 0.26057374034571534, "grad_norm": 0.475927417070506, "learning_rate": 8.669468282970736e-06, "loss": 0.4286, "step": 8502 }, { "epoch": 0.26060438886845655, "grad_norm": 1.198852875904467, "learning_rate": 8.669131133863342e-06, "loss": 0.7306, "step": 8503 }, { "epoch": 0.26063503739119775, "grad_norm": 1.4536734901589743, "learning_rate": 8.66879394860306e-06, "loss": 0.7804, "step": 8504 }, { "epoch": 0.26066568591393896, "grad_norm": 0.45680355795520106, "learning_rate": 8.668456727193213e-06, "loss": 0.4274, "step": 8505 }, { "epoch": 0.26069633443668017, "grad_norm": 1.255327181696738, "learning_rate": 8.668119469637122e-06, "loss": 0.6335, "step": 8506 }, { "epoch": 0.26072698295942137, "grad_norm": 1.2719339176554638, "learning_rate": 8.667782175938112e-06, "loss": 0.6737, "step": 8507 }, { "epoch": 0.2607576314821626, "grad_norm": 1.4467423093442182, "learning_rate": 8.667444846099507e-06, "loss": 0.7092, "step": 8508 }, { "epoch": 0.2607882800049038, "grad_norm": 0.503302811079295, "learning_rate": 8.667107480124629e-06, "loss": 0.4515, "step": 8509 }, { "epoch": 0.260818928527645, "grad_norm": 0.5129622433614409, "learning_rate": 8.6667700780168e-06, "loss": 0.4537, "step": 8510 }, { "epoch": 0.2608495770503862, "grad_norm": 1.2280700713004993, "learning_rate": 8.66643263977935e-06, "loss": 0.6601, "step": 8511 }, { "epoch": 0.2608802255731274, "grad_norm": 1.1124639268224148, "learning_rate": 8.666095165415602e-06, "loss": 0.6257, "step": 8512 }, { "epoch": 0.2609108740958686, "grad_norm": 1.3911327326930143, "learning_rate": 8.665757654928878e-06, "loss": 0.7383, "step": 8513 }, { "epoch": 0.26094152261860976, "grad_norm": 1.0779349455469955, "learning_rate": 8.665420108322507e-06, "loss": 0.6135, "step": 8514 }, { "epoch": 0.26097217114135096, "grad_norm": 0.4646760807811044, "learning_rate": 8.665082525599812e-06, "loss": 0.4457, "step": 8515 }, { "epoch": 0.26100281966409217, "grad_norm": 1.3002705989561825, "learning_rate": 8.664744906764124e-06, "loss": 0.7536, "step": 8516 }, { "epoch": 0.2610334681868334, "grad_norm": 1.183971840949454, "learning_rate": 8.664407251818765e-06, "loss": 0.6498, "step": 8517 }, { "epoch": 0.2610641167095746, "grad_norm": 1.2561562958416668, "learning_rate": 8.664069560767064e-06, "loss": 0.7259, "step": 8518 }, { "epoch": 0.2610947652323158, "grad_norm": 1.22734857523946, "learning_rate": 8.663731833612348e-06, "loss": 0.6613, "step": 8519 }, { "epoch": 0.261125413755057, "grad_norm": 1.1451914130854168, "learning_rate": 8.663394070357947e-06, "loss": 0.5905, "step": 8520 }, { "epoch": 0.2611560622777982, "grad_norm": 1.2896824517597882, "learning_rate": 8.663056271007185e-06, "loss": 0.5929, "step": 8521 }, { "epoch": 0.2611867108005394, "grad_norm": 1.1444603770535726, "learning_rate": 8.662718435563391e-06, "loss": 0.6284, "step": 8522 }, { "epoch": 0.2612173593232806, "grad_norm": 0.5839755302937913, "learning_rate": 8.662380564029897e-06, "loss": 0.4543, "step": 8523 }, { "epoch": 0.2612480078460218, "grad_norm": 1.235602202796807, "learning_rate": 8.66204265641003e-06, "loss": 0.6935, "step": 8524 }, { "epoch": 0.261278656368763, "grad_norm": 1.1137521684681062, "learning_rate": 8.66170471270712e-06, "loss": 0.6313, "step": 8525 }, { "epoch": 0.2613093048915042, "grad_norm": 1.1324928051754515, "learning_rate": 8.661366732924496e-06, "loss": 0.6941, "step": 8526 }, { "epoch": 0.26133995341424543, "grad_norm": 1.414167583063231, "learning_rate": 8.661028717065488e-06, "loss": 0.71, "step": 8527 }, { "epoch": 0.26137060193698664, "grad_norm": 1.4593530095961662, "learning_rate": 8.660690665133429e-06, "loss": 0.7203, "step": 8528 }, { "epoch": 0.26140125045972784, "grad_norm": 1.2056264839094286, "learning_rate": 8.660352577131646e-06, "loss": 0.651, "step": 8529 }, { "epoch": 0.26143189898246905, "grad_norm": 1.3762626709079138, "learning_rate": 8.660014453063475e-06, "loss": 0.6962, "step": 8530 }, { "epoch": 0.26146254750521025, "grad_norm": 0.5101082080567525, "learning_rate": 8.659676292932244e-06, "loss": 0.4418, "step": 8531 }, { "epoch": 0.26149319602795146, "grad_norm": 1.2017784531928364, "learning_rate": 8.659338096741285e-06, "loss": 0.6908, "step": 8532 }, { "epoch": 0.26152384455069266, "grad_norm": 1.3324148311134258, "learning_rate": 8.658999864493934e-06, "loss": 0.6661, "step": 8533 }, { "epoch": 0.26155449307343387, "grad_norm": 1.3117707564118597, "learning_rate": 8.658661596193519e-06, "loss": 0.7129, "step": 8534 }, { "epoch": 0.2615851415961751, "grad_norm": 1.2356497871792027, "learning_rate": 8.658323291843375e-06, "loss": 0.7262, "step": 8535 }, { "epoch": 0.2616157901189163, "grad_norm": 1.225970336039338, "learning_rate": 8.657984951446838e-06, "loss": 0.6887, "step": 8536 }, { "epoch": 0.2616464386416575, "grad_norm": 1.0814582880217696, "learning_rate": 8.657646575007238e-06, "loss": 0.7538, "step": 8537 }, { "epoch": 0.2616770871643987, "grad_norm": 1.2320787275967047, "learning_rate": 8.65730816252791e-06, "loss": 0.7291, "step": 8538 }, { "epoch": 0.2617077356871399, "grad_norm": 1.2609662476471188, "learning_rate": 8.65696971401219e-06, "loss": 0.7071, "step": 8539 }, { "epoch": 0.2617383842098811, "grad_norm": 1.2937180793395007, "learning_rate": 8.65663122946341e-06, "loss": 0.7907, "step": 8540 }, { "epoch": 0.2617690327326223, "grad_norm": 1.2469352179672886, "learning_rate": 8.656292708884908e-06, "loss": 0.6869, "step": 8541 }, { "epoch": 0.2617996812553635, "grad_norm": 1.249729073125973, "learning_rate": 8.65595415228002e-06, "loss": 0.6267, "step": 8542 }, { "epoch": 0.2618303297781047, "grad_norm": 0.46108990794692734, "learning_rate": 8.655615559652078e-06, "loss": 0.4553, "step": 8543 }, { "epoch": 0.2618609783008459, "grad_norm": 1.508922758232217, "learning_rate": 8.655276931004422e-06, "loss": 0.644, "step": 8544 }, { "epoch": 0.2618916268235871, "grad_norm": 1.2643879735376513, "learning_rate": 8.654938266340384e-06, "loss": 0.6061, "step": 8545 }, { "epoch": 0.2619222753463283, "grad_norm": 1.327495166949583, "learning_rate": 8.654599565663307e-06, "loss": 0.7342, "step": 8546 }, { "epoch": 0.2619529238690695, "grad_norm": 1.3027643178952115, "learning_rate": 8.654260828976526e-06, "loss": 0.6945, "step": 8547 }, { "epoch": 0.2619835723918107, "grad_norm": 1.453702723203517, "learning_rate": 8.653922056283376e-06, "loss": 0.7141, "step": 8548 }, { "epoch": 0.2620142209145519, "grad_norm": 1.3346249111385875, "learning_rate": 8.653583247587198e-06, "loss": 0.7857, "step": 8549 }, { "epoch": 0.2620448694372931, "grad_norm": 1.3743731712899123, "learning_rate": 8.65324440289133e-06, "loss": 0.636, "step": 8550 }, { "epoch": 0.2620755179600343, "grad_norm": 1.2513448874845714, "learning_rate": 8.65290552219911e-06, "loss": 0.6537, "step": 8551 }, { "epoch": 0.2621061664827755, "grad_norm": 1.2952303493442794, "learning_rate": 8.652566605513877e-06, "loss": 0.7384, "step": 8552 }, { "epoch": 0.2621368150055167, "grad_norm": 1.2068509774018588, "learning_rate": 8.65222765283897e-06, "loss": 0.7139, "step": 8553 }, { "epoch": 0.2621674635282579, "grad_norm": 1.366192960492027, "learning_rate": 8.65188866417773e-06, "loss": 0.6321, "step": 8554 }, { "epoch": 0.26219811205099913, "grad_norm": 1.2567118555684924, "learning_rate": 8.651549639533496e-06, "loss": 0.5807, "step": 8555 }, { "epoch": 0.26222876057374034, "grad_norm": 1.2603232755749745, "learning_rate": 8.65121057890961e-06, "loss": 0.6582, "step": 8556 }, { "epoch": 0.26225940909648154, "grad_norm": 1.2608167551219462, "learning_rate": 8.650871482309413e-06, "loss": 0.7012, "step": 8557 }, { "epoch": 0.26229005761922275, "grad_norm": 1.2877304892718686, "learning_rate": 8.650532349736244e-06, "loss": 0.7042, "step": 8558 }, { "epoch": 0.26232070614196396, "grad_norm": 0.4627190063547194, "learning_rate": 8.650193181193444e-06, "loss": 0.4497, "step": 8559 }, { "epoch": 0.26235135466470516, "grad_norm": 1.3720022648516452, "learning_rate": 8.649853976684358e-06, "loss": 0.6509, "step": 8560 }, { "epoch": 0.26238200318744637, "grad_norm": 1.3069562231073932, "learning_rate": 8.64951473621233e-06, "loss": 0.6475, "step": 8561 }, { "epoch": 0.26241265171018757, "grad_norm": 1.3447130616365834, "learning_rate": 8.649175459780695e-06, "loss": 0.7812, "step": 8562 }, { "epoch": 0.2624433002329288, "grad_norm": 1.2908742713700372, "learning_rate": 8.648836147392802e-06, "loss": 0.7614, "step": 8563 }, { "epoch": 0.26247394875567, "grad_norm": 1.245979481617068, "learning_rate": 8.648496799051994e-06, "loss": 0.6707, "step": 8564 }, { "epoch": 0.2625045972784112, "grad_norm": 0.45161418811136356, "learning_rate": 8.648157414761613e-06, "loss": 0.4456, "step": 8565 }, { "epoch": 0.2625352458011524, "grad_norm": 1.351451070571529, "learning_rate": 8.647817994525002e-06, "loss": 0.6947, "step": 8566 }, { "epoch": 0.2625658943238936, "grad_norm": 1.3179431962828811, "learning_rate": 8.647478538345508e-06, "loss": 0.7604, "step": 8567 }, { "epoch": 0.2625965428466348, "grad_norm": 1.2565393949477457, "learning_rate": 8.647139046226476e-06, "loss": 0.7497, "step": 8568 }, { "epoch": 0.262627191369376, "grad_norm": 1.246929931161116, "learning_rate": 8.646799518171249e-06, "loss": 0.8239, "step": 8569 }, { "epoch": 0.2626578398921172, "grad_norm": 1.2732760466812376, "learning_rate": 8.646459954183173e-06, "loss": 0.6427, "step": 8570 }, { "epoch": 0.2626884884148584, "grad_norm": 1.1917152444380252, "learning_rate": 8.646120354265594e-06, "loss": 0.78, "step": 8571 }, { "epoch": 0.26271913693759963, "grad_norm": 1.2626490965595802, "learning_rate": 8.645780718421858e-06, "loss": 0.6792, "step": 8572 }, { "epoch": 0.26274978546034083, "grad_norm": 0.47960417439168934, "learning_rate": 8.645441046655312e-06, "loss": 0.4406, "step": 8573 }, { "epoch": 0.26278043398308204, "grad_norm": 1.2843900088652578, "learning_rate": 8.645101338969303e-06, "loss": 0.6041, "step": 8574 }, { "epoch": 0.26281108250582325, "grad_norm": 1.1393123934057834, "learning_rate": 8.644761595367177e-06, "loss": 0.6581, "step": 8575 }, { "epoch": 0.2628417310285644, "grad_norm": 1.1482055173991845, "learning_rate": 8.644421815852284e-06, "loss": 0.6738, "step": 8576 }, { "epoch": 0.2628723795513056, "grad_norm": 1.251505227877393, "learning_rate": 8.644082000427968e-06, "loss": 0.7325, "step": 8577 }, { "epoch": 0.2629030280740468, "grad_norm": 1.2828673226024219, "learning_rate": 8.643742149097582e-06, "loss": 0.7342, "step": 8578 }, { "epoch": 0.262933676596788, "grad_norm": 1.2194827072883838, "learning_rate": 8.64340226186447e-06, "loss": 0.6874, "step": 8579 }, { "epoch": 0.2629643251195292, "grad_norm": 1.2353802190664829, "learning_rate": 8.643062338731987e-06, "loss": 0.6926, "step": 8580 }, { "epoch": 0.2629949736422704, "grad_norm": 0.4866428476844136, "learning_rate": 8.642722379703477e-06, "loss": 0.4406, "step": 8581 }, { "epoch": 0.26302562216501163, "grad_norm": 1.2830523633179642, "learning_rate": 8.64238238478229e-06, "loss": 0.8544, "step": 8582 }, { "epoch": 0.26305627068775284, "grad_norm": 1.2034429032902443, "learning_rate": 8.642042353971778e-06, "loss": 0.7242, "step": 8583 }, { "epoch": 0.26308691921049404, "grad_norm": 1.0863900561863082, "learning_rate": 8.641702287275291e-06, "loss": 0.6517, "step": 8584 }, { "epoch": 0.26311756773323525, "grad_norm": 1.1432382959239105, "learning_rate": 8.641362184696179e-06, "loss": 0.6631, "step": 8585 }, { "epoch": 0.26314821625597645, "grad_norm": 1.4133311976181109, "learning_rate": 8.641022046237795e-06, "loss": 0.7799, "step": 8586 }, { "epoch": 0.26317886477871766, "grad_norm": 1.2569677316700536, "learning_rate": 8.640681871903488e-06, "loss": 0.7145, "step": 8587 }, { "epoch": 0.26320951330145886, "grad_norm": 1.2522943175785117, "learning_rate": 8.640341661696612e-06, "loss": 0.7194, "step": 8588 }, { "epoch": 0.26324016182420007, "grad_norm": 0.4850397642025912, "learning_rate": 8.640001415620519e-06, "loss": 0.4369, "step": 8589 }, { "epoch": 0.2632708103469413, "grad_norm": 1.3443317732546645, "learning_rate": 8.639661133678558e-06, "loss": 0.6849, "step": 8590 }, { "epoch": 0.2633014588696825, "grad_norm": 0.4716921146689399, "learning_rate": 8.639320815874087e-06, "loss": 0.4311, "step": 8591 }, { "epoch": 0.2633321073924237, "grad_norm": 1.2945375463771391, "learning_rate": 8.638980462210455e-06, "loss": 0.7289, "step": 8592 }, { "epoch": 0.2633627559151649, "grad_norm": 1.1677228074079644, "learning_rate": 8.638640072691017e-06, "loss": 0.7146, "step": 8593 }, { "epoch": 0.2633934044379061, "grad_norm": 1.328120068308841, "learning_rate": 8.63829964731913e-06, "loss": 0.6747, "step": 8594 }, { "epoch": 0.2634240529606473, "grad_norm": 1.2324678025756033, "learning_rate": 8.637959186098143e-06, "loss": 0.6998, "step": 8595 }, { "epoch": 0.2634547014833885, "grad_norm": 1.327922382830414, "learning_rate": 8.637618689031415e-06, "loss": 0.6275, "step": 8596 }, { "epoch": 0.2634853500061297, "grad_norm": 1.0866065092387551, "learning_rate": 8.6372781561223e-06, "loss": 0.5911, "step": 8597 }, { "epoch": 0.2635159985288709, "grad_norm": 1.322211553113275, "learning_rate": 8.636937587374152e-06, "loss": 0.5754, "step": 8598 }, { "epoch": 0.2635466470516121, "grad_norm": 1.1450150851352279, "learning_rate": 8.636596982790327e-06, "loss": 0.6007, "step": 8599 }, { "epoch": 0.26357729557435333, "grad_norm": 1.2260234662435925, "learning_rate": 8.63625634237418e-06, "loss": 0.755, "step": 8600 }, { "epoch": 0.26360794409709454, "grad_norm": 1.1393148933189612, "learning_rate": 8.63591566612907e-06, "loss": 0.6889, "step": 8601 }, { "epoch": 0.26363859261983574, "grad_norm": 0.5296177499686503, "learning_rate": 8.635574954058355e-06, "loss": 0.4468, "step": 8602 }, { "epoch": 0.26366924114257695, "grad_norm": 1.1777110105148862, "learning_rate": 8.635234206165386e-06, "loss": 0.6347, "step": 8603 }, { "epoch": 0.26369988966531815, "grad_norm": 1.1780981662533603, "learning_rate": 8.634893422453527e-06, "loss": 0.5914, "step": 8604 }, { "epoch": 0.26373053818805936, "grad_norm": 1.112873185251005, "learning_rate": 8.634552602926133e-06, "loss": 0.6, "step": 8605 }, { "epoch": 0.26376118671080057, "grad_norm": 1.2738846401807902, "learning_rate": 8.63421174758656e-06, "loss": 0.5823, "step": 8606 }, { "epoch": 0.2637918352335417, "grad_norm": 0.48134105466264426, "learning_rate": 8.63387085643817e-06, "loss": 0.4359, "step": 8607 }, { "epoch": 0.2638224837562829, "grad_norm": 1.2420117591201862, "learning_rate": 8.633529929484322e-06, "loss": 0.6983, "step": 8608 }, { "epoch": 0.2638531322790241, "grad_norm": 1.1993494263299354, "learning_rate": 8.633188966728374e-06, "loss": 0.7048, "step": 8609 }, { "epoch": 0.26388378080176533, "grad_norm": 1.315443478545406, "learning_rate": 8.632847968173683e-06, "loss": 0.752, "step": 8610 }, { "epoch": 0.26391442932450654, "grad_norm": 0.45751785184515786, "learning_rate": 8.632506933823613e-06, "loss": 0.4502, "step": 8611 }, { "epoch": 0.26394507784724774, "grad_norm": 1.2730998173052939, "learning_rate": 8.632165863681523e-06, "loss": 0.6746, "step": 8612 }, { "epoch": 0.26397572636998895, "grad_norm": 1.3211116452891247, "learning_rate": 8.631824757750774e-06, "loss": 0.6738, "step": 8613 }, { "epoch": 0.26400637489273016, "grad_norm": 1.4053299821934562, "learning_rate": 8.631483616034725e-06, "loss": 0.6808, "step": 8614 }, { "epoch": 0.26403702341547136, "grad_norm": 1.1309624077140374, "learning_rate": 8.631142438536739e-06, "loss": 0.6838, "step": 8615 }, { "epoch": 0.26406767193821257, "grad_norm": 1.2128990306114875, "learning_rate": 8.630801225260177e-06, "loss": 0.7464, "step": 8616 }, { "epoch": 0.2640983204609538, "grad_norm": 1.2722265450298695, "learning_rate": 8.630459976208403e-06, "loss": 0.6383, "step": 8617 }, { "epoch": 0.264128968983695, "grad_norm": 1.2972006027800447, "learning_rate": 8.630118691384776e-06, "loss": 0.6403, "step": 8618 }, { "epoch": 0.2641596175064362, "grad_norm": 1.591901936374825, "learning_rate": 8.629777370792663e-06, "loss": 0.621, "step": 8619 }, { "epoch": 0.2641902660291774, "grad_norm": 1.2527058427089233, "learning_rate": 8.629436014435424e-06, "loss": 0.6677, "step": 8620 }, { "epoch": 0.2642209145519186, "grad_norm": 1.1290763658430247, "learning_rate": 8.629094622316423e-06, "loss": 0.6585, "step": 8621 }, { "epoch": 0.2642515630746598, "grad_norm": 1.1924127946091654, "learning_rate": 8.628753194439024e-06, "loss": 0.7534, "step": 8622 }, { "epoch": 0.264282211597401, "grad_norm": 1.25350850552528, "learning_rate": 8.628411730806592e-06, "loss": 0.6211, "step": 8623 }, { "epoch": 0.2643128601201422, "grad_norm": 0.5561489714576224, "learning_rate": 8.62807023142249e-06, "loss": 0.4247, "step": 8624 }, { "epoch": 0.2643435086428834, "grad_norm": 1.1554004913549092, "learning_rate": 8.627728696290084e-06, "loss": 0.7624, "step": 8625 }, { "epoch": 0.2643741571656246, "grad_norm": 1.3575000895019993, "learning_rate": 8.62738712541274e-06, "loss": 0.6645, "step": 8626 }, { "epoch": 0.26440480568836583, "grad_norm": 1.3174359355895584, "learning_rate": 8.627045518793821e-06, "loss": 0.6105, "step": 8627 }, { "epoch": 0.26443545421110703, "grad_norm": 1.3769065035388905, "learning_rate": 8.626703876436695e-06, "loss": 0.7578, "step": 8628 }, { "epoch": 0.26446610273384824, "grad_norm": 1.2627599135678724, "learning_rate": 8.626362198344728e-06, "loss": 0.6368, "step": 8629 }, { "epoch": 0.26449675125658945, "grad_norm": 1.3186769995705907, "learning_rate": 8.626020484521287e-06, "loss": 0.6304, "step": 8630 }, { "epoch": 0.26452739977933065, "grad_norm": 0.49793313920943605, "learning_rate": 8.625678734969737e-06, "loss": 0.433, "step": 8631 }, { "epoch": 0.26455804830207186, "grad_norm": 1.3092478758697483, "learning_rate": 8.625336949693448e-06, "loss": 0.6847, "step": 8632 }, { "epoch": 0.26458869682481306, "grad_norm": 1.2656707043656745, "learning_rate": 8.624995128695785e-06, "loss": 0.6733, "step": 8633 }, { "epoch": 0.26461934534755427, "grad_norm": 1.1699667171384556, "learning_rate": 8.62465327198012e-06, "loss": 0.7067, "step": 8634 }, { "epoch": 0.2646499938702955, "grad_norm": 0.49641905679691045, "learning_rate": 8.624311379549817e-06, "loss": 0.4503, "step": 8635 }, { "epoch": 0.2646806423930367, "grad_norm": 1.432341768021552, "learning_rate": 8.623969451408248e-06, "loss": 0.7082, "step": 8636 }, { "epoch": 0.2647112909157779, "grad_norm": 1.187318779955506, "learning_rate": 8.623627487558779e-06, "loss": 0.6777, "step": 8637 }, { "epoch": 0.26474193943851904, "grad_norm": 1.2409194568870447, "learning_rate": 8.623285488004781e-06, "loss": 0.7752, "step": 8638 }, { "epoch": 0.26477258796126024, "grad_norm": 1.447315463028888, "learning_rate": 8.622943452749626e-06, "loss": 0.7036, "step": 8639 }, { "epoch": 0.26480323648400145, "grad_norm": 1.080404931002666, "learning_rate": 8.62260138179668e-06, "loss": 0.6606, "step": 8640 }, { "epoch": 0.26483388500674265, "grad_norm": 1.2781218747170582, "learning_rate": 8.622259275149317e-06, "loss": 0.7046, "step": 8641 }, { "epoch": 0.26486453352948386, "grad_norm": 1.3553202454694946, "learning_rate": 8.621917132810906e-06, "loss": 0.8086, "step": 8642 }, { "epoch": 0.26489518205222506, "grad_norm": 1.141972865609203, "learning_rate": 8.621574954784821e-06, "loss": 0.6604, "step": 8643 }, { "epoch": 0.26492583057496627, "grad_norm": 1.4370042800745348, "learning_rate": 8.621232741074429e-06, "loss": 0.6318, "step": 8644 }, { "epoch": 0.2649564790977075, "grad_norm": 0.5008130287107083, "learning_rate": 8.620890491683105e-06, "loss": 0.4527, "step": 8645 }, { "epoch": 0.2649871276204487, "grad_norm": 1.2501227345421964, "learning_rate": 8.62054820661422e-06, "loss": 0.5595, "step": 8646 }, { "epoch": 0.2650177761431899, "grad_norm": 1.4422193821481577, "learning_rate": 8.620205885871147e-06, "loss": 0.6428, "step": 8647 }, { "epoch": 0.2650484246659311, "grad_norm": 1.3948348039064495, "learning_rate": 8.61986352945726e-06, "loss": 0.6841, "step": 8648 }, { "epoch": 0.2650790731886723, "grad_norm": 1.2522391095196428, "learning_rate": 8.619521137375932e-06, "loss": 0.6484, "step": 8649 }, { "epoch": 0.2651097217114135, "grad_norm": 0.4821605708264371, "learning_rate": 8.619178709630536e-06, "loss": 0.436, "step": 8650 }, { "epoch": 0.2651403702341547, "grad_norm": 1.2761692520008454, "learning_rate": 8.618836246224444e-06, "loss": 0.7505, "step": 8651 }, { "epoch": 0.2651710187568959, "grad_norm": 1.20898863822313, "learning_rate": 8.618493747161034e-06, "loss": 0.6882, "step": 8652 }, { "epoch": 0.2652016672796371, "grad_norm": 1.2224303353779782, "learning_rate": 8.618151212443679e-06, "loss": 0.7131, "step": 8653 }, { "epoch": 0.2652323158023783, "grad_norm": 1.1966190084115884, "learning_rate": 8.617808642075756e-06, "loss": 0.6493, "step": 8654 }, { "epoch": 0.26526296432511953, "grad_norm": 1.1849975448291556, "learning_rate": 8.617466036060638e-06, "loss": 0.7389, "step": 8655 }, { "epoch": 0.26529361284786074, "grad_norm": 1.3286797274219913, "learning_rate": 8.6171233944017e-06, "loss": 0.7705, "step": 8656 }, { "epoch": 0.26532426137060194, "grad_norm": 1.5072073879483199, "learning_rate": 8.61678071710232e-06, "loss": 0.7159, "step": 8657 }, { "epoch": 0.26535490989334315, "grad_norm": 1.161133357145324, "learning_rate": 8.616438004165876e-06, "loss": 0.6371, "step": 8658 }, { "epoch": 0.26538555841608436, "grad_norm": 0.4698651688662234, "learning_rate": 8.616095255595743e-06, "loss": 0.4488, "step": 8659 }, { "epoch": 0.26541620693882556, "grad_norm": 1.2715225150849758, "learning_rate": 8.615752471395296e-06, "loss": 0.7617, "step": 8660 }, { "epoch": 0.26544685546156677, "grad_norm": 1.2984254430267599, "learning_rate": 8.615409651567916e-06, "loss": 0.6788, "step": 8661 }, { "epoch": 0.26547750398430797, "grad_norm": 1.3089667932238758, "learning_rate": 8.61506679611698e-06, "loss": 0.7212, "step": 8662 }, { "epoch": 0.2655081525070492, "grad_norm": 1.560701985325999, "learning_rate": 8.614723905045865e-06, "loss": 0.7222, "step": 8663 }, { "epoch": 0.2655388010297904, "grad_norm": 1.3690436228645269, "learning_rate": 8.61438097835795e-06, "loss": 0.6142, "step": 8664 }, { "epoch": 0.2655694495525316, "grad_norm": 1.2416011114717036, "learning_rate": 8.614038016056617e-06, "loss": 0.6689, "step": 8665 }, { "epoch": 0.2656000980752728, "grad_norm": 1.362533440074303, "learning_rate": 8.613695018145241e-06, "loss": 0.7422, "step": 8666 }, { "epoch": 0.265630746598014, "grad_norm": 1.336682697718301, "learning_rate": 8.613351984627204e-06, "loss": 0.7318, "step": 8667 }, { "epoch": 0.2656613951207552, "grad_norm": 1.3634543371517414, "learning_rate": 8.613008915505885e-06, "loss": 0.6836, "step": 8668 }, { "epoch": 0.26569204364349636, "grad_norm": 1.6941297836773683, "learning_rate": 8.612665810784664e-06, "loss": 0.7975, "step": 8669 }, { "epoch": 0.26572269216623756, "grad_norm": 1.2196094895642977, "learning_rate": 8.612322670466924e-06, "loss": 0.6611, "step": 8670 }, { "epoch": 0.26575334068897877, "grad_norm": 1.480189151313414, "learning_rate": 8.611979494556043e-06, "loss": 0.7113, "step": 8671 }, { "epoch": 0.26578398921172, "grad_norm": 1.3113801227735293, "learning_rate": 8.611636283055405e-06, "loss": 0.7313, "step": 8672 }, { "epoch": 0.2658146377344612, "grad_norm": 1.2661186902767032, "learning_rate": 8.61129303596839e-06, "loss": 0.6509, "step": 8673 }, { "epoch": 0.2658452862572024, "grad_norm": 1.1760592185246261, "learning_rate": 8.61094975329838e-06, "loss": 0.6805, "step": 8674 }, { "epoch": 0.2658759347799436, "grad_norm": 1.1330909640030182, "learning_rate": 8.610606435048761e-06, "loss": 0.6596, "step": 8675 }, { "epoch": 0.2659065833026848, "grad_norm": 1.3798149952779648, "learning_rate": 8.61026308122291e-06, "loss": 0.6743, "step": 8676 }, { "epoch": 0.265937231825426, "grad_norm": 0.47045708907567657, "learning_rate": 8.609919691824213e-06, "loss": 0.4418, "step": 8677 }, { "epoch": 0.2659678803481672, "grad_norm": 1.2025084355221034, "learning_rate": 8.609576266856057e-06, "loss": 0.6362, "step": 8678 }, { "epoch": 0.2659985288709084, "grad_norm": 0.476742035182971, "learning_rate": 8.60923280632182e-06, "loss": 0.4731, "step": 8679 }, { "epoch": 0.2660291773936496, "grad_norm": 1.3647930971872593, "learning_rate": 8.608889310224888e-06, "loss": 0.7024, "step": 8680 }, { "epoch": 0.2660598259163908, "grad_norm": 1.2800125112825556, "learning_rate": 8.608545778568648e-06, "loss": 0.6778, "step": 8681 }, { "epoch": 0.26609047443913203, "grad_norm": 1.2765534483511527, "learning_rate": 8.608202211356483e-06, "loss": 0.6949, "step": 8682 }, { "epoch": 0.26612112296187324, "grad_norm": 1.4569641963854254, "learning_rate": 8.607858608591778e-06, "loss": 0.6941, "step": 8683 }, { "epoch": 0.26615177148461444, "grad_norm": 0.4562390485028987, "learning_rate": 8.607514970277917e-06, "loss": 0.4152, "step": 8684 }, { "epoch": 0.26618242000735565, "grad_norm": 1.2296803830408036, "learning_rate": 8.60717129641829e-06, "loss": 0.6708, "step": 8685 }, { "epoch": 0.26621306853009685, "grad_norm": 1.4393992018330397, "learning_rate": 8.606827587016281e-06, "loss": 0.6802, "step": 8686 }, { "epoch": 0.26624371705283806, "grad_norm": 0.4468916213508266, "learning_rate": 8.606483842075277e-06, "loss": 0.4417, "step": 8687 }, { "epoch": 0.26627436557557926, "grad_norm": 1.226959973101805, "learning_rate": 8.606140061598665e-06, "loss": 0.6805, "step": 8688 }, { "epoch": 0.26630501409832047, "grad_norm": 1.3912340531423484, "learning_rate": 8.605796245589833e-06, "loss": 0.8369, "step": 8689 }, { "epoch": 0.2663356626210617, "grad_norm": 1.0957875000744386, "learning_rate": 8.605452394052168e-06, "loss": 0.5819, "step": 8690 }, { "epoch": 0.2663663111438029, "grad_norm": 1.2147624318961814, "learning_rate": 8.605108506989057e-06, "loss": 0.7395, "step": 8691 }, { "epoch": 0.2663969596665441, "grad_norm": 1.231527124375371, "learning_rate": 8.604764584403888e-06, "loss": 0.7001, "step": 8692 }, { "epoch": 0.2664276081892853, "grad_norm": 1.2557913931911415, "learning_rate": 8.604420626300054e-06, "loss": 0.6646, "step": 8693 }, { "epoch": 0.2664582567120265, "grad_norm": 1.2114929755151955, "learning_rate": 8.60407663268094e-06, "loss": 0.7067, "step": 8694 }, { "epoch": 0.2664889052347677, "grad_norm": 0.5001719172359658, "learning_rate": 8.603732603549938e-06, "loss": 0.4488, "step": 8695 }, { "epoch": 0.2665195537575089, "grad_norm": 0.48642650310511404, "learning_rate": 8.603388538910435e-06, "loss": 0.4464, "step": 8696 }, { "epoch": 0.2665502022802501, "grad_norm": 1.410945621820425, "learning_rate": 8.603044438765824e-06, "loss": 0.6426, "step": 8697 }, { "epoch": 0.2665808508029913, "grad_norm": 1.3989717669564024, "learning_rate": 8.602700303119493e-06, "loss": 0.7863, "step": 8698 }, { "epoch": 0.2666114993257325, "grad_norm": 1.2948965150789717, "learning_rate": 8.602356131974837e-06, "loss": 0.8239, "step": 8699 }, { "epoch": 0.2666421478484737, "grad_norm": 1.1224814775747087, "learning_rate": 8.602011925335241e-06, "loss": 0.568, "step": 8700 }, { "epoch": 0.2666727963712149, "grad_norm": 1.106078896728433, "learning_rate": 8.601667683204101e-06, "loss": 0.6644, "step": 8701 }, { "epoch": 0.2667034448939561, "grad_norm": 1.2445330517929543, "learning_rate": 8.601323405584808e-06, "loss": 0.757, "step": 8702 }, { "epoch": 0.2667340934166973, "grad_norm": 1.3458067940202667, "learning_rate": 8.600979092480755e-06, "loss": 0.7305, "step": 8703 }, { "epoch": 0.2667647419394385, "grad_norm": 1.2697298181055563, "learning_rate": 8.600634743895332e-06, "loss": 0.6723, "step": 8704 }, { "epoch": 0.2667953904621797, "grad_norm": 1.4935537835256447, "learning_rate": 8.600290359831935e-06, "loss": 0.7901, "step": 8705 }, { "epoch": 0.2668260389849209, "grad_norm": 1.230629179762413, "learning_rate": 8.599945940293955e-06, "loss": 0.6611, "step": 8706 }, { "epoch": 0.2668566875076621, "grad_norm": 1.1814137530067246, "learning_rate": 8.599601485284787e-06, "loss": 0.6697, "step": 8707 }, { "epoch": 0.2668873360304033, "grad_norm": 1.2492698635870934, "learning_rate": 8.599256994807823e-06, "loss": 0.6404, "step": 8708 }, { "epoch": 0.2669179845531445, "grad_norm": 1.2821726709172785, "learning_rate": 8.598912468866461e-06, "loss": 0.7154, "step": 8709 }, { "epoch": 0.26694863307588573, "grad_norm": 1.441239739957361, "learning_rate": 8.598567907464093e-06, "loss": 0.6939, "step": 8710 }, { "epoch": 0.26697928159862694, "grad_norm": 1.2884184019913814, "learning_rate": 8.598223310604115e-06, "loss": 0.6692, "step": 8711 }, { "epoch": 0.26700993012136814, "grad_norm": 1.1945181605539412, "learning_rate": 8.597878678289921e-06, "loss": 0.7529, "step": 8712 }, { "epoch": 0.26704057864410935, "grad_norm": 1.3038284988869202, "learning_rate": 8.597534010524908e-06, "loss": 0.7607, "step": 8713 }, { "epoch": 0.26707122716685056, "grad_norm": 1.4705188601858608, "learning_rate": 8.597189307312472e-06, "loss": 0.7214, "step": 8714 }, { "epoch": 0.26710187568959176, "grad_norm": 1.3575305760187688, "learning_rate": 8.59684456865601e-06, "loss": 0.6758, "step": 8715 }, { "epoch": 0.26713252421233297, "grad_norm": 1.1827377581703356, "learning_rate": 8.596499794558918e-06, "loss": 0.608, "step": 8716 }, { "epoch": 0.2671631727350742, "grad_norm": 1.3490433208725154, "learning_rate": 8.596154985024594e-06, "loss": 0.6787, "step": 8717 }, { "epoch": 0.2671938212578154, "grad_norm": 1.4025778382966525, "learning_rate": 8.595810140056433e-06, "loss": 0.7531, "step": 8718 }, { "epoch": 0.2672244697805566, "grad_norm": 1.4150664578442715, "learning_rate": 8.595465259657837e-06, "loss": 0.7014, "step": 8719 }, { "epoch": 0.2672551183032978, "grad_norm": 1.38906292743763, "learning_rate": 8.5951203438322e-06, "loss": 0.6695, "step": 8720 }, { "epoch": 0.267285766826039, "grad_norm": 1.467491589428927, "learning_rate": 8.594775392582923e-06, "loss": 0.7157, "step": 8721 }, { "epoch": 0.2673164153487802, "grad_norm": 1.3113086221118575, "learning_rate": 8.594430405913403e-06, "loss": 0.7495, "step": 8722 }, { "epoch": 0.2673470638715214, "grad_norm": 1.2988779078205164, "learning_rate": 8.594085383827043e-06, "loss": 0.6359, "step": 8723 }, { "epoch": 0.2673777123942626, "grad_norm": 1.3187164244285752, "learning_rate": 8.593740326327237e-06, "loss": 0.6484, "step": 8724 }, { "epoch": 0.2674083609170038, "grad_norm": 1.2092205511227105, "learning_rate": 8.59339523341739e-06, "loss": 0.6993, "step": 8725 }, { "epoch": 0.267439009439745, "grad_norm": 1.1095890292531272, "learning_rate": 8.593050105100902e-06, "loss": 0.7379, "step": 8726 }, { "epoch": 0.26746965796248623, "grad_norm": 1.2019265375552661, "learning_rate": 8.59270494138117e-06, "loss": 0.6948, "step": 8727 }, { "epoch": 0.26750030648522743, "grad_norm": 1.2586661409318014, "learning_rate": 8.592359742261598e-06, "loss": 0.6886, "step": 8728 }, { "epoch": 0.26753095500796864, "grad_norm": 1.2633695322037037, "learning_rate": 8.592014507745586e-06, "loss": 0.6425, "step": 8729 }, { "epoch": 0.26756160353070985, "grad_norm": 1.045048964512166, "learning_rate": 8.591669237836534e-06, "loss": 0.6674, "step": 8730 }, { "epoch": 0.267592252053451, "grad_norm": 1.2695389316780146, "learning_rate": 8.591323932537847e-06, "loss": 0.7145, "step": 8731 }, { "epoch": 0.2676229005761922, "grad_norm": 1.4154139657383344, "learning_rate": 8.590978591852928e-06, "loss": 0.7272, "step": 8732 }, { "epoch": 0.2676535490989334, "grad_norm": 0.6243665947618138, "learning_rate": 8.590633215785178e-06, "loss": 0.4526, "step": 8733 }, { "epoch": 0.2676841976216746, "grad_norm": 0.5643724957934406, "learning_rate": 8.590287804337998e-06, "loss": 0.4571, "step": 8734 }, { "epoch": 0.2677148461444158, "grad_norm": 1.3052520983848566, "learning_rate": 8.589942357514796e-06, "loss": 0.5937, "step": 8735 }, { "epoch": 0.267745494667157, "grad_norm": 1.1568744205196337, "learning_rate": 8.589596875318973e-06, "loss": 0.6414, "step": 8736 }, { "epoch": 0.26777614318989823, "grad_norm": 1.3348830833688057, "learning_rate": 8.589251357753932e-06, "loss": 0.6464, "step": 8737 }, { "epoch": 0.26780679171263944, "grad_norm": 0.5040695617769613, "learning_rate": 8.58890580482308e-06, "loss": 0.4163, "step": 8738 }, { "epoch": 0.26783744023538064, "grad_norm": 1.1787167679345052, "learning_rate": 8.58856021652982e-06, "loss": 0.7422, "step": 8739 }, { "epoch": 0.26786808875812185, "grad_norm": 1.304647466793117, "learning_rate": 8.588214592877559e-06, "loss": 0.6557, "step": 8740 }, { "epoch": 0.26789873728086305, "grad_norm": 1.1793650829179645, "learning_rate": 8.587868933869703e-06, "loss": 0.7019, "step": 8741 }, { "epoch": 0.26792938580360426, "grad_norm": 1.5226170616330843, "learning_rate": 8.587523239509653e-06, "loss": 0.7013, "step": 8742 }, { "epoch": 0.26796003432634546, "grad_norm": 1.3111633942103866, "learning_rate": 8.587177509800823e-06, "loss": 0.6704, "step": 8743 }, { "epoch": 0.26799068284908667, "grad_norm": 1.3964101152715618, "learning_rate": 8.586831744746611e-06, "loss": 0.7093, "step": 8744 }, { "epoch": 0.2680213313718279, "grad_norm": 1.2424757858804407, "learning_rate": 8.586485944350432e-06, "loss": 0.6413, "step": 8745 }, { "epoch": 0.2680519798945691, "grad_norm": 1.3245153583053368, "learning_rate": 8.586140108615685e-06, "loss": 0.7073, "step": 8746 }, { "epoch": 0.2680826284173103, "grad_norm": 1.1748758983630276, "learning_rate": 8.585794237545784e-06, "loss": 0.6581, "step": 8747 }, { "epoch": 0.2681132769400515, "grad_norm": 1.3440942953759811, "learning_rate": 8.585448331144135e-06, "loss": 0.6714, "step": 8748 }, { "epoch": 0.2681439254627927, "grad_norm": 1.812484643370614, "learning_rate": 8.585102389414147e-06, "loss": 0.7835, "step": 8749 }, { "epoch": 0.2681745739855339, "grad_norm": 1.1042423799999965, "learning_rate": 8.584756412359228e-06, "loss": 0.5351, "step": 8750 }, { "epoch": 0.2682052225082751, "grad_norm": 1.2199918431673493, "learning_rate": 8.584410399982786e-06, "loss": 0.7544, "step": 8751 }, { "epoch": 0.2682358710310163, "grad_norm": 1.4386384182214764, "learning_rate": 8.58406435228823e-06, "loss": 0.7764, "step": 8752 }, { "epoch": 0.2682665195537575, "grad_norm": 1.508413011362873, "learning_rate": 8.583718269278972e-06, "loss": 0.7076, "step": 8753 }, { "epoch": 0.2682971680764987, "grad_norm": 1.2661918829324816, "learning_rate": 8.58337215095842e-06, "loss": 0.6323, "step": 8754 }, { "epoch": 0.26832781659923993, "grad_norm": 0.599758179130047, "learning_rate": 8.583025997329988e-06, "loss": 0.4369, "step": 8755 }, { "epoch": 0.26835846512198114, "grad_norm": 1.3598639762639828, "learning_rate": 8.58267980839708e-06, "loss": 0.6354, "step": 8756 }, { "epoch": 0.26838911364472234, "grad_norm": 0.5575804237277061, "learning_rate": 8.582333584163116e-06, "loss": 0.4456, "step": 8757 }, { "epoch": 0.26841976216746355, "grad_norm": 1.2233522978208144, "learning_rate": 8.5819873246315e-06, "loss": 0.6657, "step": 8758 }, { "epoch": 0.26845041069020475, "grad_norm": 1.6448184527251763, "learning_rate": 8.581641029805646e-06, "loss": 0.6884, "step": 8759 }, { "epoch": 0.26848105921294596, "grad_norm": 1.3025493318891919, "learning_rate": 8.581294699688966e-06, "loss": 0.6908, "step": 8760 }, { "epoch": 0.26851170773568717, "grad_norm": 1.3032225127763417, "learning_rate": 8.580948334284875e-06, "loss": 0.7523, "step": 8761 }, { "epoch": 0.2685423562584283, "grad_norm": 1.268148610269712, "learning_rate": 8.580601933596784e-06, "loss": 0.8184, "step": 8762 }, { "epoch": 0.2685730047811695, "grad_norm": 0.545961590870056, "learning_rate": 8.580255497628104e-06, "loss": 0.459, "step": 8763 }, { "epoch": 0.2686036533039107, "grad_norm": 1.3908567334495514, "learning_rate": 8.579909026382251e-06, "loss": 0.7296, "step": 8764 }, { "epoch": 0.26863430182665193, "grad_norm": 1.430042125502574, "learning_rate": 8.57956251986264e-06, "loss": 0.7367, "step": 8765 }, { "epoch": 0.26866495034939314, "grad_norm": 1.298436627314937, "learning_rate": 8.579215978072683e-06, "loss": 0.6977, "step": 8766 }, { "epoch": 0.26869559887213434, "grad_norm": 1.2278209521811416, "learning_rate": 8.578869401015794e-06, "loss": 0.6862, "step": 8767 }, { "epoch": 0.26872624739487555, "grad_norm": 1.1650279134340937, "learning_rate": 8.57852278869539e-06, "loss": 0.6357, "step": 8768 }, { "epoch": 0.26875689591761676, "grad_norm": 1.3375665303035664, "learning_rate": 8.578176141114886e-06, "loss": 0.733, "step": 8769 }, { "epoch": 0.26878754444035796, "grad_norm": 1.2306323303071136, "learning_rate": 8.577829458277695e-06, "loss": 0.6984, "step": 8770 }, { "epoch": 0.26881819296309917, "grad_norm": 1.2906511236707492, "learning_rate": 8.577482740187237e-06, "loss": 0.6338, "step": 8771 }, { "epoch": 0.2688488414858404, "grad_norm": 1.4349479910489698, "learning_rate": 8.577135986846925e-06, "loss": 0.8149, "step": 8772 }, { "epoch": 0.2688794900085816, "grad_norm": 1.3070675898034039, "learning_rate": 8.576789198260178e-06, "loss": 0.6631, "step": 8773 }, { "epoch": 0.2689101385313228, "grad_norm": 1.3143028058555357, "learning_rate": 8.57644237443041e-06, "loss": 0.7754, "step": 8774 }, { "epoch": 0.268940787054064, "grad_norm": 1.3098379253043282, "learning_rate": 8.576095515361043e-06, "loss": 0.6722, "step": 8775 }, { "epoch": 0.2689714355768052, "grad_norm": 0.6011414839445492, "learning_rate": 8.575748621055488e-06, "loss": 0.422, "step": 8776 }, { "epoch": 0.2690020840995464, "grad_norm": 1.2325552401583568, "learning_rate": 8.57540169151717e-06, "loss": 0.7148, "step": 8777 }, { "epoch": 0.2690327326222876, "grad_norm": 1.3069864162176925, "learning_rate": 8.575054726749503e-06, "loss": 0.7254, "step": 8778 }, { "epoch": 0.2690633811450288, "grad_norm": 1.354772539331866, "learning_rate": 8.574707726755909e-06, "loss": 0.6618, "step": 8779 }, { "epoch": 0.26909402966777, "grad_norm": 0.46151127841464445, "learning_rate": 8.574360691539803e-06, "loss": 0.4483, "step": 8780 }, { "epoch": 0.2691246781905112, "grad_norm": 1.535784814816863, "learning_rate": 8.574013621104607e-06, "loss": 0.7771, "step": 8781 }, { "epoch": 0.26915532671325243, "grad_norm": 1.1543028509132516, "learning_rate": 8.57366651545374e-06, "loss": 0.6144, "step": 8782 }, { "epoch": 0.26918597523599364, "grad_norm": 1.135006859827909, "learning_rate": 8.573319374590622e-06, "loss": 0.6532, "step": 8783 }, { "epoch": 0.26921662375873484, "grad_norm": 1.4088199212508088, "learning_rate": 8.572972198518676e-06, "loss": 0.7161, "step": 8784 }, { "epoch": 0.26924727228147605, "grad_norm": 1.232836782478266, "learning_rate": 8.57262498724132e-06, "loss": 0.6214, "step": 8785 }, { "epoch": 0.26927792080421725, "grad_norm": 1.155009259023921, "learning_rate": 8.572277740761976e-06, "loss": 0.7456, "step": 8786 }, { "epoch": 0.26930856932695846, "grad_norm": 0.5279901148011825, "learning_rate": 8.571930459084065e-06, "loss": 0.4263, "step": 8787 }, { "epoch": 0.26933921784969966, "grad_norm": 0.48963332244950664, "learning_rate": 8.571583142211009e-06, "loss": 0.4287, "step": 8788 }, { "epoch": 0.26936986637244087, "grad_norm": 0.4441361094908785, "learning_rate": 8.57123579014623e-06, "loss": 0.4576, "step": 8789 }, { "epoch": 0.2694005148951821, "grad_norm": 1.22241830123646, "learning_rate": 8.570888402893154e-06, "loss": 0.6944, "step": 8790 }, { "epoch": 0.2694311634179233, "grad_norm": 1.2601424711726086, "learning_rate": 8.570540980455197e-06, "loss": 0.773, "step": 8791 }, { "epoch": 0.2694618119406645, "grad_norm": 1.286016584023133, "learning_rate": 8.570193522835788e-06, "loss": 0.758, "step": 8792 }, { "epoch": 0.26949246046340564, "grad_norm": 1.3850115465127784, "learning_rate": 8.56984603003835e-06, "loss": 0.7409, "step": 8793 }, { "epoch": 0.26952310898614684, "grad_norm": 1.150029096387328, "learning_rate": 8.569498502066302e-06, "loss": 0.6273, "step": 8794 }, { "epoch": 0.26955375750888805, "grad_norm": 1.5153436040328154, "learning_rate": 8.569150938923077e-06, "loss": 0.7409, "step": 8795 }, { "epoch": 0.26958440603162925, "grad_norm": 0.5879895153132331, "learning_rate": 8.56880334061209e-06, "loss": 0.4488, "step": 8796 }, { "epoch": 0.26961505455437046, "grad_norm": 1.3909684450758535, "learning_rate": 8.568455707136774e-06, "loss": 0.7113, "step": 8797 }, { "epoch": 0.26964570307711166, "grad_norm": 1.114016538199736, "learning_rate": 8.568108038500548e-06, "loss": 0.5221, "step": 8798 }, { "epoch": 0.26967635159985287, "grad_norm": 0.5223724412907239, "learning_rate": 8.567760334706843e-06, "loss": 0.4322, "step": 8799 }, { "epoch": 0.2697070001225941, "grad_norm": 1.2940424147415543, "learning_rate": 8.56741259575908e-06, "loss": 0.6319, "step": 8800 }, { "epoch": 0.2697376486453353, "grad_norm": 1.2124796589671791, "learning_rate": 8.56706482166069e-06, "loss": 0.7408, "step": 8801 }, { "epoch": 0.2697682971680765, "grad_norm": 1.5664904611776265, "learning_rate": 8.566717012415096e-06, "loss": 0.6968, "step": 8802 }, { "epoch": 0.2697989456908177, "grad_norm": 1.898588582211376, "learning_rate": 8.56636916802573e-06, "loss": 0.6678, "step": 8803 }, { "epoch": 0.2698295942135589, "grad_norm": 1.3736262780226907, "learning_rate": 8.566021288496013e-06, "loss": 0.7374, "step": 8804 }, { "epoch": 0.2698602427363001, "grad_norm": 1.217410398460211, "learning_rate": 8.565673373829375e-06, "loss": 0.6343, "step": 8805 }, { "epoch": 0.2698908912590413, "grad_norm": 1.1855646206109272, "learning_rate": 8.565325424029248e-06, "loss": 0.7335, "step": 8806 }, { "epoch": 0.2699215397817825, "grad_norm": 1.3512867796223762, "learning_rate": 8.564977439099056e-06, "loss": 0.6603, "step": 8807 }, { "epoch": 0.2699521883045237, "grad_norm": 1.338445936816909, "learning_rate": 8.564629419042227e-06, "loss": 0.6513, "step": 8808 }, { "epoch": 0.2699828368272649, "grad_norm": 1.1978808998321142, "learning_rate": 8.564281363862196e-06, "loss": 0.6593, "step": 8809 }, { "epoch": 0.27001348535000613, "grad_norm": 1.3144724129354504, "learning_rate": 8.563933273562387e-06, "loss": 0.7209, "step": 8810 }, { "epoch": 0.27004413387274734, "grad_norm": 1.2812572577608783, "learning_rate": 8.563585148146231e-06, "loss": 0.6765, "step": 8811 }, { "epoch": 0.27007478239548854, "grad_norm": 1.1137740753129801, "learning_rate": 8.56323698761716e-06, "loss": 0.641, "step": 8812 }, { "epoch": 0.27010543091822975, "grad_norm": 1.3781894137013548, "learning_rate": 8.562888791978604e-06, "loss": 0.6782, "step": 8813 }, { "epoch": 0.27013607944097096, "grad_norm": 1.4820354634169126, "learning_rate": 8.562540561233991e-06, "loss": 0.5862, "step": 8814 }, { "epoch": 0.27016672796371216, "grad_norm": 1.263588806458761, "learning_rate": 8.562192295386756e-06, "loss": 0.6731, "step": 8815 }, { "epoch": 0.27019737648645337, "grad_norm": 0.7168664983341272, "learning_rate": 8.561843994440327e-06, "loss": 0.4652, "step": 8816 }, { "epoch": 0.2702280250091946, "grad_norm": 1.1457955775852589, "learning_rate": 8.56149565839814e-06, "loss": 0.6769, "step": 8817 }, { "epoch": 0.2702586735319358, "grad_norm": 1.2769055985822417, "learning_rate": 8.561147287263623e-06, "loss": 0.7201, "step": 8818 }, { "epoch": 0.270289322054677, "grad_norm": 1.3227944609709545, "learning_rate": 8.560798881040211e-06, "loss": 0.7306, "step": 8819 }, { "epoch": 0.2703199705774182, "grad_norm": 0.4857582866009371, "learning_rate": 8.560450439731337e-06, "loss": 0.4641, "step": 8820 }, { "epoch": 0.2703506191001594, "grad_norm": 1.236719320568433, "learning_rate": 8.560101963340434e-06, "loss": 0.6105, "step": 8821 }, { "epoch": 0.2703812676229006, "grad_norm": 0.4647109892894141, "learning_rate": 8.559753451870936e-06, "loss": 0.4451, "step": 8822 }, { "epoch": 0.2704119161456418, "grad_norm": 1.2185357825920655, "learning_rate": 8.559404905326275e-06, "loss": 0.7923, "step": 8823 }, { "epoch": 0.27044256466838296, "grad_norm": 1.204977843359566, "learning_rate": 8.559056323709889e-06, "loss": 0.6526, "step": 8824 }, { "epoch": 0.27047321319112416, "grad_norm": 1.289073139015455, "learning_rate": 8.558707707025209e-06, "loss": 0.612, "step": 8825 }, { "epoch": 0.27050386171386537, "grad_norm": 1.2082177621993675, "learning_rate": 8.558359055275671e-06, "loss": 0.6016, "step": 8826 }, { "epoch": 0.2705345102366066, "grad_norm": 1.2768987557772857, "learning_rate": 8.558010368464711e-06, "loss": 0.7184, "step": 8827 }, { "epoch": 0.2705651587593478, "grad_norm": 1.2039169304439696, "learning_rate": 8.557661646595766e-06, "loss": 0.6246, "step": 8828 }, { "epoch": 0.270595807282089, "grad_norm": 1.2444596316799257, "learning_rate": 8.557312889672267e-06, "loss": 0.7565, "step": 8829 }, { "epoch": 0.2706264558048302, "grad_norm": 1.2555773854340726, "learning_rate": 8.556964097697657e-06, "loss": 0.7886, "step": 8830 }, { "epoch": 0.2706571043275714, "grad_norm": 1.1708250694916815, "learning_rate": 8.556615270675368e-06, "loss": 0.7365, "step": 8831 }, { "epoch": 0.2706877528503126, "grad_norm": 0.591493930527507, "learning_rate": 8.55626640860884e-06, "loss": 0.4252, "step": 8832 }, { "epoch": 0.2707184013730538, "grad_norm": 0.5156416976271654, "learning_rate": 8.555917511501508e-06, "loss": 0.417, "step": 8833 }, { "epoch": 0.270749049895795, "grad_norm": 1.342282625988881, "learning_rate": 8.555568579356813e-06, "loss": 0.7146, "step": 8834 }, { "epoch": 0.2707796984185362, "grad_norm": 1.2368741503998841, "learning_rate": 8.55521961217819e-06, "loss": 0.703, "step": 8835 }, { "epoch": 0.2708103469412774, "grad_norm": 1.4559279401507983, "learning_rate": 8.554870609969077e-06, "loss": 0.8033, "step": 8836 }, { "epoch": 0.27084099546401863, "grad_norm": 0.504188245432934, "learning_rate": 8.554521572732916e-06, "loss": 0.4407, "step": 8837 }, { "epoch": 0.27087164398675984, "grad_norm": 1.2162242476435734, "learning_rate": 8.554172500473144e-06, "loss": 0.7639, "step": 8838 }, { "epoch": 0.27090229250950104, "grad_norm": 1.1832784141444548, "learning_rate": 8.553823393193201e-06, "loss": 0.747, "step": 8839 }, { "epoch": 0.27093294103224225, "grad_norm": 1.2952432674687926, "learning_rate": 8.553474250896527e-06, "loss": 0.8286, "step": 8840 }, { "epoch": 0.27096358955498345, "grad_norm": 1.1436961656025628, "learning_rate": 8.553125073586561e-06, "loss": 0.6861, "step": 8841 }, { "epoch": 0.27099423807772466, "grad_norm": 1.264989018064216, "learning_rate": 8.552775861266745e-06, "loss": 0.6037, "step": 8842 }, { "epoch": 0.27102488660046586, "grad_norm": 1.2529810143861086, "learning_rate": 8.552426613940521e-06, "loss": 0.5845, "step": 8843 }, { "epoch": 0.27105553512320707, "grad_norm": 1.439012873216767, "learning_rate": 8.552077331611326e-06, "loss": 0.7688, "step": 8844 }, { "epoch": 0.2710861836459483, "grad_norm": 1.1793572549542843, "learning_rate": 8.551728014282607e-06, "loss": 0.7014, "step": 8845 }, { "epoch": 0.2711168321686895, "grad_norm": 1.3003828243695783, "learning_rate": 8.5513786619578e-06, "loss": 0.5897, "step": 8846 }, { "epoch": 0.2711474806914307, "grad_norm": 1.1706249618497713, "learning_rate": 8.551029274640353e-06, "loss": 0.6581, "step": 8847 }, { "epoch": 0.2711781292141719, "grad_norm": 1.462759820144905, "learning_rate": 8.550679852333705e-06, "loss": 0.7393, "step": 8848 }, { "epoch": 0.2712087777369131, "grad_norm": 1.383070831411537, "learning_rate": 8.5503303950413e-06, "loss": 0.668, "step": 8849 }, { "epoch": 0.2712394262596543, "grad_norm": 1.1701749881311538, "learning_rate": 8.549980902766582e-06, "loss": 0.6248, "step": 8850 }, { "epoch": 0.2712700747823955, "grad_norm": 1.2332443174706007, "learning_rate": 8.549631375512994e-06, "loss": 0.6806, "step": 8851 }, { "epoch": 0.2713007233051367, "grad_norm": 1.6057330922744562, "learning_rate": 8.549281813283978e-06, "loss": 0.7061, "step": 8852 }, { "epoch": 0.2713313718278779, "grad_norm": 1.3165609478527363, "learning_rate": 8.548932216082982e-06, "loss": 0.651, "step": 8853 }, { "epoch": 0.2713620203506191, "grad_norm": 1.273857887437751, "learning_rate": 8.548582583913447e-06, "loss": 0.6699, "step": 8854 }, { "epoch": 0.2713926688733603, "grad_norm": 1.2331420419170018, "learning_rate": 8.54823291677882e-06, "loss": 0.7548, "step": 8855 }, { "epoch": 0.2714233173961015, "grad_norm": 1.2785134583766578, "learning_rate": 8.547883214682549e-06, "loss": 0.764, "step": 8856 }, { "epoch": 0.2714539659188427, "grad_norm": 1.5792503696575182, "learning_rate": 8.547533477628073e-06, "loss": 0.7096, "step": 8857 }, { "epoch": 0.2714846144415839, "grad_norm": 1.5397807983141678, "learning_rate": 8.547183705618845e-06, "loss": 0.6843, "step": 8858 }, { "epoch": 0.2715152629643251, "grad_norm": 0.7302893657713051, "learning_rate": 8.546833898658309e-06, "loss": 0.4639, "step": 8859 }, { "epoch": 0.2715459114870663, "grad_norm": 1.2524634017559488, "learning_rate": 8.546484056749908e-06, "loss": 0.6487, "step": 8860 }, { "epoch": 0.2715765600098075, "grad_norm": 1.328264361491448, "learning_rate": 8.546134179897095e-06, "loss": 0.6898, "step": 8861 }, { "epoch": 0.2716072085325487, "grad_norm": 1.4318214541347323, "learning_rate": 8.545784268103312e-06, "loss": 0.805, "step": 8862 }, { "epoch": 0.2716378570552899, "grad_norm": 1.1977357681702092, "learning_rate": 8.54543432137201e-06, "loss": 0.6486, "step": 8863 }, { "epoch": 0.2716685055780311, "grad_norm": 1.3813401118479465, "learning_rate": 8.545084339706638e-06, "loss": 0.6614, "step": 8864 }, { "epoch": 0.27169915410077233, "grad_norm": 1.3448191611798836, "learning_rate": 8.544734323110641e-06, "loss": 0.7059, "step": 8865 }, { "epoch": 0.27172980262351354, "grad_norm": 1.2331490484973326, "learning_rate": 8.54438427158747e-06, "loss": 0.6494, "step": 8866 }, { "epoch": 0.27176045114625474, "grad_norm": 3.200269386822099, "learning_rate": 8.544034185140577e-06, "loss": 0.6549, "step": 8867 }, { "epoch": 0.27179109966899595, "grad_norm": 1.0692903031919894, "learning_rate": 8.543684063773406e-06, "loss": 0.6647, "step": 8868 }, { "epoch": 0.27182174819173716, "grad_norm": 1.2405526826307098, "learning_rate": 8.54333390748941e-06, "loss": 0.6487, "step": 8869 }, { "epoch": 0.27185239671447836, "grad_norm": 1.3468203164244146, "learning_rate": 8.542983716292037e-06, "loss": 0.6317, "step": 8870 }, { "epoch": 0.27188304523721957, "grad_norm": 1.0762366944796755, "learning_rate": 8.54263349018474e-06, "loss": 0.6924, "step": 8871 }, { "epoch": 0.2719136937599608, "grad_norm": 1.251087932713493, "learning_rate": 8.542283229170967e-06, "loss": 0.7645, "step": 8872 }, { "epoch": 0.271944342282702, "grad_norm": 1.2035497836952604, "learning_rate": 8.541932933254174e-06, "loss": 0.7531, "step": 8873 }, { "epoch": 0.2719749908054432, "grad_norm": 1.3743985106924228, "learning_rate": 8.541582602437808e-06, "loss": 0.6313, "step": 8874 }, { "epoch": 0.2720056393281844, "grad_norm": 1.2343739586193456, "learning_rate": 8.54123223672532e-06, "loss": 0.6709, "step": 8875 }, { "epoch": 0.2720362878509256, "grad_norm": 1.1732706644049402, "learning_rate": 8.540881836120169e-06, "loss": 0.7197, "step": 8876 }, { "epoch": 0.2720669363736668, "grad_norm": 1.1914568491515551, "learning_rate": 8.540531400625802e-06, "loss": 0.6956, "step": 8877 }, { "epoch": 0.272097584896408, "grad_norm": 1.3108781884134901, "learning_rate": 8.540180930245671e-06, "loss": 0.7702, "step": 8878 }, { "epoch": 0.2721282334191492, "grad_norm": 1.3866153857249275, "learning_rate": 8.539830424983236e-06, "loss": 0.7366, "step": 8879 }, { "epoch": 0.2721588819418904, "grad_norm": 0.6213895769363987, "learning_rate": 8.539479884841941e-06, "loss": 0.4462, "step": 8880 }, { "epoch": 0.2721895304646316, "grad_norm": 1.3131293568499325, "learning_rate": 8.539129309825249e-06, "loss": 0.64, "step": 8881 }, { "epoch": 0.27222017898737283, "grad_norm": 1.2809818657790548, "learning_rate": 8.538778699936608e-06, "loss": 0.7646, "step": 8882 }, { "epoch": 0.27225082751011404, "grad_norm": 1.355941891087174, "learning_rate": 8.538428055179476e-06, "loss": 0.6634, "step": 8883 }, { "epoch": 0.27228147603285524, "grad_norm": 0.43631666465706376, "learning_rate": 8.538077375557308e-06, "loss": 0.4552, "step": 8884 }, { "epoch": 0.27231212455559645, "grad_norm": 1.2833727780224957, "learning_rate": 8.537726661073556e-06, "loss": 0.7209, "step": 8885 }, { "epoch": 0.2723427730783376, "grad_norm": 1.1623021435506198, "learning_rate": 8.53737591173168e-06, "loss": 0.6114, "step": 8886 }, { "epoch": 0.2723734216010788, "grad_norm": 1.323454299705176, "learning_rate": 8.537025127535132e-06, "loss": 0.7023, "step": 8887 }, { "epoch": 0.27240407012382, "grad_norm": 1.2361905991213888, "learning_rate": 8.536674308487373e-06, "loss": 0.6673, "step": 8888 }, { "epoch": 0.2724347186465612, "grad_norm": 1.1725283768095554, "learning_rate": 8.536323454591855e-06, "loss": 0.6782, "step": 8889 }, { "epoch": 0.2724653671693024, "grad_norm": 0.5172299589586252, "learning_rate": 8.535972565852038e-06, "loss": 0.4392, "step": 8890 }, { "epoch": 0.2724960156920436, "grad_norm": 1.2502444884500192, "learning_rate": 8.53562164227138e-06, "loss": 0.6389, "step": 8891 }, { "epoch": 0.27252666421478483, "grad_norm": 1.424230808787545, "learning_rate": 8.535270683853336e-06, "loss": 0.809, "step": 8892 }, { "epoch": 0.27255731273752604, "grad_norm": 0.4864486683115122, "learning_rate": 8.534919690601365e-06, "loss": 0.4308, "step": 8893 }, { "epoch": 0.27258796126026724, "grad_norm": 0.46247106731876125, "learning_rate": 8.534568662518926e-06, "loss": 0.4398, "step": 8894 }, { "epoch": 0.27261860978300845, "grad_norm": 1.1553656974397652, "learning_rate": 8.534217599609479e-06, "loss": 0.706, "step": 8895 }, { "epoch": 0.27264925830574965, "grad_norm": 1.2216906850038722, "learning_rate": 8.53386650187648e-06, "loss": 0.8133, "step": 8896 }, { "epoch": 0.27267990682849086, "grad_norm": 1.2324324945326783, "learning_rate": 8.53351536932339e-06, "loss": 0.7529, "step": 8897 }, { "epoch": 0.27271055535123206, "grad_norm": 0.4952428393843031, "learning_rate": 8.533164201953673e-06, "loss": 0.4536, "step": 8898 }, { "epoch": 0.27274120387397327, "grad_norm": 1.1537512337530704, "learning_rate": 8.53281299977078e-06, "loss": 0.6509, "step": 8899 }, { "epoch": 0.2727718523967145, "grad_norm": 1.2735756519924792, "learning_rate": 8.532461762778179e-06, "loss": 0.6469, "step": 8900 }, { "epoch": 0.2728025009194557, "grad_norm": 1.3040426801284302, "learning_rate": 8.532110490979327e-06, "loss": 0.6572, "step": 8901 }, { "epoch": 0.2728331494421969, "grad_norm": 1.2314900773819613, "learning_rate": 8.531759184377688e-06, "loss": 0.7294, "step": 8902 }, { "epoch": 0.2728637979649381, "grad_norm": 1.244620646280333, "learning_rate": 8.531407842976722e-06, "loss": 0.7314, "step": 8903 }, { "epoch": 0.2728944464876793, "grad_norm": 1.2855158245278362, "learning_rate": 8.53105646677989e-06, "loss": 0.6033, "step": 8904 }, { "epoch": 0.2729250950104205, "grad_norm": 1.2616429634433, "learning_rate": 8.530705055790655e-06, "loss": 0.691, "step": 8905 }, { "epoch": 0.2729557435331617, "grad_norm": 1.2194628109695653, "learning_rate": 8.530353610012482e-06, "loss": 0.5514, "step": 8906 }, { "epoch": 0.2729863920559029, "grad_norm": 1.3326208978667382, "learning_rate": 8.530002129448828e-06, "loss": 0.7346, "step": 8907 }, { "epoch": 0.2730170405786441, "grad_norm": 0.5545876316545734, "learning_rate": 8.529650614103163e-06, "loss": 0.4469, "step": 8908 }, { "epoch": 0.2730476891013853, "grad_norm": 1.2854844100847025, "learning_rate": 8.529299063978947e-06, "loss": 0.7314, "step": 8909 }, { "epoch": 0.27307833762412653, "grad_norm": 1.1974008598091717, "learning_rate": 8.528947479079644e-06, "loss": 0.7193, "step": 8910 }, { "epoch": 0.27310898614686774, "grad_norm": 1.411472215341912, "learning_rate": 8.528595859408718e-06, "loss": 0.7509, "step": 8911 }, { "epoch": 0.27313963466960894, "grad_norm": 1.537405742713896, "learning_rate": 8.528244204969633e-06, "loss": 0.6884, "step": 8912 }, { "epoch": 0.27317028319235015, "grad_norm": 1.5238478344665054, "learning_rate": 8.527892515765858e-06, "loss": 0.7209, "step": 8913 }, { "epoch": 0.27320093171509136, "grad_norm": 1.3918064457349764, "learning_rate": 8.527540791800853e-06, "loss": 0.6781, "step": 8914 }, { "epoch": 0.27323158023783256, "grad_norm": 0.47507756499518866, "learning_rate": 8.527189033078087e-06, "loss": 0.454, "step": 8915 }, { "epoch": 0.27326222876057377, "grad_norm": 1.3493565085979182, "learning_rate": 8.526837239601025e-06, "loss": 0.8294, "step": 8916 }, { "epoch": 0.2732928772833149, "grad_norm": 1.2615497855847786, "learning_rate": 8.526485411373133e-06, "loss": 0.673, "step": 8917 }, { "epoch": 0.2733235258060561, "grad_norm": 1.114923803093921, "learning_rate": 8.52613354839788e-06, "loss": 0.6438, "step": 8918 }, { "epoch": 0.27335417432879733, "grad_norm": 1.4007204372230913, "learning_rate": 8.525781650678728e-06, "loss": 0.7831, "step": 8919 }, { "epoch": 0.27338482285153853, "grad_norm": 1.413983103677362, "learning_rate": 8.525429718219149e-06, "loss": 0.7018, "step": 8920 }, { "epoch": 0.27341547137427974, "grad_norm": 0.4977338847202559, "learning_rate": 8.525077751022608e-06, "loss": 0.443, "step": 8921 }, { "epoch": 0.27344611989702095, "grad_norm": 1.3587576009773052, "learning_rate": 8.524725749092576e-06, "loss": 0.7701, "step": 8922 }, { "epoch": 0.27347676841976215, "grad_norm": 1.2834176211039046, "learning_rate": 8.524373712432516e-06, "loss": 0.6283, "step": 8923 }, { "epoch": 0.27350741694250336, "grad_norm": 1.1039218934148813, "learning_rate": 8.5240216410459e-06, "loss": 0.6581, "step": 8924 }, { "epoch": 0.27353806546524456, "grad_norm": 1.2991866649711261, "learning_rate": 8.5236695349362e-06, "loss": 0.7271, "step": 8925 }, { "epoch": 0.27356871398798577, "grad_norm": 1.0883589378349259, "learning_rate": 8.523317394106883e-06, "loss": 0.6072, "step": 8926 }, { "epoch": 0.273599362510727, "grad_norm": 1.1396845397792312, "learning_rate": 8.522965218561416e-06, "loss": 0.5558, "step": 8927 }, { "epoch": 0.2736300110334682, "grad_norm": 0.4708398162417961, "learning_rate": 8.522613008303272e-06, "loss": 0.4522, "step": 8928 }, { "epoch": 0.2736606595562094, "grad_norm": 1.2467325129326248, "learning_rate": 8.522260763335921e-06, "loss": 0.6693, "step": 8929 }, { "epoch": 0.2736913080789506, "grad_norm": 1.193928206396736, "learning_rate": 8.521908483662832e-06, "loss": 0.7528, "step": 8930 }, { "epoch": 0.2737219566016918, "grad_norm": 0.46726004073391275, "learning_rate": 8.52155616928748e-06, "loss": 0.4494, "step": 8931 }, { "epoch": 0.273752605124433, "grad_norm": 1.3147949888424413, "learning_rate": 8.52120382021333e-06, "loss": 0.6414, "step": 8932 }, { "epoch": 0.2737832536471742, "grad_norm": 1.3047204768693517, "learning_rate": 8.520851436443863e-06, "loss": 0.7212, "step": 8933 }, { "epoch": 0.2738139021699154, "grad_norm": 1.2967090044601541, "learning_rate": 8.520499017982543e-06, "loss": 0.6521, "step": 8934 }, { "epoch": 0.2738445506926566, "grad_norm": 1.1338822433547346, "learning_rate": 8.520146564832846e-06, "loss": 0.6103, "step": 8935 }, { "epoch": 0.2738751992153978, "grad_norm": 1.4055477605573354, "learning_rate": 8.519794076998244e-06, "loss": 0.7947, "step": 8936 }, { "epoch": 0.27390584773813903, "grad_norm": 1.2049305059322855, "learning_rate": 8.519441554482211e-06, "loss": 0.6044, "step": 8937 }, { "epoch": 0.27393649626088024, "grad_norm": 1.1610627905721773, "learning_rate": 8.51908899728822e-06, "loss": 0.6539, "step": 8938 }, { "epoch": 0.27396714478362144, "grad_norm": 1.2605999110249486, "learning_rate": 8.518736405419742e-06, "loss": 0.6532, "step": 8939 }, { "epoch": 0.27399779330636265, "grad_norm": 1.125662343721522, "learning_rate": 8.518383778880258e-06, "loss": 0.645, "step": 8940 }, { "epoch": 0.27402844182910385, "grad_norm": 1.341357763637812, "learning_rate": 8.518031117673236e-06, "loss": 0.6252, "step": 8941 }, { "epoch": 0.27405909035184506, "grad_norm": 0.562801245490393, "learning_rate": 8.517678421802153e-06, "loss": 0.4454, "step": 8942 }, { "epoch": 0.27408973887458626, "grad_norm": 1.3031126875146553, "learning_rate": 8.517325691270485e-06, "loss": 0.6931, "step": 8943 }, { "epoch": 0.27412038739732747, "grad_norm": 1.5208214788154975, "learning_rate": 8.516972926081708e-06, "loss": 0.7552, "step": 8944 }, { "epoch": 0.2741510359200687, "grad_norm": 1.1097797538778922, "learning_rate": 8.516620126239297e-06, "loss": 0.6716, "step": 8945 }, { "epoch": 0.2741816844428099, "grad_norm": 1.3047513690772434, "learning_rate": 8.516267291746727e-06, "loss": 0.605, "step": 8946 }, { "epoch": 0.2742123329655511, "grad_norm": 1.2066068754631458, "learning_rate": 8.515914422607476e-06, "loss": 0.604, "step": 8947 }, { "epoch": 0.27424298148829224, "grad_norm": 1.1788083344518105, "learning_rate": 8.51556151882502e-06, "loss": 0.6784, "step": 8948 }, { "epoch": 0.27427363001103344, "grad_norm": 0.47001271768513414, "learning_rate": 8.515208580402838e-06, "loss": 0.4288, "step": 8949 }, { "epoch": 0.27430427853377465, "grad_norm": 1.2421289158235254, "learning_rate": 8.514855607344406e-06, "loss": 0.6793, "step": 8950 }, { "epoch": 0.27433492705651585, "grad_norm": 1.0909664559982126, "learning_rate": 8.514502599653202e-06, "loss": 0.6416, "step": 8951 }, { "epoch": 0.27436557557925706, "grad_norm": 1.33075971139371, "learning_rate": 8.514149557332705e-06, "loss": 0.6035, "step": 8952 }, { "epoch": 0.27439622410199827, "grad_norm": 1.316937244541355, "learning_rate": 8.513796480386393e-06, "loss": 0.6911, "step": 8953 }, { "epoch": 0.27442687262473947, "grad_norm": 1.1332859155859065, "learning_rate": 8.513443368817745e-06, "loss": 0.5909, "step": 8954 }, { "epoch": 0.2744575211474807, "grad_norm": 1.3729335503260327, "learning_rate": 8.513090222630241e-06, "loss": 0.6887, "step": 8955 }, { "epoch": 0.2744881696702219, "grad_norm": 1.141197848873594, "learning_rate": 8.51273704182736e-06, "loss": 0.7124, "step": 8956 }, { "epoch": 0.2745188181929631, "grad_norm": 1.3259885298091807, "learning_rate": 8.512383826412583e-06, "loss": 0.77, "step": 8957 }, { "epoch": 0.2745494667157043, "grad_norm": 1.253656321548061, "learning_rate": 8.512030576389388e-06, "loss": 0.6793, "step": 8958 }, { "epoch": 0.2745801152384455, "grad_norm": 1.3020324643045758, "learning_rate": 8.511677291761258e-06, "loss": 0.6646, "step": 8959 }, { "epoch": 0.2746107637611867, "grad_norm": 1.231522198635288, "learning_rate": 8.511323972531674e-06, "loss": 0.6245, "step": 8960 }, { "epoch": 0.2746414122839279, "grad_norm": 1.322201322782424, "learning_rate": 8.510970618704115e-06, "loss": 0.6546, "step": 8961 }, { "epoch": 0.2746720608066691, "grad_norm": 1.4356203492476516, "learning_rate": 8.510617230282064e-06, "loss": 0.667, "step": 8962 }, { "epoch": 0.2747027093294103, "grad_norm": 1.3642087704723738, "learning_rate": 8.510263807269003e-06, "loss": 0.6615, "step": 8963 }, { "epoch": 0.2747333578521515, "grad_norm": 1.233741159568326, "learning_rate": 8.509910349668418e-06, "loss": 0.6854, "step": 8964 }, { "epoch": 0.27476400637489273, "grad_norm": 1.3263459529544213, "learning_rate": 8.509556857483786e-06, "loss": 0.7257, "step": 8965 }, { "epoch": 0.27479465489763394, "grad_norm": 1.1746358606487954, "learning_rate": 8.509203330718591e-06, "loss": 0.6782, "step": 8966 }, { "epoch": 0.27482530342037514, "grad_norm": 1.276597375809085, "learning_rate": 8.508849769376319e-06, "loss": 0.6535, "step": 8967 }, { "epoch": 0.27485595194311635, "grad_norm": 0.49438836037260986, "learning_rate": 8.508496173460453e-06, "loss": 0.4466, "step": 8968 }, { "epoch": 0.27488660046585756, "grad_norm": 1.3180003715568385, "learning_rate": 8.508142542974476e-06, "loss": 0.8, "step": 8969 }, { "epoch": 0.27491724898859876, "grad_norm": 1.1323582633120823, "learning_rate": 8.507788877921873e-06, "loss": 0.6163, "step": 8970 }, { "epoch": 0.27494789751133997, "grad_norm": 1.204001565630381, "learning_rate": 8.507435178306127e-06, "loss": 0.6929, "step": 8971 }, { "epoch": 0.2749785460340812, "grad_norm": 1.380885058961453, "learning_rate": 8.507081444130726e-06, "loss": 0.6615, "step": 8972 }, { "epoch": 0.2750091945568224, "grad_norm": 1.2946012418401793, "learning_rate": 8.506727675399154e-06, "loss": 0.7154, "step": 8973 }, { "epoch": 0.2750398430795636, "grad_norm": 0.43802968468414427, "learning_rate": 8.506373872114897e-06, "loss": 0.4364, "step": 8974 }, { "epoch": 0.2750704916023048, "grad_norm": 1.308335369706923, "learning_rate": 8.506020034281442e-06, "loss": 0.7969, "step": 8975 }, { "epoch": 0.275101140125046, "grad_norm": 1.3122341634332615, "learning_rate": 8.505666161902275e-06, "loss": 0.6832, "step": 8976 }, { "epoch": 0.2751317886477872, "grad_norm": 1.4148894864144057, "learning_rate": 8.50531225498088e-06, "loss": 0.8092, "step": 8977 }, { "epoch": 0.2751624371705284, "grad_norm": 1.247676408832807, "learning_rate": 8.504958313520749e-06, "loss": 0.5354, "step": 8978 }, { "epoch": 0.27519308569326956, "grad_norm": 1.3218137948938025, "learning_rate": 8.504604337525366e-06, "loss": 0.7629, "step": 8979 }, { "epoch": 0.27522373421601076, "grad_norm": 1.1834573453098733, "learning_rate": 8.504250326998219e-06, "loss": 0.6634, "step": 8980 }, { "epoch": 0.27525438273875197, "grad_norm": 1.4710417193725507, "learning_rate": 8.503896281942798e-06, "loss": 0.712, "step": 8981 }, { "epoch": 0.2752850312614932, "grad_norm": 1.2311743288306827, "learning_rate": 8.503542202362589e-06, "loss": 0.6807, "step": 8982 }, { "epoch": 0.2753156797842344, "grad_norm": 0.5018035424098127, "learning_rate": 8.503188088261083e-06, "loss": 0.4336, "step": 8983 }, { "epoch": 0.2753463283069756, "grad_norm": 1.1320909483519443, "learning_rate": 8.502833939641768e-06, "loss": 0.5938, "step": 8984 }, { "epoch": 0.2753769768297168, "grad_norm": 1.2693352366305835, "learning_rate": 8.502479756508135e-06, "loss": 0.6792, "step": 8985 }, { "epoch": 0.275407625352458, "grad_norm": 1.3489746890354997, "learning_rate": 8.502125538863673e-06, "loss": 0.7245, "step": 8986 }, { "epoch": 0.2754382738751992, "grad_norm": 1.3583956758605504, "learning_rate": 8.50177128671187e-06, "loss": 0.8072, "step": 8987 }, { "epoch": 0.2754689223979404, "grad_norm": 1.363609932419442, "learning_rate": 8.50141700005622e-06, "loss": 0.6516, "step": 8988 }, { "epoch": 0.2754995709206816, "grad_norm": 1.3567623191976037, "learning_rate": 8.501062678900212e-06, "loss": 0.7411, "step": 8989 }, { "epoch": 0.2755302194434228, "grad_norm": 1.3256037715013815, "learning_rate": 8.500708323247339e-06, "loss": 0.6915, "step": 8990 }, { "epoch": 0.275560867966164, "grad_norm": 0.48851698114801234, "learning_rate": 8.50035393310109e-06, "loss": 0.4337, "step": 8991 }, { "epoch": 0.27559151648890523, "grad_norm": 0.5102123568332382, "learning_rate": 8.499999508464958e-06, "loss": 0.4335, "step": 8992 }, { "epoch": 0.27562216501164644, "grad_norm": 1.2878112007078462, "learning_rate": 8.499645049342436e-06, "loss": 0.6854, "step": 8993 }, { "epoch": 0.27565281353438764, "grad_norm": 1.3839246724371441, "learning_rate": 8.499290555737015e-06, "loss": 0.6925, "step": 8994 }, { "epoch": 0.27568346205712885, "grad_norm": 1.4457180093183388, "learning_rate": 8.49893602765219e-06, "loss": 0.7561, "step": 8995 }, { "epoch": 0.27571411057987005, "grad_norm": 1.3163398261463701, "learning_rate": 8.498581465091453e-06, "loss": 0.6095, "step": 8996 }, { "epoch": 0.27574475910261126, "grad_norm": 1.3627920514952532, "learning_rate": 8.498226868058296e-06, "loss": 0.7302, "step": 8997 }, { "epoch": 0.27577540762535246, "grad_norm": 1.385931299764449, "learning_rate": 8.497872236556214e-06, "loss": 0.641, "step": 8998 }, { "epoch": 0.27580605614809367, "grad_norm": 1.1178031388355427, "learning_rate": 8.497517570588704e-06, "loss": 0.624, "step": 8999 }, { "epoch": 0.2758367046708349, "grad_norm": 1.280133627283969, "learning_rate": 8.497162870159259e-06, "loss": 0.6883, "step": 9000 }, { "epoch": 0.2758673531935761, "grad_norm": 1.3713051218730539, "learning_rate": 8.496808135271373e-06, "loss": 0.7985, "step": 9001 }, { "epoch": 0.2758980017163173, "grad_norm": 1.4075874907728576, "learning_rate": 8.49645336592854e-06, "loss": 0.7495, "step": 9002 }, { "epoch": 0.2759286502390585, "grad_norm": 1.2265269560404075, "learning_rate": 8.49609856213426e-06, "loss": 0.6661, "step": 9003 }, { "epoch": 0.2759592987617997, "grad_norm": 1.3464371929371686, "learning_rate": 8.495743723892024e-06, "loss": 0.6958, "step": 9004 }, { "epoch": 0.2759899472845409, "grad_norm": 1.1810050102590393, "learning_rate": 8.495388851205332e-06, "loss": 0.7031, "step": 9005 }, { "epoch": 0.2760205958072821, "grad_norm": 1.1873662124009754, "learning_rate": 8.495033944077679e-06, "loss": 0.68, "step": 9006 }, { "epoch": 0.2760512443300233, "grad_norm": 1.2646388866202987, "learning_rate": 8.494679002512562e-06, "loss": 0.5899, "step": 9007 }, { "epoch": 0.2760818928527645, "grad_norm": 1.1595826802342772, "learning_rate": 8.49432402651348e-06, "loss": 0.6931, "step": 9008 }, { "epoch": 0.2761125413755057, "grad_norm": 1.3571388461279978, "learning_rate": 8.493969016083928e-06, "loss": 0.7811, "step": 9009 }, { "epoch": 0.2761431898982469, "grad_norm": 1.3683276656671988, "learning_rate": 8.493613971227405e-06, "loss": 0.6203, "step": 9010 }, { "epoch": 0.2761738384209881, "grad_norm": 1.2170528096238395, "learning_rate": 8.49325889194741e-06, "loss": 0.6819, "step": 9011 }, { "epoch": 0.2762044869437293, "grad_norm": 1.2786945953271918, "learning_rate": 8.49290377824744e-06, "loss": 0.7088, "step": 9012 }, { "epoch": 0.2762351354664705, "grad_norm": 0.8039159117205331, "learning_rate": 8.492548630131e-06, "loss": 0.4455, "step": 9013 }, { "epoch": 0.2762657839892117, "grad_norm": 1.1884924553446337, "learning_rate": 8.49219344760158e-06, "loss": 0.7959, "step": 9014 }, { "epoch": 0.2762964325119529, "grad_norm": 1.247736752357224, "learning_rate": 8.491838230662685e-06, "loss": 0.6427, "step": 9015 }, { "epoch": 0.2763270810346941, "grad_norm": 0.48753343509840036, "learning_rate": 8.491482979317817e-06, "loss": 0.4494, "step": 9016 }, { "epoch": 0.2763577295574353, "grad_norm": 1.276925230793036, "learning_rate": 8.491127693570472e-06, "loss": 0.67, "step": 9017 }, { "epoch": 0.2763883780801765, "grad_norm": 1.3577168592468365, "learning_rate": 8.490772373424155e-06, "loss": 0.7627, "step": 9018 }, { "epoch": 0.27641902660291773, "grad_norm": 1.1105619315590043, "learning_rate": 8.49041701888236e-06, "loss": 0.5944, "step": 9019 }, { "epoch": 0.27644967512565893, "grad_norm": 1.1291549786996788, "learning_rate": 8.490061629948596e-06, "loss": 0.6286, "step": 9020 }, { "epoch": 0.27648032364840014, "grad_norm": 0.6569231090447313, "learning_rate": 8.489706206626363e-06, "loss": 0.4536, "step": 9021 }, { "epoch": 0.27651097217114134, "grad_norm": 1.4258988354220863, "learning_rate": 8.489350748919161e-06, "loss": 0.7635, "step": 9022 }, { "epoch": 0.27654162069388255, "grad_norm": 1.192108531081049, "learning_rate": 8.488995256830492e-06, "loss": 0.6577, "step": 9023 }, { "epoch": 0.27657226921662376, "grad_norm": 1.430406340319865, "learning_rate": 8.48863973036386e-06, "loss": 0.5689, "step": 9024 }, { "epoch": 0.27660291773936496, "grad_norm": 1.3084531422924566, "learning_rate": 8.48828416952277e-06, "loss": 0.7141, "step": 9025 }, { "epoch": 0.27663356626210617, "grad_norm": 0.5223493219456399, "learning_rate": 8.487928574310722e-06, "loss": 0.4635, "step": 9026 }, { "epoch": 0.2766642147848474, "grad_norm": 1.1285740089198364, "learning_rate": 8.487572944731221e-06, "loss": 0.636, "step": 9027 }, { "epoch": 0.2766948633075886, "grad_norm": 0.4387263104102548, "learning_rate": 8.487217280787772e-06, "loss": 0.4315, "step": 9028 }, { "epoch": 0.2767255118303298, "grad_norm": 0.48411287917070633, "learning_rate": 8.48686158248388e-06, "loss": 0.4394, "step": 9029 }, { "epoch": 0.276756160353071, "grad_norm": 1.4062834508044522, "learning_rate": 8.486505849823048e-06, "loss": 0.7039, "step": 9030 }, { "epoch": 0.2767868088758122, "grad_norm": 1.2027446465917873, "learning_rate": 8.48615008280878e-06, "loss": 0.6647, "step": 9031 }, { "epoch": 0.2768174573985534, "grad_norm": 1.3692191306974928, "learning_rate": 8.485794281444586e-06, "loss": 0.6713, "step": 9032 }, { "epoch": 0.2768481059212946, "grad_norm": 1.2498345372452335, "learning_rate": 8.485438445733967e-06, "loss": 0.6659, "step": 9033 }, { "epoch": 0.2768787544440358, "grad_norm": 1.1500101802874885, "learning_rate": 8.485082575680432e-06, "loss": 0.7359, "step": 9034 }, { "epoch": 0.276909402966777, "grad_norm": 1.4172852120975672, "learning_rate": 8.484726671287487e-06, "loss": 0.7034, "step": 9035 }, { "epoch": 0.2769400514895182, "grad_norm": 1.2374269823945079, "learning_rate": 8.484370732558637e-06, "loss": 0.6413, "step": 9036 }, { "epoch": 0.27697070001225943, "grad_norm": 1.1674237001247272, "learning_rate": 8.484014759497393e-06, "loss": 0.6372, "step": 9037 }, { "epoch": 0.27700134853500064, "grad_norm": 0.5501309786302069, "learning_rate": 8.48365875210726e-06, "loss": 0.4452, "step": 9038 }, { "epoch": 0.27703199705774184, "grad_norm": 1.2366269438769577, "learning_rate": 8.483302710391745e-06, "loss": 0.6572, "step": 9039 }, { "epoch": 0.27706264558048305, "grad_norm": 1.4535328203001943, "learning_rate": 8.482946634354357e-06, "loss": 0.6661, "step": 9040 }, { "epoch": 0.2770932941032242, "grad_norm": 1.2698124362494634, "learning_rate": 8.482590523998605e-06, "loss": 0.7912, "step": 9041 }, { "epoch": 0.2771239426259654, "grad_norm": 1.3767388025679015, "learning_rate": 8.482234379327998e-06, "loss": 0.7651, "step": 9042 }, { "epoch": 0.2771545911487066, "grad_norm": 0.49940271152416726, "learning_rate": 8.481878200346042e-06, "loss": 0.4697, "step": 9043 }, { "epoch": 0.2771852396714478, "grad_norm": 1.3907650947271453, "learning_rate": 8.481521987056251e-06, "loss": 0.8078, "step": 9044 }, { "epoch": 0.277215888194189, "grad_norm": 1.2077546805296984, "learning_rate": 8.481165739462135e-06, "loss": 0.6073, "step": 9045 }, { "epoch": 0.2772465367169302, "grad_norm": 1.333328352822954, "learning_rate": 8.480809457567201e-06, "loss": 0.6816, "step": 9046 }, { "epoch": 0.27727718523967143, "grad_norm": 1.376292201475095, "learning_rate": 8.48045314137496e-06, "loss": 0.76, "step": 9047 }, { "epoch": 0.27730783376241264, "grad_norm": 1.3058783198105068, "learning_rate": 8.480096790888923e-06, "loss": 0.6791, "step": 9048 }, { "epoch": 0.27733848228515384, "grad_norm": 1.2852044454679625, "learning_rate": 8.479740406112605e-06, "loss": 0.7182, "step": 9049 }, { "epoch": 0.27736913080789505, "grad_norm": 0.4894585092559821, "learning_rate": 8.479383987049512e-06, "loss": 0.4506, "step": 9050 }, { "epoch": 0.27739977933063625, "grad_norm": 1.3954291266883396, "learning_rate": 8.479027533703158e-06, "loss": 0.6611, "step": 9051 }, { "epoch": 0.27743042785337746, "grad_norm": 0.465567349785783, "learning_rate": 8.478671046077055e-06, "loss": 0.4461, "step": 9052 }, { "epoch": 0.27746107637611866, "grad_norm": 0.46132365109498347, "learning_rate": 8.47831452417472e-06, "loss": 0.4484, "step": 9053 }, { "epoch": 0.27749172489885987, "grad_norm": 1.7762569282999006, "learning_rate": 8.477957967999659e-06, "loss": 0.7301, "step": 9054 }, { "epoch": 0.2775223734216011, "grad_norm": 1.1549892658462246, "learning_rate": 8.47760137755539e-06, "loss": 0.5182, "step": 9055 }, { "epoch": 0.2775530219443423, "grad_norm": 0.44817720279468365, "learning_rate": 8.477244752845422e-06, "loss": 0.4403, "step": 9056 }, { "epoch": 0.2775836704670835, "grad_norm": 1.2454202880921355, "learning_rate": 8.476888093873274e-06, "loss": 0.6907, "step": 9057 }, { "epoch": 0.2776143189898247, "grad_norm": 1.2359602322495942, "learning_rate": 8.476531400642456e-06, "loss": 0.6743, "step": 9058 }, { "epoch": 0.2776449675125659, "grad_norm": 0.4720865802329989, "learning_rate": 8.476174673156488e-06, "loss": 0.442, "step": 9059 }, { "epoch": 0.2776756160353071, "grad_norm": 1.106547511188869, "learning_rate": 8.475817911418879e-06, "loss": 0.6537, "step": 9060 }, { "epoch": 0.2777062645580483, "grad_norm": 1.1084682588131491, "learning_rate": 8.475461115433147e-06, "loss": 0.6333, "step": 9061 }, { "epoch": 0.2777369130807895, "grad_norm": 0.4714449430016301, "learning_rate": 8.475104285202807e-06, "loss": 0.417, "step": 9062 }, { "epoch": 0.2777675616035307, "grad_norm": 1.2376794273493263, "learning_rate": 8.474747420731377e-06, "loss": 0.7524, "step": 9063 }, { "epoch": 0.2777982101262719, "grad_norm": 1.2516332142054396, "learning_rate": 8.47439052202237e-06, "loss": 0.6265, "step": 9064 }, { "epoch": 0.27782885864901313, "grad_norm": 1.2579023400021776, "learning_rate": 8.474033589079306e-06, "loss": 0.7606, "step": 9065 }, { "epoch": 0.27785950717175434, "grad_norm": 1.099932265834094, "learning_rate": 8.473676621905699e-06, "loss": 0.6943, "step": 9066 }, { "epoch": 0.27789015569449554, "grad_norm": 1.2776863195082413, "learning_rate": 8.473319620505067e-06, "loss": 0.7824, "step": 9067 }, { "epoch": 0.27792080421723675, "grad_norm": 1.1280563859182107, "learning_rate": 8.472962584880928e-06, "loss": 0.5899, "step": 9068 }, { "epoch": 0.27795145273997796, "grad_norm": 1.371456890538489, "learning_rate": 8.4726055150368e-06, "loss": 0.6833, "step": 9069 }, { "epoch": 0.27798210126271916, "grad_norm": 1.3038479950981354, "learning_rate": 8.472248410976203e-06, "loss": 0.5836, "step": 9070 }, { "epoch": 0.27801274978546037, "grad_norm": 1.3471949056774104, "learning_rate": 8.471891272702652e-06, "loss": 0.7751, "step": 9071 }, { "epoch": 0.2780433983082015, "grad_norm": 0.482911890961383, "learning_rate": 8.471534100219671e-06, "loss": 0.4396, "step": 9072 }, { "epoch": 0.2780740468309427, "grad_norm": 1.3227196612586678, "learning_rate": 8.471176893530774e-06, "loss": 0.6459, "step": 9073 }, { "epoch": 0.27810469535368393, "grad_norm": 1.3934580929012284, "learning_rate": 8.470819652639483e-06, "loss": 0.6663, "step": 9074 }, { "epoch": 0.27813534387642513, "grad_norm": 1.308201744108166, "learning_rate": 8.470462377549318e-06, "loss": 0.5995, "step": 9075 }, { "epoch": 0.27816599239916634, "grad_norm": 1.2843990508752694, "learning_rate": 8.4701050682638e-06, "loss": 0.7021, "step": 9076 }, { "epoch": 0.27819664092190755, "grad_norm": 1.2737645676065394, "learning_rate": 8.469747724786448e-06, "loss": 0.639, "step": 9077 }, { "epoch": 0.27822728944464875, "grad_norm": 1.2043799676203073, "learning_rate": 8.469390347120783e-06, "loss": 0.6699, "step": 9078 }, { "epoch": 0.27825793796738996, "grad_norm": 0.45317695489523013, "learning_rate": 8.46903293527033e-06, "loss": 0.442, "step": 9079 }, { "epoch": 0.27828858649013116, "grad_norm": 1.0653992925184121, "learning_rate": 8.468675489238604e-06, "loss": 0.6131, "step": 9080 }, { "epoch": 0.27831923501287237, "grad_norm": 1.3149763172793518, "learning_rate": 8.468318009029135e-06, "loss": 0.6973, "step": 9081 }, { "epoch": 0.2783498835356136, "grad_norm": 1.4400431256813735, "learning_rate": 8.46796049464544e-06, "loss": 0.7362, "step": 9082 }, { "epoch": 0.2783805320583548, "grad_norm": 0.4393443571626685, "learning_rate": 8.467602946091042e-06, "loss": 0.4225, "step": 9083 }, { "epoch": 0.278411180581096, "grad_norm": 1.2565948556770339, "learning_rate": 8.467245363369466e-06, "loss": 0.7206, "step": 9084 }, { "epoch": 0.2784418291038372, "grad_norm": 1.378892119375126, "learning_rate": 8.466887746484232e-06, "loss": 0.7231, "step": 9085 }, { "epoch": 0.2784724776265784, "grad_norm": 1.350792767510442, "learning_rate": 8.466530095438868e-06, "loss": 0.6836, "step": 9086 }, { "epoch": 0.2785031261493196, "grad_norm": 0.49374909339460704, "learning_rate": 8.466172410236896e-06, "loss": 0.4445, "step": 9087 }, { "epoch": 0.2785337746720608, "grad_norm": 1.3235278319269916, "learning_rate": 8.46581469088184e-06, "loss": 0.5957, "step": 9088 }, { "epoch": 0.278564423194802, "grad_norm": 1.317344882017332, "learning_rate": 8.465456937377226e-06, "loss": 0.66, "step": 9089 }, { "epoch": 0.2785950717175432, "grad_norm": 1.2372095632091669, "learning_rate": 8.465099149726577e-06, "loss": 0.7652, "step": 9090 }, { "epoch": 0.2786257202402844, "grad_norm": 1.3220647915909318, "learning_rate": 8.46474132793342e-06, "loss": 0.6635, "step": 9091 }, { "epoch": 0.27865636876302563, "grad_norm": 1.1888915125829644, "learning_rate": 8.464383472001278e-06, "loss": 0.6235, "step": 9092 }, { "epoch": 0.27868701728576684, "grad_norm": 1.2539882091121237, "learning_rate": 8.464025581933682e-06, "loss": 0.6969, "step": 9093 }, { "epoch": 0.27871766580850804, "grad_norm": 1.0792206007995706, "learning_rate": 8.463667657734155e-06, "loss": 0.7029, "step": 9094 }, { "epoch": 0.27874831433124925, "grad_norm": 1.3841068184322471, "learning_rate": 8.463309699406223e-06, "loss": 0.7343, "step": 9095 }, { "epoch": 0.27877896285399045, "grad_norm": 1.144787602600628, "learning_rate": 8.462951706953418e-06, "loss": 0.707, "step": 9096 }, { "epoch": 0.27880961137673166, "grad_norm": 1.270606411753058, "learning_rate": 8.462593680379259e-06, "loss": 0.6723, "step": 9097 }, { "epoch": 0.27884025989947286, "grad_norm": 1.1658438459429155, "learning_rate": 8.46223561968728e-06, "loss": 0.7065, "step": 9098 }, { "epoch": 0.27887090842221407, "grad_norm": 1.28958539189792, "learning_rate": 8.46187752488101e-06, "loss": 0.6289, "step": 9099 }, { "epoch": 0.2789015569449553, "grad_norm": 1.2050005439409217, "learning_rate": 8.461519395963973e-06, "loss": 0.6862, "step": 9100 }, { "epoch": 0.2789322054676965, "grad_norm": 1.1500658287201002, "learning_rate": 8.461161232939701e-06, "loss": 0.6714, "step": 9101 }, { "epoch": 0.2789628539904377, "grad_norm": 1.2355640100701812, "learning_rate": 8.46080303581172e-06, "loss": 0.7094, "step": 9102 }, { "epoch": 0.27899350251317884, "grad_norm": 1.2728324854690536, "learning_rate": 8.460444804583562e-06, "loss": 0.6797, "step": 9103 }, { "epoch": 0.27902415103592004, "grad_norm": 1.2833897144647815, "learning_rate": 8.460086539258758e-06, "loss": 0.6919, "step": 9104 }, { "epoch": 0.27905479955866125, "grad_norm": 1.54650334265686, "learning_rate": 8.459728239840833e-06, "loss": 0.6644, "step": 9105 }, { "epoch": 0.27908544808140245, "grad_norm": 1.4442600224443254, "learning_rate": 8.459369906333322e-06, "loss": 0.7646, "step": 9106 }, { "epoch": 0.27911609660414366, "grad_norm": 1.3463610771990793, "learning_rate": 8.459011538739754e-06, "loss": 0.7325, "step": 9107 }, { "epoch": 0.27914674512688487, "grad_norm": 1.1659675806235494, "learning_rate": 8.458653137063658e-06, "loss": 0.7241, "step": 9108 }, { "epoch": 0.27917739364962607, "grad_norm": 1.2576697577364782, "learning_rate": 8.45829470130857e-06, "loss": 0.7113, "step": 9109 }, { "epoch": 0.2792080421723673, "grad_norm": 1.2209977241382979, "learning_rate": 8.457936231478022e-06, "loss": 0.7229, "step": 9110 }, { "epoch": 0.2792386906951085, "grad_norm": 1.294184261748415, "learning_rate": 8.45757772757554e-06, "loss": 0.6978, "step": 9111 }, { "epoch": 0.2792693392178497, "grad_norm": 0.6403374908122551, "learning_rate": 8.45721918960466e-06, "loss": 0.4259, "step": 9112 }, { "epoch": 0.2792999877405909, "grad_norm": 1.1888723130111667, "learning_rate": 8.456860617568916e-06, "loss": 0.5943, "step": 9113 }, { "epoch": 0.2793306362633321, "grad_norm": 1.3765738373388492, "learning_rate": 8.45650201147184e-06, "loss": 0.6266, "step": 9114 }, { "epoch": 0.2793612847860733, "grad_norm": 1.0452783273205757, "learning_rate": 8.456143371316965e-06, "loss": 0.5935, "step": 9115 }, { "epoch": 0.2793919333088145, "grad_norm": 1.3302482285710857, "learning_rate": 8.455784697107823e-06, "loss": 0.7781, "step": 9116 }, { "epoch": 0.2794225818315557, "grad_norm": 1.2921418830711093, "learning_rate": 8.455425988847952e-06, "loss": 0.7355, "step": 9117 }, { "epoch": 0.2794532303542969, "grad_norm": 1.2720263434355659, "learning_rate": 8.455067246540887e-06, "loss": 0.6919, "step": 9118 }, { "epoch": 0.2794838788770381, "grad_norm": 1.3769858038627498, "learning_rate": 8.454708470190157e-06, "loss": 0.6448, "step": 9119 }, { "epoch": 0.27951452739977933, "grad_norm": 1.2559359899087774, "learning_rate": 8.454349659799301e-06, "loss": 0.7687, "step": 9120 }, { "epoch": 0.27954517592252054, "grad_norm": 0.6045312977151898, "learning_rate": 8.453990815371855e-06, "loss": 0.4621, "step": 9121 }, { "epoch": 0.27957582444526174, "grad_norm": 1.3510417819427707, "learning_rate": 8.453631936911352e-06, "loss": 0.6905, "step": 9122 }, { "epoch": 0.27960647296800295, "grad_norm": 1.197468682203349, "learning_rate": 8.45327302442133e-06, "loss": 0.6311, "step": 9123 }, { "epoch": 0.27963712149074416, "grad_norm": 1.3427792788537476, "learning_rate": 8.452914077905328e-06, "loss": 0.6733, "step": 9124 }, { "epoch": 0.27966777001348536, "grad_norm": 0.4856585189158081, "learning_rate": 8.452555097366879e-06, "loss": 0.4484, "step": 9125 }, { "epoch": 0.27969841853622657, "grad_norm": 1.1338296638023222, "learning_rate": 8.452196082809521e-06, "loss": 0.5702, "step": 9126 }, { "epoch": 0.2797290670589678, "grad_norm": 1.2179529882563738, "learning_rate": 8.451837034236791e-06, "loss": 0.6296, "step": 9127 }, { "epoch": 0.279759715581709, "grad_norm": 1.341451066532036, "learning_rate": 8.451477951652229e-06, "loss": 0.684, "step": 9128 }, { "epoch": 0.2797903641044502, "grad_norm": 1.3465523274933946, "learning_rate": 8.45111883505937e-06, "loss": 0.7867, "step": 9129 }, { "epoch": 0.2798210126271914, "grad_norm": 1.255369805850937, "learning_rate": 8.450759684461756e-06, "loss": 0.6392, "step": 9130 }, { "epoch": 0.2798516611499326, "grad_norm": 1.2300814454962483, "learning_rate": 8.450400499862922e-06, "loss": 0.6906, "step": 9131 }, { "epoch": 0.2798823096726738, "grad_norm": 1.3279419682141045, "learning_rate": 8.45004128126641e-06, "loss": 0.7915, "step": 9132 }, { "epoch": 0.279912958195415, "grad_norm": 0.5313203694600745, "learning_rate": 8.449682028675759e-06, "loss": 0.4428, "step": 9133 }, { "epoch": 0.27994360671815616, "grad_norm": 1.3211499672003801, "learning_rate": 8.449322742094505e-06, "loss": 0.7433, "step": 9134 }, { "epoch": 0.27997425524089736, "grad_norm": 0.469518592739306, "learning_rate": 8.448963421526196e-06, "loss": 0.433, "step": 9135 }, { "epoch": 0.28000490376363857, "grad_norm": 1.3115093104446345, "learning_rate": 8.448604066974367e-06, "loss": 0.6654, "step": 9136 }, { "epoch": 0.2800355522863798, "grad_norm": 1.2878578646461436, "learning_rate": 8.448244678442557e-06, "loss": 0.6915, "step": 9137 }, { "epoch": 0.280066200809121, "grad_norm": 1.2858891636199792, "learning_rate": 8.447885255934312e-06, "loss": 0.6936, "step": 9138 }, { "epoch": 0.2800968493318622, "grad_norm": 1.277997539592333, "learning_rate": 8.44752579945317e-06, "loss": 0.6768, "step": 9139 }, { "epoch": 0.2801274978546034, "grad_norm": 1.2798771996492817, "learning_rate": 8.447166309002677e-06, "loss": 0.6849, "step": 9140 }, { "epoch": 0.2801581463773446, "grad_norm": 1.3705941784056554, "learning_rate": 8.44680678458637e-06, "loss": 0.683, "step": 9141 }, { "epoch": 0.2801887949000858, "grad_norm": 1.2322734133317237, "learning_rate": 8.446447226207795e-06, "loss": 0.6217, "step": 9142 }, { "epoch": 0.280219443422827, "grad_norm": 1.1964859898880191, "learning_rate": 8.446087633870492e-06, "loss": 0.745, "step": 9143 }, { "epoch": 0.2802500919455682, "grad_norm": 1.2368697354466611, "learning_rate": 8.445728007578008e-06, "loss": 0.6473, "step": 9144 }, { "epoch": 0.2802807404683094, "grad_norm": 1.2165482596193091, "learning_rate": 8.445368347333884e-06, "loss": 0.6547, "step": 9145 }, { "epoch": 0.2803113889910506, "grad_norm": 0.590879750888502, "learning_rate": 8.445008653141662e-06, "loss": 0.4257, "step": 9146 }, { "epoch": 0.28034203751379183, "grad_norm": 1.2931249209157811, "learning_rate": 8.44464892500489e-06, "loss": 0.6453, "step": 9147 }, { "epoch": 0.28037268603653304, "grad_norm": 1.2676574925098747, "learning_rate": 8.44428916292711e-06, "loss": 0.6342, "step": 9148 }, { "epoch": 0.28040333455927424, "grad_norm": 1.437376528985698, "learning_rate": 8.443929366911869e-06, "loss": 0.742, "step": 9149 }, { "epoch": 0.28043398308201545, "grad_norm": 1.2541394947115894, "learning_rate": 8.44356953696271e-06, "loss": 0.6488, "step": 9150 }, { "epoch": 0.28046463160475665, "grad_norm": 1.2484008947729033, "learning_rate": 8.443209673083178e-06, "loss": 0.8067, "step": 9151 }, { "epoch": 0.28049528012749786, "grad_norm": 1.297370298727778, "learning_rate": 8.442849775276823e-06, "loss": 0.7017, "step": 9152 }, { "epoch": 0.28052592865023906, "grad_norm": 1.2430467463106198, "learning_rate": 8.442489843547187e-06, "loss": 0.6309, "step": 9153 }, { "epoch": 0.28055657717298027, "grad_norm": 1.2508212779111554, "learning_rate": 8.442129877897818e-06, "loss": 0.7779, "step": 9154 }, { "epoch": 0.2805872256957215, "grad_norm": 1.223249350394029, "learning_rate": 8.441769878332261e-06, "loss": 0.6662, "step": 9155 }, { "epoch": 0.2806178742184627, "grad_norm": 0.5314988843551565, "learning_rate": 8.441409844854067e-06, "loss": 0.433, "step": 9156 }, { "epoch": 0.2806485227412039, "grad_norm": 1.3125850043270073, "learning_rate": 8.441049777466778e-06, "loss": 0.599, "step": 9157 }, { "epoch": 0.2806791712639451, "grad_norm": 1.532567489025031, "learning_rate": 8.440689676173947e-06, "loss": 0.7749, "step": 9158 }, { "epoch": 0.2807098197866863, "grad_norm": 0.48399479517600014, "learning_rate": 8.440329540979122e-06, "loss": 0.4522, "step": 9159 }, { "epoch": 0.2807404683094275, "grad_norm": 1.256210573401959, "learning_rate": 8.439969371885847e-06, "loss": 0.6002, "step": 9160 }, { "epoch": 0.2807711168321687, "grad_norm": 1.2603975784491204, "learning_rate": 8.439609168897676e-06, "loss": 0.6298, "step": 9161 }, { "epoch": 0.2808017653549099, "grad_norm": 1.3742080728043604, "learning_rate": 8.439248932018153e-06, "loss": 0.7754, "step": 9162 }, { "epoch": 0.2808324138776511, "grad_norm": 1.2285452551282063, "learning_rate": 8.438888661250833e-06, "loss": 0.6189, "step": 9163 }, { "epoch": 0.2808630624003923, "grad_norm": 1.3958548359318803, "learning_rate": 8.438528356599262e-06, "loss": 0.6888, "step": 9164 }, { "epoch": 0.2808937109231335, "grad_norm": 1.1476179120637984, "learning_rate": 8.438168018066992e-06, "loss": 0.6657, "step": 9165 }, { "epoch": 0.2809243594458747, "grad_norm": 1.287552151676876, "learning_rate": 8.437807645657572e-06, "loss": 0.7078, "step": 9166 }, { "epoch": 0.2809550079686159, "grad_norm": 1.3756896675099166, "learning_rate": 8.437447239374554e-06, "loss": 0.6663, "step": 9167 }, { "epoch": 0.2809856564913571, "grad_norm": 1.1248654494710508, "learning_rate": 8.437086799221489e-06, "loss": 0.665, "step": 9168 }, { "epoch": 0.2810163050140983, "grad_norm": 1.151428140986703, "learning_rate": 8.436726325201926e-06, "loss": 0.6039, "step": 9169 }, { "epoch": 0.2810469535368395, "grad_norm": 1.151716371150253, "learning_rate": 8.436365817319423e-06, "loss": 0.6123, "step": 9170 }, { "epoch": 0.2810776020595807, "grad_norm": 1.2424572503517184, "learning_rate": 8.436005275577527e-06, "loss": 0.6445, "step": 9171 }, { "epoch": 0.2811082505823219, "grad_norm": 0.66071491749838, "learning_rate": 8.435644699979792e-06, "loss": 0.4466, "step": 9172 }, { "epoch": 0.2811388991050631, "grad_norm": 1.282988238034693, "learning_rate": 8.43528409052977e-06, "loss": 0.6542, "step": 9173 }, { "epoch": 0.28116954762780433, "grad_norm": 1.1565054622835937, "learning_rate": 8.434923447231015e-06, "loss": 0.5887, "step": 9174 }, { "epoch": 0.28120019615054553, "grad_norm": 1.2346091725770005, "learning_rate": 8.43456277008708e-06, "loss": 0.6602, "step": 9175 }, { "epoch": 0.28123084467328674, "grad_norm": 1.3841534086026508, "learning_rate": 8.43420205910152e-06, "loss": 0.6594, "step": 9176 }, { "epoch": 0.28126149319602795, "grad_norm": 0.5074456630477986, "learning_rate": 8.433841314277888e-06, "loss": 0.4596, "step": 9177 }, { "epoch": 0.28129214171876915, "grad_norm": 0.49051728398833694, "learning_rate": 8.433480535619741e-06, "loss": 0.4134, "step": 9178 }, { "epoch": 0.28132279024151036, "grad_norm": 1.3650129715411297, "learning_rate": 8.433119723130629e-06, "loss": 0.779, "step": 9179 }, { "epoch": 0.28135343876425156, "grad_norm": 1.335619867989174, "learning_rate": 8.43275887681411e-06, "loss": 0.7329, "step": 9180 }, { "epoch": 0.28138408728699277, "grad_norm": 1.2527429510010193, "learning_rate": 8.432397996673741e-06, "loss": 0.6913, "step": 9181 }, { "epoch": 0.281414735809734, "grad_norm": 1.3287703314395618, "learning_rate": 8.432037082713077e-06, "loss": 0.7336, "step": 9182 }, { "epoch": 0.2814453843324752, "grad_norm": 1.2342424962857563, "learning_rate": 8.43167613493567e-06, "loss": 0.7364, "step": 9183 }, { "epoch": 0.2814760328552164, "grad_norm": 1.255392502066227, "learning_rate": 8.431315153345084e-06, "loss": 0.8032, "step": 9184 }, { "epoch": 0.2815066813779576, "grad_norm": 1.1184038837805599, "learning_rate": 8.430954137944869e-06, "loss": 0.6959, "step": 9185 }, { "epoch": 0.2815373299006988, "grad_norm": 1.3446678118086701, "learning_rate": 8.430593088738586e-06, "loss": 0.7366, "step": 9186 }, { "epoch": 0.28156797842344, "grad_norm": 0.5228317336691076, "learning_rate": 8.430232005729792e-06, "loss": 0.463, "step": 9187 }, { "epoch": 0.2815986269461812, "grad_norm": 1.1505209492727153, "learning_rate": 8.429870888922045e-06, "loss": 0.7343, "step": 9188 }, { "epoch": 0.2816292754689224, "grad_norm": 1.327481714794589, "learning_rate": 8.429509738318902e-06, "loss": 0.7478, "step": 9189 }, { "epoch": 0.2816599239916636, "grad_norm": 1.2324280028841836, "learning_rate": 8.42914855392392e-06, "loss": 0.7547, "step": 9190 }, { "epoch": 0.2816905725144048, "grad_norm": 1.2579780538322032, "learning_rate": 8.428787335740663e-06, "loss": 0.6893, "step": 9191 }, { "epoch": 0.28172122103714603, "grad_norm": 1.3285700301278767, "learning_rate": 8.428426083772685e-06, "loss": 0.6755, "step": 9192 }, { "epoch": 0.28175186955988724, "grad_norm": 1.1515491431301004, "learning_rate": 8.428064798023548e-06, "loss": 0.5659, "step": 9193 }, { "epoch": 0.28178251808262844, "grad_norm": 1.1341073676794675, "learning_rate": 8.427703478496812e-06, "loss": 0.636, "step": 9194 }, { "epoch": 0.28181316660536965, "grad_norm": 1.2341738050911855, "learning_rate": 8.427342125196038e-06, "loss": 0.6927, "step": 9195 }, { "epoch": 0.28184381512811085, "grad_norm": 1.2532211346142974, "learning_rate": 8.426980738124783e-06, "loss": 0.7255, "step": 9196 }, { "epoch": 0.281874463650852, "grad_norm": 1.240719141056982, "learning_rate": 8.42661931728661e-06, "loss": 0.6881, "step": 9197 }, { "epoch": 0.2819051121735932, "grad_norm": 1.3581586520635427, "learning_rate": 8.42625786268508e-06, "loss": 0.6135, "step": 9198 }, { "epoch": 0.2819357606963344, "grad_norm": 1.1456332520080434, "learning_rate": 8.425896374323757e-06, "loss": 0.5374, "step": 9199 }, { "epoch": 0.2819664092190756, "grad_norm": 1.2537994406485922, "learning_rate": 8.425534852206198e-06, "loss": 0.7227, "step": 9200 }, { "epoch": 0.2819970577418168, "grad_norm": 1.655907141059018, "learning_rate": 8.425173296335967e-06, "loss": 0.702, "step": 9201 }, { "epoch": 0.28202770626455803, "grad_norm": 1.12215405214229, "learning_rate": 8.42481170671663e-06, "loss": 0.67, "step": 9202 }, { "epoch": 0.28205835478729924, "grad_norm": 1.3145932154372564, "learning_rate": 8.424450083351746e-06, "loss": 0.6606, "step": 9203 }, { "epoch": 0.28208900331004044, "grad_norm": 1.1823780708715717, "learning_rate": 8.424088426244877e-06, "loss": 0.6277, "step": 9204 }, { "epoch": 0.28211965183278165, "grad_norm": 0.5381851992672377, "learning_rate": 8.423726735399592e-06, "loss": 0.4456, "step": 9205 }, { "epoch": 0.28215030035552285, "grad_norm": 1.2964529607611965, "learning_rate": 8.423365010819449e-06, "loss": 0.6879, "step": 9206 }, { "epoch": 0.28218094887826406, "grad_norm": 1.2769044525304425, "learning_rate": 8.423003252508015e-06, "loss": 0.7571, "step": 9207 }, { "epoch": 0.28221159740100527, "grad_norm": 1.232360732172545, "learning_rate": 8.422641460468855e-06, "loss": 0.6738, "step": 9208 }, { "epoch": 0.28224224592374647, "grad_norm": 0.44718218969504225, "learning_rate": 8.422279634705531e-06, "loss": 0.4547, "step": 9209 }, { "epoch": 0.2822728944464877, "grad_norm": 1.2718144119068733, "learning_rate": 8.421917775221612e-06, "loss": 0.6493, "step": 9210 }, { "epoch": 0.2823035429692289, "grad_norm": 1.2041548598740826, "learning_rate": 8.421555882020662e-06, "loss": 0.6491, "step": 9211 }, { "epoch": 0.2823341914919701, "grad_norm": 1.2212617312600806, "learning_rate": 8.421193955106244e-06, "loss": 0.6696, "step": 9212 }, { "epoch": 0.2823648400147113, "grad_norm": 1.2203128691530731, "learning_rate": 8.420831994481928e-06, "loss": 0.6548, "step": 9213 }, { "epoch": 0.2823954885374525, "grad_norm": 1.1376360797371383, "learning_rate": 8.420470000151281e-06, "loss": 0.6764, "step": 9214 }, { "epoch": 0.2824261370601937, "grad_norm": 1.1770090652184146, "learning_rate": 8.420107972117865e-06, "loss": 0.6589, "step": 9215 }, { "epoch": 0.2824567855829349, "grad_norm": 1.291893902401559, "learning_rate": 8.419745910385253e-06, "loss": 0.7254, "step": 9216 }, { "epoch": 0.2824874341056761, "grad_norm": 1.4890372901449933, "learning_rate": 8.419383814957007e-06, "loss": 0.7141, "step": 9217 }, { "epoch": 0.2825180826284173, "grad_norm": 1.3788458321374437, "learning_rate": 8.419021685836698e-06, "loss": 0.7108, "step": 9218 }, { "epoch": 0.2825487311511585, "grad_norm": 1.3549729091357912, "learning_rate": 8.418659523027894e-06, "loss": 0.7724, "step": 9219 }, { "epoch": 0.28257937967389973, "grad_norm": 1.2008551588572876, "learning_rate": 8.418297326534165e-06, "loss": 0.6465, "step": 9220 }, { "epoch": 0.28261002819664094, "grad_norm": 1.1888927018424365, "learning_rate": 8.417935096359073e-06, "loss": 0.6875, "step": 9221 }, { "epoch": 0.28264067671938214, "grad_norm": 1.6649648783502096, "learning_rate": 8.417572832506196e-06, "loss": 0.6735, "step": 9222 }, { "epoch": 0.28267132524212335, "grad_norm": 1.132721346300644, "learning_rate": 8.417210534979098e-06, "loss": 0.7028, "step": 9223 }, { "epoch": 0.28270197376486456, "grad_norm": 1.2568296332696791, "learning_rate": 8.41684820378135e-06, "loss": 0.7096, "step": 9224 }, { "epoch": 0.28273262228760576, "grad_norm": 1.2114707120473442, "learning_rate": 8.416485838916522e-06, "loss": 0.7857, "step": 9225 }, { "epoch": 0.28276327081034697, "grad_norm": 1.1164794406081855, "learning_rate": 8.416123440388188e-06, "loss": 0.7136, "step": 9226 }, { "epoch": 0.2827939193330882, "grad_norm": 1.1859258871721117, "learning_rate": 8.415761008199912e-06, "loss": 0.7515, "step": 9227 }, { "epoch": 0.2828245678558293, "grad_norm": 1.3550619953579235, "learning_rate": 8.415398542355271e-06, "loss": 0.6169, "step": 9228 }, { "epoch": 0.28285521637857053, "grad_norm": 1.3405146575910047, "learning_rate": 8.415036042857834e-06, "loss": 0.7344, "step": 9229 }, { "epoch": 0.28288586490131173, "grad_norm": 0.5128562015015229, "learning_rate": 8.414673509711172e-06, "loss": 0.4458, "step": 9230 }, { "epoch": 0.28291651342405294, "grad_norm": 1.2401311547162681, "learning_rate": 8.41431094291886e-06, "loss": 0.7404, "step": 9231 }, { "epoch": 0.28294716194679415, "grad_norm": 1.452589593853879, "learning_rate": 8.413948342484466e-06, "loss": 0.6545, "step": 9232 }, { "epoch": 0.28297781046953535, "grad_norm": 1.1257464658254026, "learning_rate": 8.413585708411566e-06, "loss": 0.5913, "step": 9233 }, { "epoch": 0.28300845899227656, "grad_norm": 1.4552099816789452, "learning_rate": 8.413223040703735e-06, "loss": 0.7041, "step": 9234 }, { "epoch": 0.28303910751501776, "grad_norm": 1.1769643877362772, "learning_rate": 8.412860339364542e-06, "loss": 0.6512, "step": 9235 }, { "epoch": 0.28306975603775897, "grad_norm": 1.1627501594014924, "learning_rate": 8.412497604397564e-06, "loss": 0.7653, "step": 9236 }, { "epoch": 0.2831004045605002, "grad_norm": 1.3025462532270131, "learning_rate": 8.412134835806374e-06, "loss": 0.6258, "step": 9237 }, { "epoch": 0.2831310530832414, "grad_norm": 1.26188669482954, "learning_rate": 8.411772033594544e-06, "loss": 0.7162, "step": 9238 }, { "epoch": 0.2831617016059826, "grad_norm": 1.2511474786090804, "learning_rate": 8.411409197765654e-06, "loss": 0.7257, "step": 9239 }, { "epoch": 0.2831923501287238, "grad_norm": 0.4748453564883469, "learning_rate": 8.411046328323276e-06, "loss": 0.4381, "step": 9240 }, { "epoch": 0.283222998651465, "grad_norm": 1.1200879178723775, "learning_rate": 8.410683425270986e-06, "loss": 0.6614, "step": 9241 }, { "epoch": 0.2832536471742062, "grad_norm": 1.401393617186966, "learning_rate": 8.410320488612358e-06, "loss": 0.702, "step": 9242 }, { "epoch": 0.2832842956969474, "grad_norm": 1.359210212173494, "learning_rate": 8.40995751835097e-06, "loss": 0.717, "step": 9243 }, { "epoch": 0.2833149442196886, "grad_norm": 1.1652831660049767, "learning_rate": 8.409594514490401e-06, "loss": 0.6272, "step": 9244 }, { "epoch": 0.2833455927424298, "grad_norm": 1.1302477514259055, "learning_rate": 8.409231477034221e-06, "loss": 0.6312, "step": 9245 }, { "epoch": 0.283376241265171, "grad_norm": 1.2398369356118397, "learning_rate": 8.408868405986013e-06, "loss": 0.5984, "step": 9246 }, { "epoch": 0.28340688978791223, "grad_norm": 0.4574874860882398, "learning_rate": 8.408505301349352e-06, "loss": 0.456, "step": 9247 }, { "epoch": 0.28343753831065344, "grad_norm": 1.2783587399611254, "learning_rate": 8.408142163127815e-06, "loss": 0.7819, "step": 9248 }, { "epoch": 0.28346818683339464, "grad_norm": 1.2764200397360763, "learning_rate": 8.407778991324984e-06, "loss": 0.7116, "step": 9249 }, { "epoch": 0.28349883535613585, "grad_norm": 1.331184445567386, "learning_rate": 8.407415785944431e-06, "loss": 0.761, "step": 9250 }, { "epoch": 0.28352948387887705, "grad_norm": 1.2669053325896928, "learning_rate": 8.40705254698974e-06, "loss": 0.7163, "step": 9251 }, { "epoch": 0.28356013240161826, "grad_norm": 1.1694167857300928, "learning_rate": 8.40668927446449e-06, "loss": 0.7074, "step": 9252 }, { "epoch": 0.28359078092435946, "grad_norm": 1.303580263093037, "learning_rate": 8.406325968372258e-06, "loss": 0.7218, "step": 9253 }, { "epoch": 0.28362142944710067, "grad_norm": 0.47557210814164974, "learning_rate": 8.405962628716624e-06, "loss": 0.4402, "step": 9254 }, { "epoch": 0.2836520779698419, "grad_norm": 0.4844834107802085, "learning_rate": 8.405599255501168e-06, "loss": 0.4385, "step": 9255 }, { "epoch": 0.2836827264925831, "grad_norm": 1.3681879181358851, "learning_rate": 8.405235848729474e-06, "loss": 0.7274, "step": 9256 }, { "epoch": 0.2837133750153243, "grad_norm": 1.2579022318879494, "learning_rate": 8.404872408405118e-06, "loss": 0.6981, "step": 9257 }, { "epoch": 0.2837440235380655, "grad_norm": 1.2927994013535606, "learning_rate": 8.404508934531684e-06, "loss": 0.606, "step": 9258 }, { "epoch": 0.28377467206080664, "grad_norm": 0.45003492624849334, "learning_rate": 8.404145427112751e-06, "loss": 0.4283, "step": 9259 }, { "epoch": 0.28380532058354785, "grad_norm": 1.2466436246420014, "learning_rate": 8.403781886151902e-06, "loss": 0.6302, "step": 9260 }, { "epoch": 0.28383596910628905, "grad_norm": 1.2366515126619966, "learning_rate": 8.403418311652721e-06, "loss": 0.6809, "step": 9261 }, { "epoch": 0.28386661762903026, "grad_norm": 1.279741801556248, "learning_rate": 8.403054703618787e-06, "loss": 0.7489, "step": 9262 }, { "epoch": 0.28389726615177147, "grad_norm": 1.3896957873755642, "learning_rate": 8.402691062053685e-06, "loss": 0.7246, "step": 9263 }, { "epoch": 0.28392791467451267, "grad_norm": 1.2319780858800133, "learning_rate": 8.402327386960998e-06, "loss": 0.7356, "step": 9264 }, { "epoch": 0.2839585631972539, "grad_norm": 1.391574805056195, "learning_rate": 8.401963678344309e-06, "loss": 0.6159, "step": 9265 }, { "epoch": 0.2839892117199951, "grad_norm": 1.248822796059697, "learning_rate": 8.401599936207199e-06, "loss": 0.6739, "step": 9266 }, { "epoch": 0.2840198602427363, "grad_norm": 1.2899348012288518, "learning_rate": 8.401236160553257e-06, "loss": 0.7507, "step": 9267 }, { "epoch": 0.2840505087654775, "grad_norm": 1.1751503359569184, "learning_rate": 8.400872351386063e-06, "loss": 0.6921, "step": 9268 }, { "epoch": 0.2840811572882187, "grad_norm": 1.265192516183065, "learning_rate": 8.400508508709205e-06, "loss": 0.6847, "step": 9269 }, { "epoch": 0.2841118058109599, "grad_norm": 0.564238524205569, "learning_rate": 8.400144632526266e-06, "loss": 0.4501, "step": 9270 }, { "epoch": 0.2841424543337011, "grad_norm": 0.49469078765816693, "learning_rate": 8.399780722840832e-06, "loss": 0.4348, "step": 9271 }, { "epoch": 0.2841731028564423, "grad_norm": 1.2875143459281333, "learning_rate": 8.399416779656489e-06, "loss": 0.6744, "step": 9272 }, { "epoch": 0.2842037513791835, "grad_norm": 1.3534623493634348, "learning_rate": 8.399052802976822e-06, "loss": 0.8527, "step": 9273 }, { "epoch": 0.28423439990192473, "grad_norm": 1.2812597019610246, "learning_rate": 8.398688792805417e-06, "loss": 0.7051, "step": 9274 }, { "epoch": 0.28426504842466593, "grad_norm": 1.291189013444589, "learning_rate": 8.398324749145864e-06, "loss": 0.6299, "step": 9275 }, { "epoch": 0.28429569694740714, "grad_norm": 0.4929073947227348, "learning_rate": 8.397960672001748e-06, "loss": 0.4392, "step": 9276 }, { "epoch": 0.28432634547014835, "grad_norm": 1.3483945969587119, "learning_rate": 8.397596561376652e-06, "loss": 0.6298, "step": 9277 }, { "epoch": 0.28435699399288955, "grad_norm": 1.3917333150836797, "learning_rate": 8.397232417274172e-06, "loss": 0.7185, "step": 9278 }, { "epoch": 0.28438764251563076, "grad_norm": 1.314013258053593, "learning_rate": 8.396868239697891e-06, "loss": 0.7778, "step": 9279 }, { "epoch": 0.28441829103837196, "grad_norm": 1.3720083254655244, "learning_rate": 8.396504028651397e-06, "loss": 0.8036, "step": 9280 }, { "epoch": 0.28444893956111317, "grad_norm": 1.2861140833475124, "learning_rate": 8.39613978413828e-06, "loss": 0.7298, "step": 9281 }, { "epoch": 0.2844795880838544, "grad_norm": 1.1491441167667393, "learning_rate": 8.395775506162129e-06, "loss": 0.7423, "step": 9282 }, { "epoch": 0.2845102366065956, "grad_norm": 1.4287648426829982, "learning_rate": 8.395411194726533e-06, "loss": 0.7313, "step": 9283 }, { "epoch": 0.2845408851293368, "grad_norm": 1.2203713058397365, "learning_rate": 8.395046849835084e-06, "loss": 0.6901, "step": 9284 }, { "epoch": 0.284571533652078, "grad_norm": 0.5297534885909626, "learning_rate": 8.394682471491366e-06, "loss": 0.4339, "step": 9285 }, { "epoch": 0.2846021821748192, "grad_norm": 1.2740588662891366, "learning_rate": 8.394318059698976e-06, "loss": 0.6334, "step": 9286 }, { "epoch": 0.2846328306975604, "grad_norm": 1.237405541896655, "learning_rate": 8.393953614461501e-06, "loss": 0.6167, "step": 9287 }, { "epoch": 0.2846634792203016, "grad_norm": 1.1043672310297206, "learning_rate": 8.393589135782531e-06, "loss": 0.677, "step": 9288 }, { "epoch": 0.2846941277430428, "grad_norm": 1.1823927445051126, "learning_rate": 8.393224623665658e-06, "loss": 0.6221, "step": 9289 }, { "epoch": 0.28472477626578396, "grad_norm": 1.268784937968474, "learning_rate": 8.392860078114477e-06, "loss": 0.7071, "step": 9290 }, { "epoch": 0.28475542478852517, "grad_norm": 1.3288498983510373, "learning_rate": 8.39249549913258e-06, "loss": 0.6264, "step": 9291 }, { "epoch": 0.2847860733112664, "grad_norm": 1.256406378284224, "learning_rate": 8.392130886723553e-06, "loss": 0.6112, "step": 9292 }, { "epoch": 0.2848167218340076, "grad_norm": 1.211209930873399, "learning_rate": 8.391766240890993e-06, "loss": 0.7512, "step": 9293 }, { "epoch": 0.2848473703567488, "grad_norm": 1.4973113225115946, "learning_rate": 8.391401561638492e-06, "loss": 0.6124, "step": 9294 }, { "epoch": 0.28487801887949, "grad_norm": 1.380968089569239, "learning_rate": 8.391036848969646e-06, "loss": 0.7197, "step": 9295 }, { "epoch": 0.2849086674022312, "grad_norm": 1.277363765243512, "learning_rate": 8.390672102888044e-06, "loss": 0.7342, "step": 9296 }, { "epoch": 0.2849393159249724, "grad_norm": 1.325115900054896, "learning_rate": 8.390307323397285e-06, "loss": 0.617, "step": 9297 }, { "epoch": 0.2849699644477136, "grad_norm": 1.4162346469519955, "learning_rate": 8.389942510500957e-06, "loss": 0.7219, "step": 9298 }, { "epoch": 0.2850006129704548, "grad_norm": 1.1269007860765297, "learning_rate": 8.38957766420266e-06, "loss": 0.5976, "step": 9299 }, { "epoch": 0.285031261493196, "grad_norm": 1.3660061520437619, "learning_rate": 8.389212784505987e-06, "loss": 0.7067, "step": 9300 }, { "epoch": 0.2850619100159372, "grad_norm": 1.5105117249199183, "learning_rate": 8.388847871414533e-06, "loss": 0.7483, "step": 9301 }, { "epoch": 0.28509255853867843, "grad_norm": 1.330086815118695, "learning_rate": 8.388482924931893e-06, "loss": 0.7732, "step": 9302 }, { "epoch": 0.28512320706141964, "grad_norm": 1.2159156366065031, "learning_rate": 8.388117945061664e-06, "loss": 0.6953, "step": 9303 }, { "epoch": 0.28515385558416084, "grad_norm": 1.2077062044103486, "learning_rate": 8.387752931807442e-06, "loss": 0.6105, "step": 9304 }, { "epoch": 0.28518450410690205, "grad_norm": 1.3439005631193026, "learning_rate": 8.387387885172825e-06, "loss": 0.7122, "step": 9305 }, { "epoch": 0.28521515262964325, "grad_norm": 0.4931498307512153, "learning_rate": 8.387022805161408e-06, "loss": 0.4279, "step": 9306 }, { "epoch": 0.28524580115238446, "grad_norm": 1.2787236534025488, "learning_rate": 8.386657691776788e-06, "loss": 0.7659, "step": 9307 }, { "epoch": 0.28527644967512567, "grad_norm": 1.2812294137708005, "learning_rate": 8.386292545022563e-06, "loss": 0.7028, "step": 9308 }, { "epoch": 0.28530709819786687, "grad_norm": 1.355868597421637, "learning_rate": 8.385927364902332e-06, "loss": 0.6965, "step": 9309 }, { "epoch": 0.2853377467206081, "grad_norm": 1.352966286299714, "learning_rate": 8.385562151419693e-06, "loss": 0.6811, "step": 9310 }, { "epoch": 0.2853683952433493, "grad_norm": 1.168277429879305, "learning_rate": 8.385196904578243e-06, "loss": 0.773, "step": 9311 }, { "epoch": 0.2853990437660905, "grad_norm": 1.1291425690001142, "learning_rate": 8.384831624381582e-06, "loss": 0.6727, "step": 9312 }, { "epoch": 0.2854296922888317, "grad_norm": 0.4566032192422285, "learning_rate": 8.384466310833308e-06, "loss": 0.466, "step": 9313 }, { "epoch": 0.2854603408115729, "grad_norm": 1.221288835045839, "learning_rate": 8.384100963937023e-06, "loss": 0.8026, "step": 9314 }, { "epoch": 0.2854909893343141, "grad_norm": 1.3711821403976419, "learning_rate": 8.383735583696323e-06, "loss": 0.7194, "step": 9315 }, { "epoch": 0.2855216378570553, "grad_norm": 1.1906957376261813, "learning_rate": 8.383370170114812e-06, "loss": 0.6434, "step": 9316 }, { "epoch": 0.2855522863797965, "grad_norm": 1.166693563386867, "learning_rate": 8.383004723196088e-06, "loss": 0.6786, "step": 9317 }, { "epoch": 0.2855829349025377, "grad_norm": 1.3473171796506493, "learning_rate": 8.382639242943755e-06, "loss": 0.658, "step": 9318 }, { "epoch": 0.2856135834252789, "grad_norm": 1.2625997491508791, "learning_rate": 8.382273729361411e-06, "loss": 0.6893, "step": 9319 }, { "epoch": 0.28564423194802013, "grad_norm": 1.2545426710318035, "learning_rate": 8.381908182452659e-06, "loss": 0.6795, "step": 9320 }, { "epoch": 0.2856748804707613, "grad_norm": 1.2850665397141425, "learning_rate": 8.3815426022211e-06, "loss": 0.6674, "step": 9321 }, { "epoch": 0.2857055289935025, "grad_norm": 1.308251805758558, "learning_rate": 8.381176988670337e-06, "loss": 0.6929, "step": 9322 }, { "epoch": 0.2857361775162437, "grad_norm": 1.4261081053909395, "learning_rate": 8.38081134180397e-06, "loss": 0.6486, "step": 9323 }, { "epoch": 0.2857668260389849, "grad_norm": 0.528117868438913, "learning_rate": 8.380445661625606e-06, "loss": 0.4465, "step": 9324 }, { "epoch": 0.2857974745617261, "grad_norm": 1.3331642049206012, "learning_rate": 8.380079948138844e-06, "loss": 0.6957, "step": 9325 }, { "epoch": 0.2858281230844673, "grad_norm": 1.0997640426758728, "learning_rate": 8.379714201347291e-06, "loss": 0.6884, "step": 9326 }, { "epoch": 0.2858587716072085, "grad_norm": 1.2993873912673044, "learning_rate": 8.37934842125455e-06, "loss": 0.7257, "step": 9327 }, { "epoch": 0.2858894201299497, "grad_norm": 1.2773134408442137, "learning_rate": 8.378982607864224e-06, "loss": 0.6686, "step": 9328 }, { "epoch": 0.28592006865269093, "grad_norm": 1.3613275295099412, "learning_rate": 8.378616761179916e-06, "loss": 0.7598, "step": 9329 }, { "epoch": 0.28595071717543213, "grad_norm": 1.2765221041549586, "learning_rate": 8.378250881205235e-06, "loss": 0.722, "step": 9330 }, { "epoch": 0.28598136569817334, "grad_norm": 0.46556594328138867, "learning_rate": 8.377884967943781e-06, "loss": 0.4477, "step": 9331 }, { "epoch": 0.28601201422091455, "grad_norm": 1.220813826694471, "learning_rate": 8.377519021399164e-06, "loss": 0.6955, "step": 9332 }, { "epoch": 0.28604266274365575, "grad_norm": 1.3929804631193055, "learning_rate": 8.377153041574986e-06, "loss": 0.6907, "step": 9333 }, { "epoch": 0.28607331126639696, "grad_norm": 1.3524517021187714, "learning_rate": 8.376787028474858e-06, "loss": 0.7962, "step": 9334 }, { "epoch": 0.28610395978913816, "grad_norm": 1.0988321109095827, "learning_rate": 8.376420982102381e-06, "loss": 0.8317, "step": 9335 }, { "epoch": 0.28613460831187937, "grad_norm": 1.2579548372261145, "learning_rate": 8.376054902461166e-06, "loss": 0.791, "step": 9336 }, { "epoch": 0.2861652568346206, "grad_norm": 1.3050701416761032, "learning_rate": 8.375688789554817e-06, "loss": 0.6802, "step": 9337 }, { "epoch": 0.2861959053573618, "grad_norm": 0.4974717066887519, "learning_rate": 8.375322643386943e-06, "loss": 0.4222, "step": 9338 }, { "epoch": 0.286226553880103, "grad_norm": 1.2519257838937592, "learning_rate": 8.37495646396115e-06, "loss": 0.6546, "step": 9339 }, { "epoch": 0.2862572024028442, "grad_norm": 1.2694680737075896, "learning_rate": 8.37459025128105e-06, "loss": 0.6695, "step": 9340 }, { "epoch": 0.2862878509255854, "grad_norm": 1.215922137827379, "learning_rate": 8.374224005350247e-06, "loss": 0.7548, "step": 9341 }, { "epoch": 0.2863184994483266, "grad_norm": 1.2759981840072396, "learning_rate": 8.373857726172352e-06, "loss": 0.6878, "step": 9342 }, { "epoch": 0.2863491479710678, "grad_norm": 1.1776238276053186, "learning_rate": 8.373491413750974e-06, "loss": 0.6512, "step": 9343 }, { "epoch": 0.286379796493809, "grad_norm": 1.1219077907450012, "learning_rate": 8.373125068089722e-06, "loss": 0.6855, "step": 9344 }, { "epoch": 0.2864104450165502, "grad_norm": 0.4536583360966802, "learning_rate": 8.372758689192205e-06, "loss": 0.4415, "step": 9345 }, { "epoch": 0.2864410935392914, "grad_norm": 0.4767547585792884, "learning_rate": 8.372392277062034e-06, "loss": 0.4514, "step": 9346 }, { "epoch": 0.28647174206203263, "grad_norm": 1.368439422091689, "learning_rate": 8.372025831702819e-06, "loss": 0.7805, "step": 9347 }, { "epoch": 0.28650239058477384, "grad_norm": 1.1178520755666903, "learning_rate": 8.37165935311817e-06, "loss": 0.7501, "step": 9348 }, { "epoch": 0.28653303910751504, "grad_norm": 1.2159539016233343, "learning_rate": 8.371292841311701e-06, "loss": 0.7182, "step": 9349 }, { "epoch": 0.28656368763025625, "grad_norm": 1.2470524998073584, "learning_rate": 8.370926296287018e-06, "loss": 0.6967, "step": 9350 }, { "epoch": 0.28659433615299745, "grad_norm": 1.376153897074419, "learning_rate": 8.370559718047738e-06, "loss": 0.6746, "step": 9351 }, { "epoch": 0.2866249846757386, "grad_norm": 1.2209922658312482, "learning_rate": 8.37019310659747e-06, "loss": 0.6993, "step": 9352 }, { "epoch": 0.2866556331984798, "grad_norm": 1.1871874687910005, "learning_rate": 8.369826461939828e-06, "loss": 0.6974, "step": 9353 }, { "epoch": 0.286686281721221, "grad_norm": 1.3399635271846655, "learning_rate": 8.369459784078422e-06, "loss": 0.673, "step": 9354 }, { "epoch": 0.2867169302439622, "grad_norm": 1.29979462583641, "learning_rate": 8.369093073016868e-06, "loss": 0.8089, "step": 9355 }, { "epoch": 0.2867475787667034, "grad_norm": 1.2795796495969627, "learning_rate": 8.368726328758775e-06, "loss": 0.6633, "step": 9356 }, { "epoch": 0.28677822728944463, "grad_norm": 1.3030773761048755, "learning_rate": 8.368359551307762e-06, "loss": 0.6561, "step": 9357 }, { "epoch": 0.28680887581218584, "grad_norm": 1.2299669856701032, "learning_rate": 8.36799274066744e-06, "loss": 0.7219, "step": 9358 }, { "epoch": 0.28683952433492704, "grad_norm": 1.2359583429560517, "learning_rate": 8.367625896841425e-06, "loss": 0.7534, "step": 9359 }, { "epoch": 0.28687017285766825, "grad_norm": 1.1969835502343562, "learning_rate": 8.367259019833329e-06, "loss": 0.6811, "step": 9360 }, { "epoch": 0.28690082138040945, "grad_norm": 0.5273269902296602, "learning_rate": 8.36689210964677e-06, "loss": 0.4401, "step": 9361 }, { "epoch": 0.28693146990315066, "grad_norm": 1.4177358507510343, "learning_rate": 8.36652516628536e-06, "loss": 0.7168, "step": 9362 }, { "epoch": 0.28696211842589187, "grad_norm": 1.2568298021424185, "learning_rate": 8.366158189752715e-06, "loss": 0.6515, "step": 9363 }, { "epoch": 0.28699276694863307, "grad_norm": 1.2466098073216239, "learning_rate": 8.365791180052454e-06, "loss": 0.6773, "step": 9364 }, { "epoch": 0.2870234154713743, "grad_norm": 1.324225434229816, "learning_rate": 8.365424137188192e-06, "loss": 0.7198, "step": 9365 }, { "epoch": 0.2870540639941155, "grad_norm": 1.1576342668814334, "learning_rate": 8.365057061163544e-06, "loss": 0.707, "step": 9366 }, { "epoch": 0.2870847125168567, "grad_norm": 1.3911076498825967, "learning_rate": 8.364689951982126e-06, "loss": 0.6231, "step": 9367 }, { "epoch": 0.2871153610395979, "grad_norm": 1.1911156542059569, "learning_rate": 8.36432280964756e-06, "loss": 0.6951, "step": 9368 }, { "epoch": 0.2871460095623391, "grad_norm": 1.202989248489994, "learning_rate": 8.36395563416346e-06, "loss": 0.6566, "step": 9369 }, { "epoch": 0.2871766580850803, "grad_norm": 1.3973693891134216, "learning_rate": 8.363588425533442e-06, "loss": 0.8018, "step": 9370 }, { "epoch": 0.2872073066078215, "grad_norm": 0.4817285712435349, "learning_rate": 8.363221183761127e-06, "loss": 0.3906, "step": 9371 }, { "epoch": 0.2872379551305627, "grad_norm": 1.3581240896170839, "learning_rate": 8.362853908850136e-06, "loss": 0.5916, "step": 9372 }, { "epoch": 0.2872686036533039, "grad_norm": 1.3582348853725332, "learning_rate": 8.362486600804083e-06, "loss": 0.7013, "step": 9373 }, { "epoch": 0.28729925217604513, "grad_norm": 1.2611384317194703, "learning_rate": 8.36211925962659e-06, "loss": 0.6359, "step": 9374 }, { "epoch": 0.28732990069878633, "grad_norm": 1.2507195539287075, "learning_rate": 8.361751885321274e-06, "loss": 0.6208, "step": 9375 }, { "epoch": 0.28736054922152754, "grad_norm": 1.3740904764466937, "learning_rate": 8.36138447789176e-06, "loss": 0.7776, "step": 9376 }, { "epoch": 0.28739119774426874, "grad_norm": 1.5290149654208234, "learning_rate": 8.36101703734166e-06, "loss": 0.7234, "step": 9377 }, { "epoch": 0.28742184626700995, "grad_norm": 1.2147957919621313, "learning_rate": 8.360649563674604e-06, "loss": 0.6748, "step": 9378 }, { "epoch": 0.28745249478975116, "grad_norm": 1.3833632110200302, "learning_rate": 8.360282056894205e-06, "loss": 0.7208, "step": 9379 }, { "epoch": 0.28748314331249236, "grad_norm": 1.209786675038504, "learning_rate": 8.359914517004089e-06, "loss": 0.6932, "step": 9380 }, { "epoch": 0.28751379183523357, "grad_norm": 1.4052519209968652, "learning_rate": 8.359546944007873e-06, "loss": 0.6913, "step": 9381 }, { "epoch": 0.2875444403579748, "grad_norm": 0.5415878490175373, "learning_rate": 8.359179337909182e-06, "loss": 0.4455, "step": 9382 }, { "epoch": 0.2875750888807159, "grad_norm": 1.1830955963220218, "learning_rate": 8.35881169871164e-06, "loss": 0.6667, "step": 9383 }, { "epoch": 0.28760573740345713, "grad_norm": 1.2860922257930727, "learning_rate": 8.358444026418864e-06, "loss": 0.7136, "step": 9384 }, { "epoch": 0.28763638592619833, "grad_norm": 1.2757561514998144, "learning_rate": 8.35807632103448e-06, "loss": 0.8492, "step": 9385 }, { "epoch": 0.28766703444893954, "grad_norm": 1.3123174331956142, "learning_rate": 8.357708582562114e-06, "loss": 0.7341, "step": 9386 }, { "epoch": 0.28769768297168075, "grad_norm": 1.3699354409081936, "learning_rate": 8.357340811005383e-06, "loss": 0.7701, "step": 9387 }, { "epoch": 0.28772833149442195, "grad_norm": 1.3712066889885495, "learning_rate": 8.356973006367915e-06, "loss": 0.6875, "step": 9388 }, { "epoch": 0.28775898001716316, "grad_norm": 0.45655296241315346, "learning_rate": 8.356605168653334e-06, "loss": 0.4287, "step": 9389 }, { "epoch": 0.28778962853990436, "grad_norm": 1.3308460070953132, "learning_rate": 8.356237297865261e-06, "loss": 0.7282, "step": 9390 }, { "epoch": 0.28782027706264557, "grad_norm": 1.131149279789048, "learning_rate": 8.355869394007326e-06, "loss": 0.682, "step": 9391 }, { "epoch": 0.2878509255853868, "grad_norm": 1.2522553283736442, "learning_rate": 8.35550145708315e-06, "loss": 0.6921, "step": 9392 }, { "epoch": 0.287881574108128, "grad_norm": 0.47079250452393984, "learning_rate": 8.355133487096358e-06, "loss": 0.4289, "step": 9393 }, { "epoch": 0.2879122226308692, "grad_norm": 1.2696080264737208, "learning_rate": 8.35476548405058e-06, "loss": 0.6496, "step": 9394 }, { "epoch": 0.2879428711536104, "grad_norm": 0.4764195169311246, "learning_rate": 8.354397447949438e-06, "loss": 0.465, "step": 9395 }, { "epoch": 0.2879735196763516, "grad_norm": 0.46101533494266994, "learning_rate": 8.35402937879656e-06, "loss": 0.4304, "step": 9396 }, { "epoch": 0.2880041681990928, "grad_norm": 1.095573107019837, "learning_rate": 8.35366127659557e-06, "loss": 0.6657, "step": 9397 }, { "epoch": 0.288034816721834, "grad_norm": 1.2490421459799645, "learning_rate": 8.353293141350101e-06, "loss": 0.6571, "step": 9398 }, { "epoch": 0.2880654652445752, "grad_norm": 1.151000676308485, "learning_rate": 8.352924973063776e-06, "loss": 0.6517, "step": 9399 }, { "epoch": 0.2880961137673164, "grad_norm": 1.2014542863670288, "learning_rate": 8.35255677174022e-06, "loss": 0.7285, "step": 9400 }, { "epoch": 0.2881267622900576, "grad_norm": 0.4802575899579608, "learning_rate": 8.352188537383069e-06, "loss": 0.442, "step": 9401 }, { "epoch": 0.28815741081279883, "grad_norm": 1.2752895621037665, "learning_rate": 8.351820269995945e-06, "loss": 0.7582, "step": 9402 }, { "epoch": 0.28818805933554004, "grad_norm": 1.3719342658007252, "learning_rate": 8.351451969582478e-06, "loss": 0.6996, "step": 9403 }, { "epoch": 0.28821870785828124, "grad_norm": 1.2887399284049834, "learning_rate": 8.351083636146296e-06, "loss": 0.682, "step": 9404 }, { "epoch": 0.28824935638102245, "grad_norm": 1.1696839743254654, "learning_rate": 8.35071526969103e-06, "loss": 0.6486, "step": 9405 }, { "epoch": 0.28828000490376365, "grad_norm": 1.4034041007267049, "learning_rate": 8.350346870220311e-06, "loss": 0.7749, "step": 9406 }, { "epoch": 0.28831065342650486, "grad_norm": 0.48937204781593885, "learning_rate": 8.349978437737765e-06, "loss": 0.4363, "step": 9407 }, { "epoch": 0.28834130194924606, "grad_norm": 1.3319808599479674, "learning_rate": 8.349609972247026e-06, "loss": 0.6825, "step": 9408 }, { "epoch": 0.28837195047198727, "grad_norm": 1.390357645760019, "learning_rate": 8.349241473751721e-06, "loss": 0.7454, "step": 9409 }, { "epoch": 0.2884025989947285, "grad_norm": 1.6862096565546114, "learning_rate": 8.348872942255484e-06, "loss": 0.6365, "step": 9410 }, { "epoch": 0.2884332475174697, "grad_norm": 1.3096332713568453, "learning_rate": 8.348504377761945e-06, "loss": 0.6018, "step": 9411 }, { "epoch": 0.2884638960402109, "grad_norm": 1.4283777772209163, "learning_rate": 8.348135780274735e-06, "loss": 0.7446, "step": 9412 }, { "epoch": 0.2884945445629521, "grad_norm": 1.354577355761741, "learning_rate": 8.347767149797488e-06, "loss": 0.6637, "step": 9413 }, { "epoch": 0.28852519308569324, "grad_norm": 0.4591349607780646, "learning_rate": 8.347398486333835e-06, "loss": 0.4301, "step": 9414 }, { "epoch": 0.28855584160843445, "grad_norm": 1.4043541280951723, "learning_rate": 8.347029789887406e-06, "loss": 0.5936, "step": 9415 }, { "epoch": 0.28858649013117565, "grad_norm": 1.3623127046913979, "learning_rate": 8.346661060461838e-06, "loss": 0.6569, "step": 9416 }, { "epoch": 0.28861713865391686, "grad_norm": 0.4553537106358814, "learning_rate": 8.34629229806076e-06, "loss": 0.4266, "step": 9417 }, { "epoch": 0.28864778717665807, "grad_norm": 1.2413367361921739, "learning_rate": 8.34592350268781e-06, "loss": 0.6092, "step": 9418 }, { "epoch": 0.28867843569939927, "grad_norm": 1.141507857351157, "learning_rate": 8.345554674346618e-06, "loss": 0.6485, "step": 9419 }, { "epoch": 0.2887090842221405, "grad_norm": 1.3470624948603076, "learning_rate": 8.345185813040822e-06, "loss": 0.6241, "step": 9420 }, { "epoch": 0.2887397327448817, "grad_norm": 0.4648074881239776, "learning_rate": 8.344816918774052e-06, "loss": 0.4502, "step": 9421 }, { "epoch": 0.2887703812676229, "grad_norm": 1.1628839040706624, "learning_rate": 8.344447991549947e-06, "loss": 0.7401, "step": 9422 }, { "epoch": 0.2888010297903641, "grad_norm": 1.4395180072642242, "learning_rate": 8.344079031372138e-06, "loss": 0.6807, "step": 9423 }, { "epoch": 0.2888316783131053, "grad_norm": 1.3928723470011186, "learning_rate": 8.343710038244264e-06, "loss": 0.7038, "step": 9424 }, { "epoch": 0.2888623268358465, "grad_norm": 1.2539222697852894, "learning_rate": 8.343341012169958e-06, "loss": 0.6592, "step": 9425 }, { "epoch": 0.2888929753585877, "grad_norm": 1.28808891917836, "learning_rate": 8.34297195315286e-06, "loss": 0.6427, "step": 9426 }, { "epoch": 0.2889236238813289, "grad_norm": 1.2534070199832545, "learning_rate": 8.342602861196603e-06, "loss": 0.6538, "step": 9427 }, { "epoch": 0.2889542724040701, "grad_norm": 1.139336935287762, "learning_rate": 8.342233736304824e-06, "loss": 0.6501, "step": 9428 }, { "epoch": 0.28898492092681133, "grad_norm": 1.1861239087248068, "learning_rate": 8.341864578481162e-06, "loss": 0.6777, "step": 9429 }, { "epoch": 0.28901556944955253, "grad_norm": 0.511827870622904, "learning_rate": 8.341495387729253e-06, "loss": 0.4316, "step": 9430 }, { "epoch": 0.28904621797229374, "grad_norm": 0.5017370565824273, "learning_rate": 8.341126164052735e-06, "loss": 0.4529, "step": 9431 }, { "epoch": 0.28907686649503495, "grad_norm": 0.4421108283055225, "learning_rate": 8.340756907455246e-06, "loss": 0.4439, "step": 9432 }, { "epoch": 0.28910751501777615, "grad_norm": 1.37738101698348, "learning_rate": 8.340387617940424e-06, "loss": 0.7343, "step": 9433 }, { "epoch": 0.28913816354051736, "grad_norm": 0.4490549638695573, "learning_rate": 8.340018295511908e-06, "loss": 0.4195, "step": 9434 }, { "epoch": 0.28916881206325856, "grad_norm": 0.4613401962072764, "learning_rate": 8.339648940173337e-06, "loss": 0.439, "step": 9435 }, { "epoch": 0.28919946058599977, "grad_norm": 1.280632984516649, "learning_rate": 8.339279551928351e-06, "loss": 0.5928, "step": 9436 }, { "epoch": 0.289230109108741, "grad_norm": 1.2164882096620309, "learning_rate": 8.338910130780591e-06, "loss": 0.6634, "step": 9437 }, { "epoch": 0.2892607576314822, "grad_norm": 1.3973012138098055, "learning_rate": 8.338540676733693e-06, "loss": 0.7361, "step": 9438 }, { "epoch": 0.2892914061542234, "grad_norm": 1.333849551174077, "learning_rate": 8.3381711897913e-06, "loss": 0.703, "step": 9439 }, { "epoch": 0.2893220546769646, "grad_norm": 1.3823596761852912, "learning_rate": 8.337801669957052e-06, "loss": 0.6867, "step": 9440 }, { "epoch": 0.2893527031997058, "grad_norm": 1.4244535702659271, "learning_rate": 8.337432117234591e-06, "loss": 0.71, "step": 9441 }, { "epoch": 0.289383351722447, "grad_norm": 1.4410832848946646, "learning_rate": 8.337062531627556e-06, "loss": 0.8353, "step": 9442 }, { "epoch": 0.2894140002451882, "grad_norm": 1.34416754409965, "learning_rate": 8.33669291313959e-06, "loss": 0.7373, "step": 9443 }, { "epoch": 0.2894446487679294, "grad_norm": 1.2964548826671443, "learning_rate": 8.336323261774336e-06, "loss": 0.7412, "step": 9444 }, { "epoch": 0.28947529729067056, "grad_norm": 1.2407803855169028, "learning_rate": 8.335953577535437e-06, "loss": 0.6181, "step": 9445 }, { "epoch": 0.28950594581341177, "grad_norm": 1.196826374806418, "learning_rate": 8.33558386042653e-06, "loss": 0.6409, "step": 9446 }, { "epoch": 0.289536594336153, "grad_norm": 1.3473254778170403, "learning_rate": 8.335214110451264e-06, "loss": 0.6877, "step": 9447 }, { "epoch": 0.2895672428588942, "grad_norm": 1.173523930259979, "learning_rate": 8.334844327613278e-06, "loss": 0.6527, "step": 9448 }, { "epoch": 0.2895978913816354, "grad_norm": 1.3165377916827303, "learning_rate": 8.33447451191622e-06, "loss": 0.6961, "step": 9449 }, { "epoch": 0.2896285399043766, "grad_norm": 0.5692485146651645, "learning_rate": 8.334104663363732e-06, "loss": 0.4434, "step": 9450 }, { "epoch": 0.2896591884271178, "grad_norm": 1.2648844788874922, "learning_rate": 8.333734781959456e-06, "loss": 0.692, "step": 9451 }, { "epoch": 0.289689836949859, "grad_norm": 0.4825241673297642, "learning_rate": 8.333364867707038e-06, "loss": 0.4375, "step": 9452 }, { "epoch": 0.2897204854726002, "grad_norm": 1.2207555847520268, "learning_rate": 8.332994920610125e-06, "loss": 0.6488, "step": 9453 }, { "epoch": 0.2897511339953414, "grad_norm": 1.2524865336964344, "learning_rate": 8.332624940672358e-06, "loss": 0.718, "step": 9454 }, { "epoch": 0.2897817825180826, "grad_norm": 1.1883172198488805, "learning_rate": 8.332254927897386e-06, "loss": 0.5461, "step": 9455 }, { "epoch": 0.2898124310408238, "grad_norm": 1.4302121305155788, "learning_rate": 8.331884882288852e-06, "loss": 0.6888, "step": 9456 }, { "epoch": 0.28984307956356503, "grad_norm": 1.258053776720454, "learning_rate": 8.331514803850406e-06, "loss": 0.6944, "step": 9457 }, { "epoch": 0.28987372808630624, "grad_norm": 1.249206398207715, "learning_rate": 8.33114469258569e-06, "loss": 0.7115, "step": 9458 }, { "epoch": 0.28990437660904744, "grad_norm": 1.430208109517551, "learning_rate": 8.330774548498356e-06, "loss": 0.6654, "step": 9459 }, { "epoch": 0.28993502513178865, "grad_norm": 0.5894163126078759, "learning_rate": 8.330404371592046e-06, "loss": 0.4237, "step": 9460 }, { "epoch": 0.28996567365452985, "grad_norm": 0.5883354935304004, "learning_rate": 8.33003416187041e-06, "loss": 0.4344, "step": 9461 }, { "epoch": 0.28999632217727106, "grad_norm": 1.474899549894375, "learning_rate": 8.329663919337096e-06, "loss": 0.6766, "step": 9462 }, { "epoch": 0.29002697070001227, "grad_norm": 1.1871108357662659, "learning_rate": 8.32929364399575e-06, "loss": 0.723, "step": 9463 }, { "epoch": 0.29005761922275347, "grad_norm": 1.2930841881867985, "learning_rate": 8.328923335850023e-06, "loss": 0.7007, "step": 9464 }, { "epoch": 0.2900882677454947, "grad_norm": 0.588543222127694, "learning_rate": 8.328552994903562e-06, "loss": 0.4552, "step": 9465 }, { "epoch": 0.2901189162682359, "grad_norm": 1.174042473236761, "learning_rate": 8.328182621160018e-06, "loss": 0.6728, "step": 9466 }, { "epoch": 0.2901495647909771, "grad_norm": 1.2636781190714317, "learning_rate": 8.327812214623037e-06, "loss": 0.7312, "step": 9467 }, { "epoch": 0.2901802133137183, "grad_norm": 1.299416576092351, "learning_rate": 8.327441775296273e-06, "loss": 0.6975, "step": 9468 }, { "epoch": 0.2902108618364595, "grad_norm": 1.368857301520123, "learning_rate": 8.327071303183374e-06, "loss": 0.8139, "step": 9469 }, { "epoch": 0.2902415103592007, "grad_norm": 1.444611443157117, "learning_rate": 8.326700798287988e-06, "loss": 0.7038, "step": 9470 }, { "epoch": 0.2902721588819419, "grad_norm": 0.5408209377170611, "learning_rate": 8.326330260613768e-06, "loss": 0.4051, "step": 9471 }, { "epoch": 0.2903028074046831, "grad_norm": 1.1571187794311137, "learning_rate": 8.325959690164367e-06, "loss": 0.6237, "step": 9472 }, { "epoch": 0.2903334559274243, "grad_norm": 0.508778566814059, "learning_rate": 8.325589086943433e-06, "loss": 0.4548, "step": 9473 }, { "epoch": 0.2903641044501655, "grad_norm": 1.4976108119547955, "learning_rate": 8.325218450954619e-06, "loss": 0.6917, "step": 9474 }, { "epoch": 0.29039475297290673, "grad_norm": 0.4628624240649003, "learning_rate": 8.324847782201576e-06, "loss": 0.4233, "step": 9475 }, { "epoch": 0.2904254014956479, "grad_norm": 1.317250609442814, "learning_rate": 8.324477080687959e-06, "loss": 0.7618, "step": 9476 }, { "epoch": 0.2904560500183891, "grad_norm": 1.2411752051326992, "learning_rate": 8.324106346417416e-06, "loss": 0.714, "step": 9477 }, { "epoch": 0.2904866985411303, "grad_norm": 1.3090591806961895, "learning_rate": 8.323735579393604e-06, "loss": 0.7126, "step": 9478 }, { "epoch": 0.2905173470638715, "grad_norm": 1.4101817206899836, "learning_rate": 8.323364779620176e-06, "loss": 0.7206, "step": 9479 }, { "epoch": 0.2905479955866127, "grad_norm": 1.2447496497456285, "learning_rate": 8.322993947100783e-06, "loss": 0.7196, "step": 9480 }, { "epoch": 0.2905786441093539, "grad_norm": 0.5805815442198837, "learning_rate": 8.32262308183908e-06, "loss": 0.4496, "step": 9481 }, { "epoch": 0.2906092926320951, "grad_norm": 1.3910348475660852, "learning_rate": 8.322252183838723e-06, "loss": 0.7588, "step": 9482 }, { "epoch": 0.2906399411548363, "grad_norm": 1.4415042715408837, "learning_rate": 8.321881253103366e-06, "loss": 0.7377, "step": 9483 }, { "epoch": 0.29067058967757753, "grad_norm": 1.4371656018564671, "learning_rate": 8.32151028963666e-06, "loss": 0.775, "step": 9484 }, { "epoch": 0.29070123820031873, "grad_norm": 1.1797909926279633, "learning_rate": 8.321139293442266e-06, "loss": 0.7244, "step": 9485 }, { "epoch": 0.29073188672305994, "grad_norm": 1.4592613569029895, "learning_rate": 8.320768264523835e-06, "loss": 0.8094, "step": 9486 }, { "epoch": 0.29076253524580115, "grad_norm": 1.322155195613255, "learning_rate": 8.320397202885027e-06, "loss": 0.6876, "step": 9487 }, { "epoch": 0.29079318376854235, "grad_norm": 1.2411915520442134, "learning_rate": 8.320026108529494e-06, "loss": 0.6795, "step": 9488 }, { "epoch": 0.29082383229128356, "grad_norm": 1.190830536535352, "learning_rate": 8.319654981460895e-06, "loss": 0.6593, "step": 9489 }, { "epoch": 0.29085448081402476, "grad_norm": 1.3512460868017828, "learning_rate": 8.319283821682885e-06, "loss": 0.7062, "step": 9490 }, { "epoch": 0.29088512933676597, "grad_norm": 1.2763290274089056, "learning_rate": 8.318912629199123e-06, "loss": 0.6261, "step": 9491 }, { "epoch": 0.2909157778595072, "grad_norm": 1.1087464655905932, "learning_rate": 8.318541404013264e-06, "loss": 0.6806, "step": 9492 }, { "epoch": 0.2909464263822484, "grad_norm": 1.2752569068829578, "learning_rate": 8.31817014612897e-06, "loss": 0.7346, "step": 9493 }, { "epoch": 0.2909770749049896, "grad_norm": 0.49334487074624495, "learning_rate": 8.317798855549897e-06, "loss": 0.4367, "step": 9494 }, { "epoch": 0.2910077234277308, "grad_norm": 0.46080932067414326, "learning_rate": 8.317427532279702e-06, "loss": 0.4405, "step": 9495 }, { "epoch": 0.291038371950472, "grad_norm": 2.5717565898906933, "learning_rate": 8.317056176322044e-06, "loss": 0.7267, "step": 9496 }, { "epoch": 0.2910690204732132, "grad_norm": 0.44545788384967905, "learning_rate": 8.316684787680582e-06, "loss": 0.4401, "step": 9497 }, { "epoch": 0.2910996689959544, "grad_norm": 1.2552462609029271, "learning_rate": 8.316313366358978e-06, "loss": 0.765, "step": 9498 }, { "epoch": 0.2911303175186956, "grad_norm": 1.2989271850473227, "learning_rate": 8.31594191236089e-06, "loss": 0.6789, "step": 9499 }, { "epoch": 0.2911609660414368, "grad_norm": 1.46020238096692, "learning_rate": 8.315570425689975e-06, "loss": 0.765, "step": 9500 }, { "epoch": 0.291191614564178, "grad_norm": 1.4158601441053047, "learning_rate": 8.3151989063499e-06, "loss": 0.7327, "step": 9501 }, { "epoch": 0.29122226308691923, "grad_norm": 1.307982320422515, "learning_rate": 8.314827354344318e-06, "loss": 0.7311, "step": 9502 }, { "epoch": 0.29125291160966044, "grad_norm": 1.364373662097453, "learning_rate": 8.3144557696769e-06, "loss": 0.801, "step": 9503 }, { "epoch": 0.29128356013240164, "grad_norm": 1.4348069929242355, "learning_rate": 8.314084152351297e-06, "loss": 0.8248, "step": 9504 }, { "epoch": 0.29131420865514285, "grad_norm": 1.3711497495612068, "learning_rate": 8.313712502371174e-06, "loss": 0.7901, "step": 9505 }, { "epoch": 0.29134485717788405, "grad_norm": 1.3567398594001199, "learning_rate": 8.313340819740195e-06, "loss": 0.7331, "step": 9506 }, { "epoch": 0.2913755057006252, "grad_norm": 1.2768486686514495, "learning_rate": 8.312969104462024e-06, "loss": 0.6988, "step": 9507 }, { "epoch": 0.2914061542233664, "grad_norm": 0.6499431107069752, "learning_rate": 8.312597356540316e-06, "loss": 0.4423, "step": 9508 }, { "epoch": 0.2914368027461076, "grad_norm": 0.5665816619470744, "learning_rate": 8.312225575978741e-06, "loss": 0.4608, "step": 9509 }, { "epoch": 0.2914674512688488, "grad_norm": 1.3262793752499862, "learning_rate": 8.311853762780959e-06, "loss": 0.6141, "step": 9510 }, { "epoch": 0.29149809979159, "grad_norm": 1.1695686407081505, "learning_rate": 8.311481916950636e-06, "loss": 0.6075, "step": 9511 }, { "epoch": 0.29152874831433123, "grad_norm": 0.47849019265528187, "learning_rate": 8.311110038491435e-06, "loss": 0.4461, "step": 9512 }, { "epoch": 0.29155939683707244, "grad_norm": 1.1762460065575644, "learning_rate": 8.310738127407017e-06, "loss": 0.6012, "step": 9513 }, { "epoch": 0.29159004535981364, "grad_norm": 1.2673097415350592, "learning_rate": 8.310366183701051e-06, "loss": 0.6486, "step": 9514 }, { "epoch": 0.29162069388255485, "grad_norm": 1.2509336594654155, "learning_rate": 8.3099942073772e-06, "loss": 0.6942, "step": 9515 }, { "epoch": 0.29165134240529605, "grad_norm": 1.2352825228240798, "learning_rate": 8.30962219843913e-06, "loss": 0.6212, "step": 9516 }, { "epoch": 0.29168199092803726, "grad_norm": 0.5858553933811482, "learning_rate": 8.309250156890502e-06, "loss": 0.4498, "step": 9517 }, { "epoch": 0.29171263945077847, "grad_norm": 1.2405882377505433, "learning_rate": 8.308878082734988e-06, "loss": 0.6181, "step": 9518 }, { "epoch": 0.29174328797351967, "grad_norm": 1.1865246197241286, "learning_rate": 8.308505975976252e-06, "loss": 0.6917, "step": 9519 }, { "epoch": 0.2917739364962609, "grad_norm": 1.3828484612577479, "learning_rate": 8.30813383661796e-06, "loss": 0.7348, "step": 9520 }, { "epoch": 0.2918045850190021, "grad_norm": 1.318776335875596, "learning_rate": 8.307761664663778e-06, "loss": 0.7356, "step": 9521 }, { "epoch": 0.2918352335417433, "grad_norm": 1.163354923978088, "learning_rate": 8.307389460117375e-06, "loss": 0.6593, "step": 9522 }, { "epoch": 0.2918658820644845, "grad_norm": 1.170366090467351, "learning_rate": 8.307017222982416e-06, "loss": 0.6938, "step": 9523 }, { "epoch": 0.2918965305872257, "grad_norm": 0.519923191682972, "learning_rate": 8.306644953262571e-06, "loss": 0.4532, "step": 9524 }, { "epoch": 0.2919271791099669, "grad_norm": 1.2471178913628602, "learning_rate": 8.306272650961507e-06, "loss": 0.7333, "step": 9525 }, { "epoch": 0.2919578276327081, "grad_norm": 1.322613120150536, "learning_rate": 8.305900316082893e-06, "loss": 0.6751, "step": 9526 }, { "epoch": 0.2919884761554493, "grad_norm": 1.2000995896139608, "learning_rate": 8.305527948630398e-06, "loss": 0.6553, "step": 9527 }, { "epoch": 0.2920191246781905, "grad_norm": 1.1915735892914645, "learning_rate": 8.305155548607688e-06, "loss": 0.6632, "step": 9528 }, { "epoch": 0.29204977320093173, "grad_norm": 1.3894951691882838, "learning_rate": 8.304783116018437e-06, "loss": 0.7247, "step": 9529 }, { "epoch": 0.29208042172367293, "grad_norm": 1.181833113796285, "learning_rate": 8.304410650866312e-06, "loss": 0.7068, "step": 9530 }, { "epoch": 0.29211107024641414, "grad_norm": 1.2188210980387812, "learning_rate": 8.304038153154983e-06, "loss": 0.6213, "step": 9531 }, { "epoch": 0.29214171876915535, "grad_norm": 1.3893097850525695, "learning_rate": 8.303665622888121e-06, "loss": 0.6984, "step": 9532 }, { "epoch": 0.29217236729189655, "grad_norm": 1.281050379387364, "learning_rate": 8.303293060069394e-06, "loss": 0.6807, "step": 9533 }, { "epoch": 0.29220301581463776, "grad_norm": 1.23251372395199, "learning_rate": 8.30292046470248e-06, "loss": 0.6671, "step": 9534 }, { "epoch": 0.29223366433737896, "grad_norm": 1.2535421314629043, "learning_rate": 8.302547836791042e-06, "loss": 0.6085, "step": 9535 }, { "epoch": 0.29226431286012017, "grad_norm": 0.5288114298497151, "learning_rate": 8.302175176338756e-06, "loss": 0.4419, "step": 9536 }, { "epoch": 0.2922949613828614, "grad_norm": 1.1829259250814483, "learning_rate": 8.301802483349293e-06, "loss": 0.582, "step": 9537 }, { "epoch": 0.2923256099056025, "grad_norm": 1.5147807150073305, "learning_rate": 8.301429757826326e-06, "loss": 0.7423, "step": 9538 }, { "epoch": 0.29235625842834373, "grad_norm": 0.47872720497555227, "learning_rate": 8.301056999773527e-06, "loss": 0.4495, "step": 9539 }, { "epoch": 0.29238690695108494, "grad_norm": 1.105492619174405, "learning_rate": 8.300684209194567e-06, "loss": 0.5966, "step": 9540 }, { "epoch": 0.29241755547382614, "grad_norm": 1.2036796698985703, "learning_rate": 8.300311386093122e-06, "loss": 0.6122, "step": 9541 }, { "epoch": 0.29244820399656735, "grad_norm": 1.3650850529133483, "learning_rate": 8.299938530472866e-06, "loss": 0.6796, "step": 9542 }, { "epoch": 0.29247885251930855, "grad_norm": 0.48688287784751894, "learning_rate": 8.29956564233747e-06, "loss": 0.443, "step": 9543 }, { "epoch": 0.29250950104204976, "grad_norm": 1.4100703253500597, "learning_rate": 8.299192721690609e-06, "loss": 0.6667, "step": 9544 }, { "epoch": 0.29254014956479096, "grad_norm": 1.3244883875617401, "learning_rate": 8.298819768535959e-06, "loss": 0.7322, "step": 9545 }, { "epoch": 0.29257079808753217, "grad_norm": 1.2995068393446834, "learning_rate": 8.298446782877194e-06, "loss": 0.6586, "step": 9546 }, { "epoch": 0.2926014466102734, "grad_norm": 1.5416231136474947, "learning_rate": 8.298073764717988e-06, "loss": 0.6597, "step": 9547 }, { "epoch": 0.2926320951330146, "grad_norm": 1.2472016845004485, "learning_rate": 8.297700714062017e-06, "loss": 0.6765, "step": 9548 }, { "epoch": 0.2926627436557558, "grad_norm": 1.2565774325093055, "learning_rate": 8.297327630912958e-06, "loss": 0.6995, "step": 9549 }, { "epoch": 0.292693392178497, "grad_norm": 0.49875266026271026, "learning_rate": 8.296954515274485e-06, "loss": 0.4296, "step": 9550 }, { "epoch": 0.2927240407012382, "grad_norm": 1.2620407820826018, "learning_rate": 8.296581367150277e-06, "loss": 0.7037, "step": 9551 }, { "epoch": 0.2927546892239794, "grad_norm": 1.4872749147155484, "learning_rate": 8.296208186544008e-06, "loss": 0.6621, "step": 9552 }, { "epoch": 0.2927853377467206, "grad_norm": 0.4721597140184595, "learning_rate": 8.295834973459358e-06, "loss": 0.4307, "step": 9553 }, { "epoch": 0.2928159862694618, "grad_norm": 1.2373961091901446, "learning_rate": 8.295461727900003e-06, "loss": 0.6863, "step": 9554 }, { "epoch": 0.292846634792203, "grad_norm": 1.1903572877076747, "learning_rate": 8.295088449869619e-06, "loss": 0.6749, "step": 9555 }, { "epoch": 0.2928772833149442, "grad_norm": 1.2581107313763522, "learning_rate": 8.294715139371885e-06, "loss": 0.6727, "step": 9556 }, { "epoch": 0.29290793183768543, "grad_norm": 1.230478380371426, "learning_rate": 8.29434179641048e-06, "loss": 0.6619, "step": 9557 }, { "epoch": 0.29293858036042664, "grad_norm": 1.2118569169566356, "learning_rate": 8.293968420989083e-06, "loss": 0.7047, "step": 9558 }, { "epoch": 0.29296922888316784, "grad_norm": 1.345336918744501, "learning_rate": 8.293595013111373e-06, "loss": 0.6874, "step": 9559 }, { "epoch": 0.29299987740590905, "grad_norm": 1.1931904199853958, "learning_rate": 8.293221572781027e-06, "loss": 0.6341, "step": 9560 }, { "epoch": 0.29303052592865025, "grad_norm": 1.3509368939421222, "learning_rate": 8.292848100001727e-06, "loss": 0.7112, "step": 9561 }, { "epoch": 0.29306117445139146, "grad_norm": 1.50417757360852, "learning_rate": 8.292474594777152e-06, "loss": 0.6608, "step": 9562 }, { "epoch": 0.29309182297413267, "grad_norm": 1.1385432560600122, "learning_rate": 8.292101057110982e-06, "loss": 0.5948, "step": 9563 }, { "epoch": 0.29312247149687387, "grad_norm": 1.2588688857675616, "learning_rate": 8.2917274870069e-06, "loss": 0.6447, "step": 9564 }, { "epoch": 0.2931531200196151, "grad_norm": 1.1960720358355268, "learning_rate": 8.291353884468583e-06, "loss": 0.6719, "step": 9565 }, { "epoch": 0.2931837685423563, "grad_norm": 1.0490423241679905, "learning_rate": 8.290980249499714e-06, "loss": 0.5724, "step": 9566 }, { "epoch": 0.2932144170650975, "grad_norm": 1.147926248298742, "learning_rate": 8.290606582103975e-06, "loss": 0.7438, "step": 9567 }, { "epoch": 0.2932450655878387, "grad_norm": 1.093873622139205, "learning_rate": 8.290232882285047e-06, "loss": 0.6301, "step": 9568 }, { "epoch": 0.29327571411057984, "grad_norm": 1.3536595297151555, "learning_rate": 8.289859150046614e-06, "loss": 0.7011, "step": 9569 }, { "epoch": 0.29330636263332105, "grad_norm": 1.4555759699634232, "learning_rate": 8.289485385392356e-06, "loss": 0.7113, "step": 9570 }, { "epoch": 0.29333701115606226, "grad_norm": 1.2484742471073713, "learning_rate": 8.289111588325956e-06, "loss": 0.6592, "step": 9571 }, { "epoch": 0.29336765967880346, "grad_norm": 1.2550296017994778, "learning_rate": 8.2887377588511e-06, "loss": 0.6454, "step": 9572 }, { "epoch": 0.29339830820154467, "grad_norm": 0.5799843441917, "learning_rate": 8.288363896971468e-06, "loss": 0.4679, "step": 9573 }, { "epoch": 0.29342895672428587, "grad_norm": 1.2742879745004532, "learning_rate": 8.287990002690746e-06, "loss": 0.6606, "step": 9574 }, { "epoch": 0.2934596052470271, "grad_norm": 0.5044491967834319, "learning_rate": 8.287616076012617e-06, "loss": 0.4692, "step": 9575 }, { "epoch": 0.2934902537697683, "grad_norm": 1.2915255588983485, "learning_rate": 8.287242116940765e-06, "loss": 0.6823, "step": 9576 }, { "epoch": 0.2935209022925095, "grad_norm": 1.336639070409023, "learning_rate": 8.286868125478876e-06, "loss": 0.7183, "step": 9577 }, { "epoch": 0.2935515508152507, "grad_norm": 1.2222932971365958, "learning_rate": 8.286494101630633e-06, "loss": 0.623, "step": 9578 }, { "epoch": 0.2935821993379919, "grad_norm": 0.517749854409298, "learning_rate": 8.286120045399724e-06, "loss": 0.4488, "step": 9579 }, { "epoch": 0.2936128478607331, "grad_norm": 1.1411942422060157, "learning_rate": 8.285745956789832e-06, "loss": 0.6585, "step": 9580 }, { "epoch": 0.2936434963834743, "grad_norm": 1.2396040889074065, "learning_rate": 8.285371835804646e-06, "loss": 0.6625, "step": 9581 }, { "epoch": 0.2936741449062155, "grad_norm": 0.5031242780929933, "learning_rate": 8.28499768244785e-06, "loss": 0.4501, "step": 9582 }, { "epoch": 0.2937047934289567, "grad_norm": 0.4459560263786765, "learning_rate": 8.284623496723132e-06, "loss": 0.4213, "step": 9583 }, { "epoch": 0.29373544195169793, "grad_norm": 1.3244044316298562, "learning_rate": 8.284249278634178e-06, "loss": 0.72, "step": 9584 }, { "epoch": 0.29376609047443913, "grad_norm": 1.043491086370354, "learning_rate": 8.283875028184676e-06, "loss": 0.7195, "step": 9585 }, { "epoch": 0.29379673899718034, "grad_norm": 1.129030874405884, "learning_rate": 8.283500745378312e-06, "loss": 0.5774, "step": 9586 }, { "epoch": 0.29382738751992155, "grad_norm": 1.2978023808889025, "learning_rate": 8.283126430218776e-06, "loss": 0.6496, "step": 9587 }, { "epoch": 0.29385803604266275, "grad_norm": 1.2258001123426765, "learning_rate": 8.282752082709755e-06, "loss": 0.6212, "step": 9588 }, { "epoch": 0.29388868456540396, "grad_norm": 1.1627332847151368, "learning_rate": 8.282377702854937e-06, "loss": 0.6206, "step": 9589 }, { "epoch": 0.29391933308814516, "grad_norm": 1.2913142485930211, "learning_rate": 8.282003290658012e-06, "loss": 0.664, "step": 9590 }, { "epoch": 0.29394998161088637, "grad_norm": 1.3220639627149318, "learning_rate": 8.281628846122668e-06, "loss": 0.7423, "step": 9591 }, { "epoch": 0.2939806301336276, "grad_norm": 1.3104937926454159, "learning_rate": 8.281254369252598e-06, "loss": 0.6746, "step": 9592 }, { "epoch": 0.2940112786563688, "grad_norm": 0.6700110897070718, "learning_rate": 8.280879860051488e-06, "loss": 0.4483, "step": 9593 }, { "epoch": 0.29404192717911, "grad_norm": 1.2452327773586973, "learning_rate": 8.280505318523028e-06, "loss": 0.7399, "step": 9594 }, { "epoch": 0.2940725757018512, "grad_norm": 1.2069920632078623, "learning_rate": 8.28013074467091e-06, "loss": 0.6012, "step": 9595 }, { "epoch": 0.2941032242245924, "grad_norm": 1.3027040506916883, "learning_rate": 8.279756138498826e-06, "loss": 0.7058, "step": 9596 }, { "epoch": 0.2941338727473336, "grad_norm": 0.43730043567149135, "learning_rate": 8.279381500010466e-06, "loss": 0.4254, "step": 9597 }, { "epoch": 0.2941645212700748, "grad_norm": 1.2327432871010227, "learning_rate": 8.279006829209519e-06, "loss": 0.7774, "step": 9598 }, { "epoch": 0.294195169792816, "grad_norm": 0.48035518245403586, "learning_rate": 8.27863212609968e-06, "loss": 0.4714, "step": 9599 }, { "epoch": 0.29422581831555716, "grad_norm": 1.1503425934366358, "learning_rate": 8.278257390684639e-06, "loss": 0.692, "step": 9600 }, { "epoch": 0.29425646683829837, "grad_norm": 1.1986149609328889, "learning_rate": 8.277882622968089e-06, "loss": 0.6279, "step": 9601 }, { "epoch": 0.2942871153610396, "grad_norm": 1.4004827276106566, "learning_rate": 8.277507822953722e-06, "loss": 0.6527, "step": 9602 }, { "epoch": 0.2943177638837808, "grad_norm": 1.185565241885728, "learning_rate": 8.277132990645235e-06, "loss": 0.6609, "step": 9603 }, { "epoch": 0.294348412406522, "grad_norm": 1.3550868470886013, "learning_rate": 8.276758126046316e-06, "loss": 0.822, "step": 9604 }, { "epoch": 0.2943790609292632, "grad_norm": 1.199562210000864, "learning_rate": 8.27638322916066e-06, "loss": 0.6426, "step": 9605 }, { "epoch": 0.2944097094520044, "grad_norm": 1.3536211103766222, "learning_rate": 8.276008299991965e-06, "loss": 0.7498, "step": 9606 }, { "epoch": 0.2944403579747456, "grad_norm": 1.2695668363222052, "learning_rate": 8.275633338543918e-06, "loss": 0.6697, "step": 9607 }, { "epoch": 0.2944710064974868, "grad_norm": 1.3306845504906275, "learning_rate": 8.27525834482022e-06, "loss": 0.6591, "step": 9608 }, { "epoch": 0.294501655020228, "grad_norm": 1.2392086857786473, "learning_rate": 8.274883318824563e-06, "loss": 0.7283, "step": 9609 }, { "epoch": 0.2945323035429692, "grad_norm": 1.1378215683337813, "learning_rate": 8.274508260560644e-06, "loss": 0.6842, "step": 9610 }, { "epoch": 0.2945629520657104, "grad_norm": 1.3887641471097216, "learning_rate": 8.274133170032155e-06, "loss": 0.6535, "step": 9611 }, { "epoch": 0.29459360058845163, "grad_norm": 1.1744530884932451, "learning_rate": 8.273758047242795e-06, "loss": 0.6921, "step": 9612 }, { "epoch": 0.29462424911119284, "grad_norm": 0.5165448662643601, "learning_rate": 8.27338289219626e-06, "loss": 0.4367, "step": 9613 }, { "epoch": 0.29465489763393404, "grad_norm": 1.2053126674695236, "learning_rate": 8.273007704896246e-06, "loss": 0.6685, "step": 9614 }, { "epoch": 0.29468554615667525, "grad_norm": 0.48789223718644803, "learning_rate": 8.272632485346449e-06, "loss": 0.4302, "step": 9615 }, { "epoch": 0.29471619467941645, "grad_norm": 1.2620818962338385, "learning_rate": 8.272257233550566e-06, "loss": 0.7476, "step": 9616 }, { "epoch": 0.29474684320215766, "grad_norm": 1.3744967576548353, "learning_rate": 8.271881949512297e-06, "loss": 0.6866, "step": 9617 }, { "epoch": 0.29477749172489887, "grad_norm": 1.0700887648873794, "learning_rate": 8.271506633235335e-06, "loss": 0.6451, "step": 9618 }, { "epoch": 0.29480814024764007, "grad_norm": 0.45769447657441054, "learning_rate": 8.271131284723384e-06, "loss": 0.4347, "step": 9619 }, { "epoch": 0.2948387887703813, "grad_norm": 1.2751215416282378, "learning_rate": 8.270755903980139e-06, "loss": 0.6471, "step": 9620 }, { "epoch": 0.2948694372931225, "grad_norm": 1.4100450506888553, "learning_rate": 8.270380491009297e-06, "loss": 0.7269, "step": 9621 }, { "epoch": 0.2949000858158637, "grad_norm": 1.1996542916922035, "learning_rate": 8.270005045814563e-06, "loss": 0.6506, "step": 9622 }, { "epoch": 0.2949307343386049, "grad_norm": 1.4459112581320708, "learning_rate": 8.26962956839963e-06, "loss": 0.7323, "step": 9623 }, { "epoch": 0.2949613828613461, "grad_norm": 0.45111012622296465, "learning_rate": 8.269254058768201e-06, "loss": 0.4467, "step": 9624 }, { "epoch": 0.2949920313840873, "grad_norm": 1.3583729532851003, "learning_rate": 8.268878516923975e-06, "loss": 0.7571, "step": 9625 }, { "epoch": 0.2950226799068285, "grad_norm": 0.4403887043093365, "learning_rate": 8.268502942870654e-06, "loss": 0.4369, "step": 9626 }, { "epoch": 0.2950533284295697, "grad_norm": 1.2605314012438826, "learning_rate": 8.268127336611935e-06, "loss": 0.6503, "step": 9627 }, { "epoch": 0.2950839769523109, "grad_norm": 0.4460843384184048, "learning_rate": 8.267751698151523e-06, "loss": 0.4168, "step": 9628 }, { "epoch": 0.29511462547505213, "grad_norm": 1.4068948954489355, "learning_rate": 8.267376027493117e-06, "loss": 0.7186, "step": 9629 }, { "epoch": 0.29514527399779333, "grad_norm": 1.1308134663750846, "learning_rate": 8.267000324640418e-06, "loss": 0.8171, "step": 9630 }, { "epoch": 0.2951759225205345, "grad_norm": 1.1931796306151723, "learning_rate": 8.26662458959713e-06, "loss": 0.6513, "step": 9631 }, { "epoch": 0.2952065710432757, "grad_norm": 0.44779237898538427, "learning_rate": 8.266248822366953e-06, "loss": 0.4259, "step": 9632 }, { "epoch": 0.2952372195660169, "grad_norm": 0.44843984845856966, "learning_rate": 8.265873022953591e-06, "loss": 0.4433, "step": 9633 }, { "epoch": 0.2952678680887581, "grad_norm": 1.3030542687798221, "learning_rate": 8.265497191360747e-06, "loss": 0.6462, "step": 9634 }, { "epoch": 0.2952985166114993, "grad_norm": 1.2829481703364274, "learning_rate": 8.265121327592124e-06, "loss": 0.6818, "step": 9635 }, { "epoch": 0.2953291651342405, "grad_norm": 1.2491106209054608, "learning_rate": 8.264745431651424e-06, "loss": 0.6718, "step": 9636 }, { "epoch": 0.2953598136569817, "grad_norm": 1.414133166989569, "learning_rate": 8.264369503542353e-06, "loss": 0.6678, "step": 9637 }, { "epoch": 0.2953904621797229, "grad_norm": 1.2487672721130323, "learning_rate": 8.263993543268613e-06, "loss": 0.6998, "step": 9638 }, { "epoch": 0.29542111070246413, "grad_norm": 1.1523466991750402, "learning_rate": 8.263617550833911e-06, "loss": 0.6756, "step": 9639 }, { "epoch": 0.29545175922520533, "grad_norm": 1.3606315022332125, "learning_rate": 8.263241526241949e-06, "loss": 0.6941, "step": 9640 }, { "epoch": 0.29548240774794654, "grad_norm": 1.1917908613412824, "learning_rate": 8.262865469496433e-06, "loss": 0.6819, "step": 9641 }, { "epoch": 0.29551305627068775, "grad_norm": 0.5359981676056299, "learning_rate": 8.26248938060107e-06, "loss": 0.4321, "step": 9642 }, { "epoch": 0.29554370479342895, "grad_norm": 1.6330998892429678, "learning_rate": 8.262113259559564e-06, "loss": 0.773, "step": 9643 }, { "epoch": 0.29557435331617016, "grad_norm": 1.314819666059342, "learning_rate": 8.26173710637562e-06, "loss": 0.7039, "step": 9644 }, { "epoch": 0.29560500183891136, "grad_norm": 1.4682369044813985, "learning_rate": 8.261360921052948e-06, "loss": 0.7651, "step": 9645 }, { "epoch": 0.29563565036165257, "grad_norm": 1.3447739182713152, "learning_rate": 8.260984703595252e-06, "loss": 0.7376, "step": 9646 }, { "epoch": 0.2956662988843938, "grad_norm": 1.1597070601092383, "learning_rate": 8.260608454006238e-06, "loss": 0.7356, "step": 9647 }, { "epoch": 0.295696947407135, "grad_norm": 1.1906743919934497, "learning_rate": 8.260232172289615e-06, "loss": 0.6993, "step": 9648 }, { "epoch": 0.2957275959298762, "grad_norm": 1.1181957109135239, "learning_rate": 8.25985585844909e-06, "loss": 0.6785, "step": 9649 }, { "epoch": 0.2957582444526174, "grad_norm": 0.5117780061113391, "learning_rate": 8.259479512488373e-06, "loss": 0.4425, "step": 9650 }, { "epoch": 0.2957888929753586, "grad_norm": 1.2707240961579445, "learning_rate": 8.259103134411168e-06, "loss": 0.6773, "step": 9651 }, { "epoch": 0.2958195414980998, "grad_norm": 1.3578594417581331, "learning_rate": 8.258726724221187e-06, "loss": 0.789, "step": 9652 }, { "epoch": 0.295850190020841, "grad_norm": 1.1932875763769788, "learning_rate": 8.258350281922138e-06, "loss": 0.6765, "step": 9653 }, { "epoch": 0.2958808385435822, "grad_norm": 1.2504580263182314, "learning_rate": 8.25797380751773e-06, "loss": 0.7977, "step": 9654 }, { "epoch": 0.2959114870663234, "grad_norm": 1.328847860852738, "learning_rate": 8.257597301011673e-06, "loss": 0.6433, "step": 9655 }, { "epoch": 0.2959421355890646, "grad_norm": 1.3420953083633647, "learning_rate": 8.257220762407675e-06, "loss": 0.7188, "step": 9656 }, { "epoch": 0.29597278411180583, "grad_norm": 1.3490116873415903, "learning_rate": 8.256844191709447e-06, "loss": 0.6893, "step": 9657 }, { "epoch": 0.29600343263454704, "grad_norm": 1.1958498119304484, "learning_rate": 8.256467588920703e-06, "loss": 0.6817, "step": 9658 }, { "epoch": 0.29603408115728824, "grad_norm": 0.493284682077564, "learning_rate": 8.256090954045146e-06, "loss": 0.437, "step": 9659 }, { "epoch": 0.29606472968002945, "grad_norm": 1.1838130075672768, "learning_rate": 8.255714287086496e-06, "loss": 0.6359, "step": 9660 }, { "epoch": 0.29609537820277065, "grad_norm": 1.3008128501404195, "learning_rate": 8.255337588048458e-06, "loss": 0.76, "step": 9661 }, { "epoch": 0.2961260267255118, "grad_norm": 1.4501989082908617, "learning_rate": 8.254960856934746e-06, "loss": 0.7003, "step": 9662 }, { "epoch": 0.296156675248253, "grad_norm": 1.1964042914690005, "learning_rate": 8.254584093749071e-06, "loss": 0.7862, "step": 9663 }, { "epoch": 0.2961873237709942, "grad_norm": 1.1203285364308644, "learning_rate": 8.254207298495148e-06, "loss": 0.5793, "step": 9664 }, { "epoch": 0.2962179722937354, "grad_norm": 0.4548816265571695, "learning_rate": 8.253830471176687e-06, "loss": 0.4209, "step": 9665 }, { "epoch": 0.2962486208164766, "grad_norm": 1.4031052427211432, "learning_rate": 8.253453611797403e-06, "loss": 0.6763, "step": 9666 }, { "epoch": 0.29627926933921783, "grad_norm": 0.47415288390853244, "learning_rate": 8.253076720361006e-06, "loss": 0.4535, "step": 9667 }, { "epoch": 0.29630991786195904, "grad_norm": 1.3142185763256862, "learning_rate": 8.252699796871213e-06, "loss": 0.7511, "step": 9668 }, { "epoch": 0.29634056638470024, "grad_norm": 0.4609375941677661, "learning_rate": 8.252322841331737e-06, "loss": 0.4395, "step": 9669 }, { "epoch": 0.29637121490744145, "grad_norm": 1.368657318680322, "learning_rate": 8.251945853746293e-06, "loss": 0.7338, "step": 9670 }, { "epoch": 0.29640186343018265, "grad_norm": 1.3761425213780858, "learning_rate": 8.251568834118592e-06, "loss": 0.7401, "step": 9671 }, { "epoch": 0.29643251195292386, "grad_norm": 1.2264908360432587, "learning_rate": 8.251191782452352e-06, "loss": 0.7208, "step": 9672 }, { "epoch": 0.29646316047566507, "grad_norm": 1.4306986954263008, "learning_rate": 8.250814698751289e-06, "loss": 0.7006, "step": 9673 }, { "epoch": 0.29649380899840627, "grad_norm": 1.1740330503628218, "learning_rate": 8.250437583019114e-06, "loss": 0.7148, "step": 9674 }, { "epoch": 0.2965244575211475, "grad_norm": 1.1001680252911519, "learning_rate": 8.250060435259548e-06, "loss": 0.4656, "step": 9675 }, { "epoch": 0.2965551060438887, "grad_norm": 1.2515157654513454, "learning_rate": 8.249683255476304e-06, "loss": 0.7174, "step": 9676 }, { "epoch": 0.2965857545666299, "grad_norm": 0.5395237840246254, "learning_rate": 8.2493060436731e-06, "loss": 0.4634, "step": 9677 }, { "epoch": 0.2966164030893711, "grad_norm": 1.4935946253469912, "learning_rate": 8.248928799853652e-06, "loss": 0.6624, "step": 9678 }, { "epoch": 0.2966470516121123, "grad_norm": 1.2000951902443775, "learning_rate": 8.248551524021678e-06, "loss": 0.6393, "step": 9679 }, { "epoch": 0.2966777001348535, "grad_norm": 1.2186494556550593, "learning_rate": 8.248174216180895e-06, "loss": 0.6356, "step": 9680 }, { "epoch": 0.2967083486575947, "grad_norm": 1.0909546315406133, "learning_rate": 8.247796876335019e-06, "loss": 0.5839, "step": 9681 }, { "epoch": 0.2967389971803359, "grad_norm": 1.0254819492833052, "learning_rate": 8.247419504487769e-06, "loss": 0.5516, "step": 9682 }, { "epoch": 0.2967696457030771, "grad_norm": 0.45192575029055104, "learning_rate": 8.247042100642863e-06, "loss": 0.4282, "step": 9683 }, { "epoch": 0.29680029422581833, "grad_norm": 1.3213472177158072, "learning_rate": 8.246664664804024e-06, "loss": 0.7396, "step": 9684 }, { "epoch": 0.29683094274855953, "grad_norm": 1.2425912052449977, "learning_rate": 8.246287196974964e-06, "loss": 0.6632, "step": 9685 }, { "epoch": 0.29686159127130074, "grad_norm": 1.2778938319786677, "learning_rate": 8.245909697159408e-06, "loss": 0.6503, "step": 9686 }, { "epoch": 0.29689223979404195, "grad_norm": 1.2349247368340908, "learning_rate": 8.245532165361072e-06, "loss": 0.669, "step": 9687 }, { "epoch": 0.29692288831678315, "grad_norm": 4.006402471916324, "learning_rate": 8.245154601583678e-06, "loss": 0.7065, "step": 9688 }, { "epoch": 0.29695353683952436, "grad_norm": 1.2327686784274263, "learning_rate": 8.244777005830944e-06, "loss": 0.6478, "step": 9689 }, { "epoch": 0.29698418536226556, "grad_norm": 1.2596081702779904, "learning_rate": 8.244399378106593e-06, "loss": 0.7438, "step": 9690 }, { "epoch": 0.29701483388500677, "grad_norm": 1.4504176545338257, "learning_rate": 8.244021718414344e-06, "loss": 0.7464, "step": 9691 }, { "epoch": 0.297045482407748, "grad_norm": 1.2416187825560852, "learning_rate": 8.24364402675792e-06, "loss": 0.5489, "step": 9692 }, { "epoch": 0.2970761309304891, "grad_norm": 1.14747813517756, "learning_rate": 8.243266303141042e-06, "loss": 0.623, "step": 9693 }, { "epoch": 0.29710677945323033, "grad_norm": 1.1428557227661527, "learning_rate": 8.24288854756743e-06, "loss": 0.6349, "step": 9694 }, { "epoch": 0.29713742797597154, "grad_norm": 1.2840378158215986, "learning_rate": 8.242510760040807e-06, "loss": 0.6799, "step": 9695 }, { "epoch": 0.29716807649871274, "grad_norm": 1.224081770616697, "learning_rate": 8.242132940564898e-06, "loss": 0.6673, "step": 9696 }, { "epoch": 0.29719872502145395, "grad_norm": 0.49511815452347807, "learning_rate": 8.241755089143421e-06, "loss": 0.4215, "step": 9697 }, { "epoch": 0.29722937354419515, "grad_norm": 1.3227798612455175, "learning_rate": 8.241377205780103e-06, "loss": 0.7567, "step": 9698 }, { "epoch": 0.29726002206693636, "grad_norm": 1.1755190999403504, "learning_rate": 8.240999290478667e-06, "loss": 0.6099, "step": 9699 }, { "epoch": 0.29729067058967756, "grad_norm": 1.246184472505638, "learning_rate": 8.240621343242832e-06, "loss": 0.6725, "step": 9700 }, { "epoch": 0.29732131911241877, "grad_norm": 1.1857539567591608, "learning_rate": 8.240243364076328e-06, "loss": 0.6544, "step": 9701 }, { "epoch": 0.29735196763516, "grad_norm": 1.2671913290586883, "learning_rate": 8.23986535298288e-06, "loss": 0.7177, "step": 9702 }, { "epoch": 0.2973826161579012, "grad_norm": 1.2111812928147003, "learning_rate": 8.239487309966205e-06, "loss": 0.7122, "step": 9703 }, { "epoch": 0.2974132646806424, "grad_norm": 1.2228548785014681, "learning_rate": 8.239109235030037e-06, "loss": 0.6769, "step": 9704 }, { "epoch": 0.2974439132033836, "grad_norm": 1.2120681207452804, "learning_rate": 8.238731128178094e-06, "loss": 0.4982, "step": 9705 }, { "epoch": 0.2974745617261248, "grad_norm": 0.5203876333261463, "learning_rate": 8.238352989414104e-06, "loss": 0.4495, "step": 9706 }, { "epoch": 0.297505210248866, "grad_norm": 1.4616036618669697, "learning_rate": 8.237974818741796e-06, "loss": 0.6677, "step": 9707 }, { "epoch": 0.2975358587716072, "grad_norm": 1.229336532112047, "learning_rate": 8.237596616164893e-06, "loss": 0.5439, "step": 9708 }, { "epoch": 0.2975665072943484, "grad_norm": 1.3164503767083313, "learning_rate": 8.23721838168712e-06, "loss": 0.7839, "step": 9709 }, { "epoch": 0.2975971558170896, "grad_norm": 1.6500784787857372, "learning_rate": 8.236840115312207e-06, "loss": 0.6513, "step": 9710 }, { "epoch": 0.2976278043398308, "grad_norm": 1.5897247740600047, "learning_rate": 8.236461817043881e-06, "loss": 0.6733, "step": 9711 }, { "epoch": 0.29765845286257203, "grad_norm": 1.2075275555207707, "learning_rate": 8.236083486885869e-06, "loss": 0.6457, "step": 9712 }, { "epoch": 0.29768910138531324, "grad_norm": 1.2443102699403537, "learning_rate": 8.235705124841898e-06, "loss": 0.7201, "step": 9713 }, { "epoch": 0.29771974990805444, "grad_norm": 0.4418032217758344, "learning_rate": 8.235326730915696e-06, "loss": 0.4293, "step": 9714 }, { "epoch": 0.29775039843079565, "grad_norm": 1.3047813278549587, "learning_rate": 8.234948305110993e-06, "loss": 0.7894, "step": 9715 }, { "epoch": 0.29778104695353685, "grad_norm": 0.43489549723228416, "learning_rate": 8.234569847431514e-06, "loss": 0.4638, "step": 9716 }, { "epoch": 0.29781169547627806, "grad_norm": 1.2710404562327662, "learning_rate": 8.234191357880994e-06, "loss": 0.7281, "step": 9717 }, { "epoch": 0.29784234399901927, "grad_norm": 1.25794847447744, "learning_rate": 8.233812836463157e-06, "loss": 0.6457, "step": 9718 }, { "epoch": 0.29787299252176047, "grad_norm": 1.3677673892839477, "learning_rate": 8.233434283181737e-06, "loss": 0.6936, "step": 9719 }, { "epoch": 0.2979036410445017, "grad_norm": 1.29548592941712, "learning_rate": 8.23305569804046e-06, "loss": 0.7345, "step": 9720 }, { "epoch": 0.2979342895672429, "grad_norm": 1.2903727930559892, "learning_rate": 8.232677081043057e-06, "loss": 0.7344, "step": 9721 }, { "epoch": 0.2979649380899841, "grad_norm": 1.2047798199300612, "learning_rate": 8.23229843219326e-06, "loss": 0.6673, "step": 9722 }, { "epoch": 0.2979955866127253, "grad_norm": 1.3418439322215892, "learning_rate": 8.231919751494802e-06, "loss": 0.6911, "step": 9723 }, { "epoch": 0.29802623513546644, "grad_norm": 1.250592976261402, "learning_rate": 8.23154103895141e-06, "loss": 0.6935, "step": 9724 }, { "epoch": 0.29805688365820765, "grad_norm": 1.3148799488562877, "learning_rate": 8.231162294566817e-06, "loss": 0.7054, "step": 9725 }, { "epoch": 0.29808753218094886, "grad_norm": 1.6924805505396987, "learning_rate": 8.230783518344754e-06, "loss": 0.7328, "step": 9726 }, { "epoch": 0.29811818070369006, "grad_norm": 0.46392882558480325, "learning_rate": 8.230404710288955e-06, "loss": 0.4242, "step": 9727 }, { "epoch": 0.29814882922643127, "grad_norm": 1.2438370943446575, "learning_rate": 8.230025870403153e-06, "loss": 0.6581, "step": 9728 }, { "epoch": 0.2981794777491725, "grad_norm": 1.3138501199468775, "learning_rate": 8.22964699869108e-06, "loss": 0.6543, "step": 9729 }, { "epoch": 0.2982101262719137, "grad_norm": 1.5872688953568896, "learning_rate": 8.229268095156469e-06, "loss": 0.7624, "step": 9730 }, { "epoch": 0.2982407747946549, "grad_norm": 1.3650405324446486, "learning_rate": 8.22888915980305e-06, "loss": 0.7523, "step": 9731 }, { "epoch": 0.2982714233173961, "grad_norm": 1.3785377103150722, "learning_rate": 8.228510192634564e-06, "loss": 0.7237, "step": 9732 }, { "epoch": 0.2983020718401373, "grad_norm": 1.3552476165435037, "learning_rate": 8.228131193654739e-06, "loss": 0.6817, "step": 9733 }, { "epoch": 0.2983327203628785, "grad_norm": 1.225778626085964, "learning_rate": 8.227752162867312e-06, "loss": 0.7233, "step": 9734 }, { "epoch": 0.2983633688856197, "grad_norm": 1.182783370925635, "learning_rate": 8.227373100276017e-06, "loss": 0.7024, "step": 9735 }, { "epoch": 0.2983940174083609, "grad_norm": 1.2867206687387687, "learning_rate": 8.226994005884588e-06, "loss": 0.7119, "step": 9736 }, { "epoch": 0.2984246659311021, "grad_norm": 0.45530981598097636, "learning_rate": 8.226614879696762e-06, "loss": 0.4337, "step": 9737 }, { "epoch": 0.2984553144538433, "grad_norm": 1.3221188024016979, "learning_rate": 8.226235721716274e-06, "loss": 0.6207, "step": 9738 }, { "epoch": 0.29848596297658453, "grad_norm": 0.46261933531820293, "learning_rate": 8.22585653194686e-06, "loss": 0.4489, "step": 9739 }, { "epoch": 0.29851661149932573, "grad_norm": 1.1747624755285573, "learning_rate": 8.225477310392259e-06, "loss": 0.6209, "step": 9740 }, { "epoch": 0.29854726002206694, "grad_norm": 1.1575275635175049, "learning_rate": 8.2250980570562e-06, "loss": 0.7497, "step": 9741 }, { "epoch": 0.29857790854480815, "grad_norm": 1.331097724212828, "learning_rate": 8.224718771942428e-06, "loss": 0.6759, "step": 9742 }, { "epoch": 0.29860855706754935, "grad_norm": 1.530166488674781, "learning_rate": 8.224339455054675e-06, "loss": 0.6874, "step": 9743 }, { "epoch": 0.29863920559029056, "grad_norm": 1.271155341471987, "learning_rate": 8.223960106396681e-06, "loss": 0.7193, "step": 9744 }, { "epoch": 0.29866985411303176, "grad_norm": 1.2438172192414896, "learning_rate": 8.223580725972184e-06, "loss": 0.6915, "step": 9745 }, { "epoch": 0.29870050263577297, "grad_norm": 1.1881795329703244, "learning_rate": 8.223201313784921e-06, "loss": 0.6216, "step": 9746 }, { "epoch": 0.2987311511585142, "grad_norm": 1.1435922400502727, "learning_rate": 8.22282186983863e-06, "loss": 0.6377, "step": 9747 }, { "epoch": 0.2987617996812554, "grad_norm": 1.252108446417388, "learning_rate": 8.22244239413705e-06, "loss": 0.6985, "step": 9748 }, { "epoch": 0.2987924482039966, "grad_norm": 1.246705697690584, "learning_rate": 8.222062886683923e-06, "loss": 0.6584, "step": 9749 }, { "epoch": 0.2988230967267378, "grad_norm": 1.342420404499056, "learning_rate": 8.221683347482984e-06, "loss": 0.6742, "step": 9750 }, { "epoch": 0.298853745249479, "grad_norm": 1.152916232701824, "learning_rate": 8.221303776537975e-06, "loss": 0.5851, "step": 9751 }, { "epoch": 0.2988843937722202, "grad_norm": 1.1488749796995104, "learning_rate": 8.220924173852635e-06, "loss": 0.5553, "step": 9752 }, { "epoch": 0.2989150422949614, "grad_norm": 1.619411014056638, "learning_rate": 8.220544539430707e-06, "loss": 0.6633, "step": 9753 }, { "epoch": 0.2989456908177026, "grad_norm": 1.5697616576901159, "learning_rate": 8.220164873275928e-06, "loss": 0.728, "step": 9754 }, { "epoch": 0.29897633934044376, "grad_norm": 1.2689879462393543, "learning_rate": 8.21978517539204e-06, "loss": 0.6742, "step": 9755 }, { "epoch": 0.29900698786318497, "grad_norm": 1.2994813805912695, "learning_rate": 8.219405445782786e-06, "loss": 0.7416, "step": 9756 }, { "epoch": 0.2990376363859262, "grad_norm": 1.417378502164575, "learning_rate": 8.219025684451907e-06, "loss": 0.7424, "step": 9757 }, { "epoch": 0.2990682849086674, "grad_norm": 1.38802821823646, "learning_rate": 8.218645891403145e-06, "loss": 0.7679, "step": 9758 }, { "epoch": 0.2990989334314086, "grad_norm": 1.3459575560748562, "learning_rate": 8.218266066640238e-06, "loss": 0.7452, "step": 9759 }, { "epoch": 0.2991295819541498, "grad_norm": 1.1218483964077755, "learning_rate": 8.217886210166936e-06, "loss": 0.6482, "step": 9760 }, { "epoch": 0.299160230476891, "grad_norm": 1.3637577800924858, "learning_rate": 8.217506321986976e-06, "loss": 0.6841, "step": 9761 }, { "epoch": 0.2991908789996322, "grad_norm": 1.2607884557070215, "learning_rate": 8.217126402104103e-06, "loss": 0.6384, "step": 9762 }, { "epoch": 0.2992215275223734, "grad_norm": 0.5299586785385665, "learning_rate": 8.216746450522059e-06, "loss": 0.4259, "step": 9763 }, { "epoch": 0.2992521760451146, "grad_norm": 1.4428778441750725, "learning_rate": 8.216366467244592e-06, "loss": 0.6659, "step": 9764 }, { "epoch": 0.2992828245678558, "grad_norm": 1.191829094743175, "learning_rate": 8.215986452275442e-06, "loss": 0.6419, "step": 9765 }, { "epoch": 0.299313473090597, "grad_norm": 1.2469562033421824, "learning_rate": 8.215606405618355e-06, "loss": 0.7546, "step": 9766 }, { "epoch": 0.29934412161333823, "grad_norm": 1.2384633428022505, "learning_rate": 8.215226327277073e-06, "loss": 0.6933, "step": 9767 }, { "epoch": 0.29937477013607944, "grad_norm": 1.2367695797838527, "learning_rate": 8.214846217255346e-06, "loss": 0.6912, "step": 9768 }, { "epoch": 0.29940541865882064, "grad_norm": 1.40367544854945, "learning_rate": 8.214466075556915e-06, "loss": 0.6587, "step": 9769 }, { "epoch": 0.29943606718156185, "grad_norm": 1.3161278737570963, "learning_rate": 8.21408590218553e-06, "loss": 0.6761, "step": 9770 }, { "epoch": 0.29946671570430305, "grad_norm": 1.1214751243164045, "learning_rate": 8.213705697144932e-06, "loss": 0.6223, "step": 9771 }, { "epoch": 0.29949736422704426, "grad_norm": 0.5117515954960983, "learning_rate": 8.213325460438868e-06, "loss": 0.4486, "step": 9772 }, { "epoch": 0.29952801274978547, "grad_norm": 1.338617342468079, "learning_rate": 8.212945192071089e-06, "loss": 0.6334, "step": 9773 }, { "epoch": 0.29955866127252667, "grad_norm": 1.3190650112791136, "learning_rate": 8.212564892045338e-06, "loss": 0.7254, "step": 9774 }, { "epoch": 0.2995893097952679, "grad_norm": 1.2088064369251603, "learning_rate": 8.212184560365363e-06, "loss": 0.7265, "step": 9775 }, { "epoch": 0.2996199583180091, "grad_norm": 1.2961493088614704, "learning_rate": 8.211804197034913e-06, "loss": 0.6616, "step": 9776 }, { "epoch": 0.2996506068407503, "grad_norm": 1.3514438855930537, "learning_rate": 8.211423802057733e-06, "loss": 0.7231, "step": 9777 }, { "epoch": 0.2996812553634915, "grad_norm": 1.1676037241043153, "learning_rate": 8.211043375437573e-06, "loss": 0.6912, "step": 9778 }, { "epoch": 0.2997119038862327, "grad_norm": 1.1647890345068355, "learning_rate": 8.21066291717818e-06, "loss": 0.7495, "step": 9779 }, { "epoch": 0.2997425524089739, "grad_norm": 1.1851650023119542, "learning_rate": 8.210282427283304e-06, "loss": 0.6791, "step": 9780 }, { "epoch": 0.2997732009317151, "grad_norm": 0.4816461071802743, "learning_rate": 8.209901905756695e-06, "loss": 0.4614, "step": 9781 }, { "epoch": 0.2998038494544563, "grad_norm": 1.1541911779608205, "learning_rate": 8.209521352602102e-06, "loss": 0.6384, "step": 9782 }, { "epoch": 0.2998344979771975, "grad_norm": 1.1182777609646541, "learning_rate": 8.209140767823271e-06, "loss": 0.7268, "step": 9783 }, { "epoch": 0.29986514649993873, "grad_norm": 1.2289757279217741, "learning_rate": 8.208760151423959e-06, "loss": 0.6902, "step": 9784 }, { "epoch": 0.29989579502267993, "grad_norm": 0.4696175656667898, "learning_rate": 8.208379503407908e-06, "loss": 0.4581, "step": 9785 }, { "epoch": 0.2999264435454211, "grad_norm": 1.3874742980253485, "learning_rate": 8.207998823778874e-06, "loss": 0.6993, "step": 9786 }, { "epoch": 0.2999570920681623, "grad_norm": 1.395712467758243, "learning_rate": 8.207618112540607e-06, "loss": 0.713, "step": 9787 }, { "epoch": 0.2999877405909035, "grad_norm": 1.1959332135318297, "learning_rate": 8.20723736969686e-06, "loss": 0.7068, "step": 9788 }, { "epoch": 0.3000183891136447, "grad_norm": 1.2983290224835717, "learning_rate": 8.20685659525138e-06, "loss": 0.7776, "step": 9789 }, { "epoch": 0.3000490376363859, "grad_norm": 1.118959955978787, "learning_rate": 8.206475789207924e-06, "loss": 0.6164, "step": 9790 }, { "epoch": 0.3000796861591271, "grad_norm": 1.2915952353231077, "learning_rate": 8.20609495157024e-06, "loss": 0.6978, "step": 9791 }, { "epoch": 0.3001103346818683, "grad_norm": 2.560749786480205, "learning_rate": 8.205714082342082e-06, "loss": 0.6796, "step": 9792 }, { "epoch": 0.3001409832046095, "grad_norm": 0.49797275224807913, "learning_rate": 8.205333181527203e-06, "loss": 0.4437, "step": 9793 }, { "epoch": 0.30017163172735073, "grad_norm": 1.3509784947608041, "learning_rate": 8.204952249129356e-06, "loss": 0.676, "step": 9794 }, { "epoch": 0.30020228025009194, "grad_norm": 1.3315761653104414, "learning_rate": 8.204571285152293e-06, "loss": 0.6854, "step": 9795 }, { "epoch": 0.30023292877283314, "grad_norm": 1.1495641012931814, "learning_rate": 8.204190289599773e-06, "loss": 0.6651, "step": 9796 }, { "epoch": 0.30026357729557435, "grad_norm": 1.3466261723549804, "learning_rate": 8.203809262475545e-06, "loss": 0.5913, "step": 9797 }, { "epoch": 0.30029422581831555, "grad_norm": 1.270413392633421, "learning_rate": 8.203428203783362e-06, "loss": 0.5981, "step": 9798 }, { "epoch": 0.30032487434105676, "grad_norm": 1.1385482561463067, "learning_rate": 8.203047113526983e-06, "loss": 0.7104, "step": 9799 }, { "epoch": 0.30035552286379796, "grad_norm": 1.1436171959127306, "learning_rate": 8.202665991710162e-06, "loss": 0.6324, "step": 9800 }, { "epoch": 0.30038617138653917, "grad_norm": 1.1783582599202216, "learning_rate": 8.202284838336654e-06, "loss": 0.6405, "step": 9801 }, { "epoch": 0.3004168199092804, "grad_norm": 1.2785624726989986, "learning_rate": 8.201903653410213e-06, "loss": 0.6806, "step": 9802 }, { "epoch": 0.3004474684320216, "grad_norm": 1.1853930461081075, "learning_rate": 8.201522436934596e-06, "loss": 0.6557, "step": 9803 }, { "epoch": 0.3004781169547628, "grad_norm": 0.4990066433798806, "learning_rate": 8.201141188913559e-06, "loss": 0.4436, "step": 9804 }, { "epoch": 0.300508765477504, "grad_norm": 1.3099256130362271, "learning_rate": 8.20075990935086e-06, "loss": 0.6738, "step": 9805 }, { "epoch": 0.3005394140002452, "grad_norm": 1.1970323027277319, "learning_rate": 8.200378598250253e-06, "loss": 0.6753, "step": 9806 }, { "epoch": 0.3005700625229864, "grad_norm": 1.1853061103167204, "learning_rate": 8.199997255615497e-06, "loss": 0.6888, "step": 9807 }, { "epoch": 0.3006007110457276, "grad_norm": 1.26444057863915, "learning_rate": 8.19961588145035e-06, "loss": 0.7097, "step": 9808 }, { "epoch": 0.3006313595684688, "grad_norm": 1.280597370839224, "learning_rate": 8.19923447575857e-06, "loss": 0.6357, "step": 9809 }, { "epoch": 0.30066200809121, "grad_norm": 1.203677927983821, "learning_rate": 8.198853038543913e-06, "loss": 0.7024, "step": 9810 }, { "epoch": 0.3006926566139512, "grad_norm": 1.3060505603869952, "learning_rate": 8.198471569810138e-06, "loss": 0.7793, "step": 9811 }, { "epoch": 0.30072330513669243, "grad_norm": 1.2367888922935777, "learning_rate": 8.198090069561005e-06, "loss": 0.6745, "step": 9812 }, { "epoch": 0.30075395365943364, "grad_norm": 1.1095330062591469, "learning_rate": 8.197708537800271e-06, "loss": 0.6826, "step": 9813 }, { "epoch": 0.30078460218217484, "grad_norm": 1.2018938883327293, "learning_rate": 8.197326974531699e-06, "loss": 0.5976, "step": 9814 }, { "epoch": 0.30081525070491605, "grad_norm": 1.2670801853830111, "learning_rate": 8.196945379759045e-06, "loss": 0.7165, "step": 9815 }, { "epoch": 0.30084589922765725, "grad_norm": 1.333600110517507, "learning_rate": 8.19656375348607e-06, "loss": 0.6736, "step": 9816 }, { "epoch": 0.3008765477503984, "grad_norm": 0.48665329600215124, "learning_rate": 8.196182095716534e-06, "loss": 0.4383, "step": 9817 }, { "epoch": 0.3009071962731396, "grad_norm": 0.5045325709756613, "learning_rate": 8.1958004064542e-06, "loss": 0.4242, "step": 9818 }, { "epoch": 0.3009378447958808, "grad_norm": 1.2172587278008877, "learning_rate": 8.195418685702826e-06, "loss": 0.7029, "step": 9819 }, { "epoch": 0.300968493318622, "grad_norm": 1.2998463124552335, "learning_rate": 8.195036933466173e-06, "loss": 0.6337, "step": 9820 }, { "epoch": 0.3009991418413632, "grad_norm": 1.2886184775744143, "learning_rate": 8.194655149748005e-06, "loss": 0.6335, "step": 9821 }, { "epoch": 0.30102979036410443, "grad_norm": 1.3723369111918544, "learning_rate": 8.194273334552081e-06, "loss": 0.6707, "step": 9822 }, { "epoch": 0.30106043888684564, "grad_norm": 1.1781187461496996, "learning_rate": 8.193891487882167e-06, "loss": 0.6676, "step": 9823 }, { "epoch": 0.30109108740958684, "grad_norm": 1.184413892003798, "learning_rate": 8.19350960974202e-06, "loss": 0.6611, "step": 9824 }, { "epoch": 0.30112173593232805, "grad_norm": 1.2159868714124504, "learning_rate": 8.193127700135408e-06, "loss": 0.756, "step": 9825 }, { "epoch": 0.30115238445506926, "grad_norm": 1.3548609378266727, "learning_rate": 8.192745759066089e-06, "loss": 0.5912, "step": 9826 }, { "epoch": 0.30118303297781046, "grad_norm": 1.2225299668777823, "learning_rate": 8.192363786537834e-06, "loss": 0.7438, "step": 9827 }, { "epoch": 0.30121368150055167, "grad_norm": 0.6108496454458413, "learning_rate": 8.191981782554397e-06, "loss": 0.4499, "step": 9828 }, { "epoch": 0.30124433002329287, "grad_norm": 1.2452411906859082, "learning_rate": 8.19159974711955e-06, "loss": 0.7665, "step": 9829 }, { "epoch": 0.3012749785460341, "grad_norm": 1.3972389940854264, "learning_rate": 8.191217680237053e-06, "loss": 0.6746, "step": 9830 }, { "epoch": 0.3013056270687753, "grad_norm": 0.49607890864849963, "learning_rate": 8.19083558191067e-06, "loss": 0.4389, "step": 9831 }, { "epoch": 0.3013362755915165, "grad_norm": 1.1631633786053295, "learning_rate": 8.19045345214417e-06, "loss": 0.6573, "step": 9832 }, { "epoch": 0.3013669241142577, "grad_norm": 1.3914160615274915, "learning_rate": 8.190071290941313e-06, "loss": 0.6646, "step": 9833 }, { "epoch": 0.3013975726369989, "grad_norm": 1.3965540631585234, "learning_rate": 8.18968909830587e-06, "loss": 0.7246, "step": 9834 }, { "epoch": 0.3014282211597401, "grad_norm": 1.2693872288754378, "learning_rate": 8.189306874241603e-06, "loss": 0.6991, "step": 9835 }, { "epoch": 0.3014588696824813, "grad_norm": 1.1146366711813722, "learning_rate": 8.18892461875228e-06, "loss": 0.6453, "step": 9836 }, { "epoch": 0.3014895182052225, "grad_norm": 1.1996388621277807, "learning_rate": 8.188542331841667e-06, "loss": 0.7382, "step": 9837 }, { "epoch": 0.3015201667279637, "grad_norm": 1.2449619866269666, "learning_rate": 8.188160013513531e-06, "loss": 0.7588, "step": 9838 }, { "epoch": 0.30155081525070493, "grad_norm": 0.6303057623329422, "learning_rate": 8.187777663771637e-06, "loss": 0.4412, "step": 9839 }, { "epoch": 0.30158146377344613, "grad_norm": 1.5083528670793198, "learning_rate": 8.187395282619755e-06, "loss": 0.6899, "step": 9840 }, { "epoch": 0.30161211229618734, "grad_norm": 1.4876850449497268, "learning_rate": 8.18701287006165e-06, "loss": 0.774, "step": 9841 }, { "epoch": 0.30164276081892855, "grad_norm": 0.4899896983172769, "learning_rate": 8.186630426101094e-06, "loss": 0.4416, "step": 9842 }, { "epoch": 0.30167340934166975, "grad_norm": 1.2318210194279968, "learning_rate": 8.186247950741852e-06, "loss": 0.7536, "step": 9843 }, { "epoch": 0.30170405786441096, "grad_norm": 1.136282097776719, "learning_rate": 8.185865443987695e-06, "loss": 0.5276, "step": 9844 }, { "epoch": 0.30173470638715216, "grad_norm": 1.5558181245673384, "learning_rate": 8.18548290584239e-06, "loss": 0.6303, "step": 9845 }, { "epoch": 0.30176535490989337, "grad_norm": 1.4387365143410897, "learning_rate": 8.185100336309706e-06, "loss": 0.6648, "step": 9846 }, { "epoch": 0.3017960034326346, "grad_norm": 1.3279414184008347, "learning_rate": 8.184717735393415e-06, "loss": 0.7071, "step": 9847 }, { "epoch": 0.3018266519553757, "grad_norm": 1.467996262713598, "learning_rate": 8.184335103097284e-06, "loss": 0.7122, "step": 9848 }, { "epoch": 0.30185730047811693, "grad_norm": 1.4029485944318796, "learning_rate": 8.183952439425084e-06, "loss": 0.7022, "step": 9849 }, { "epoch": 0.30188794900085814, "grad_norm": 1.3978352795732583, "learning_rate": 8.183569744380587e-06, "loss": 0.702, "step": 9850 }, { "epoch": 0.30191859752359934, "grad_norm": 1.4509277760503452, "learning_rate": 8.183187017967562e-06, "loss": 0.6692, "step": 9851 }, { "epoch": 0.30194924604634055, "grad_norm": 1.292778161573153, "learning_rate": 8.182804260189783e-06, "loss": 0.6298, "step": 9852 }, { "epoch": 0.30197989456908175, "grad_norm": 1.498446617953784, "learning_rate": 8.182421471051018e-06, "loss": 0.6514, "step": 9853 }, { "epoch": 0.30201054309182296, "grad_norm": 1.3638138136711317, "learning_rate": 8.18203865055504e-06, "loss": 0.6727, "step": 9854 }, { "epoch": 0.30204119161456416, "grad_norm": 1.1695075844467044, "learning_rate": 8.181655798705618e-06, "loss": 0.6043, "step": 9855 }, { "epoch": 0.30207184013730537, "grad_norm": 1.2605963037526275, "learning_rate": 8.18127291550653e-06, "loss": 0.6927, "step": 9856 }, { "epoch": 0.3021024886600466, "grad_norm": 1.0566094662601433, "learning_rate": 8.180890000961548e-06, "loss": 0.5968, "step": 9857 }, { "epoch": 0.3021331371827878, "grad_norm": 1.2517336164190747, "learning_rate": 8.18050705507444e-06, "loss": 0.6515, "step": 9858 }, { "epoch": 0.302163785705529, "grad_norm": 1.3521477883042834, "learning_rate": 8.180124077848983e-06, "loss": 0.7202, "step": 9859 }, { "epoch": 0.3021944342282702, "grad_norm": 1.3156101821290505, "learning_rate": 8.179741069288951e-06, "loss": 0.5621, "step": 9860 }, { "epoch": 0.3022250827510114, "grad_norm": 1.0807116229153615, "learning_rate": 8.179358029398117e-06, "loss": 0.7112, "step": 9861 }, { "epoch": 0.3022557312737526, "grad_norm": 1.3864588141004874, "learning_rate": 8.178974958180253e-06, "loss": 0.7049, "step": 9862 }, { "epoch": 0.3022863797964938, "grad_norm": 1.4209415413133935, "learning_rate": 8.178591855639136e-06, "loss": 0.7272, "step": 9863 }, { "epoch": 0.302317028319235, "grad_norm": 1.304058468860858, "learning_rate": 8.17820872177854e-06, "loss": 0.7175, "step": 9864 }, { "epoch": 0.3023476768419762, "grad_norm": 1.2494619739153943, "learning_rate": 8.17782555660224e-06, "loss": 0.7103, "step": 9865 }, { "epoch": 0.3023783253647174, "grad_norm": 1.181825994549511, "learning_rate": 8.177442360114012e-06, "loss": 0.699, "step": 9866 }, { "epoch": 0.30240897388745863, "grad_norm": 0.8246450930199596, "learning_rate": 8.17705913231763e-06, "loss": 0.4279, "step": 9867 }, { "epoch": 0.30243962241019984, "grad_norm": 1.2467823586690006, "learning_rate": 8.176675873216874e-06, "loss": 0.6337, "step": 9868 }, { "epoch": 0.30247027093294104, "grad_norm": 1.149914209319475, "learning_rate": 8.176292582815517e-06, "loss": 0.5931, "step": 9869 }, { "epoch": 0.30250091945568225, "grad_norm": 1.1797942527200398, "learning_rate": 8.175909261117336e-06, "loss": 0.6301, "step": 9870 }, { "epoch": 0.30253156797842345, "grad_norm": 0.4892415620124969, "learning_rate": 8.17552590812611e-06, "loss": 0.4315, "step": 9871 }, { "epoch": 0.30256221650116466, "grad_norm": 1.4814962183187443, "learning_rate": 8.175142523845613e-06, "loss": 0.6656, "step": 9872 }, { "epoch": 0.30259286502390587, "grad_norm": 1.4904465531072009, "learning_rate": 8.174759108279625e-06, "loss": 0.6804, "step": 9873 }, { "epoch": 0.30262351354664707, "grad_norm": 1.4479782996001798, "learning_rate": 8.174375661431924e-06, "loss": 0.8331, "step": 9874 }, { "epoch": 0.3026541620693883, "grad_norm": 1.1290115728599357, "learning_rate": 8.173992183306285e-06, "loss": 0.5948, "step": 9875 }, { "epoch": 0.3026848105921295, "grad_norm": 1.2050519254628973, "learning_rate": 8.17360867390649e-06, "loss": 0.7149, "step": 9876 }, { "epoch": 0.3027154591148707, "grad_norm": 1.1526667520103326, "learning_rate": 8.173225133236317e-06, "loss": 0.6779, "step": 9877 }, { "epoch": 0.3027461076376119, "grad_norm": 1.3043944505948553, "learning_rate": 8.172841561299547e-06, "loss": 0.7546, "step": 9878 }, { "epoch": 0.30277675616035304, "grad_norm": 1.1230281743952801, "learning_rate": 8.172457958099954e-06, "loss": 0.6963, "step": 9879 }, { "epoch": 0.30280740468309425, "grad_norm": 1.0976997496426508, "learning_rate": 8.172074323641323e-06, "loss": 0.621, "step": 9880 }, { "epoch": 0.30283805320583546, "grad_norm": 1.3002226132565715, "learning_rate": 8.171690657927432e-06, "loss": 0.7116, "step": 9881 }, { "epoch": 0.30286870172857666, "grad_norm": 1.2586368047982825, "learning_rate": 8.17130696096206e-06, "loss": 0.7856, "step": 9882 }, { "epoch": 0.30289935025131787, "grad_norm": 1.2192539453293034, "learning_rate": 8.17092323274899e-06, "loss": 0.6306, "step": 9883 }, { "epoch": 0.3029299987740591, "grad_norm": 1.3331908766333265, "learning_rate": 8.170539473292001e-06, "loss": 0.7982, "step": 9884 }, { "epoch": 0.3029606472968003, "grad_norm": 1.4694821728475076, "learning_rate": 8.170155682594877e-06, "loss": 0.7388, "step": 9885 }, { "epoch": 0.3029912958195415, "grad_norm": 0.8714824420183832, "learning_rate": 8.169771860661397e-06, "loss": 0.4547, "step": 9886 }, { "epoch": 0.3030219443422827, "grad_norm": 0.6919987208438505, "learning_rate": 8.169388007495344e-06, "loss": 0.4287, "step": 9887 }, { "epoch": 0.3030525928650239, "grad_norm": 1.443874753948642, "learning_rate": 8.169004123100501e-06, "loss": 0.634, "step": 9888 }, { "epoch": 0.3030832413877651, "grad_norm": 1.2807121447281695, "learning_rate": 8.168620207480649e-06, "loss": 0.6851, "step": 9889 }, { "epoch": 0.3031138899105063, "grad_norm": 1.387917478904094, "learning_rate": 8.16823626063957e-06, "loss": 0.6984, "step": 9890 }, { "epoch": 0.3031445384332475, "grad_norm": 1.174264039952779, "learning_rate": 8.16785228258105e-06, "loss": 0.5665, "step": 9891 }, { "epoch": 0.3031751869559887, "grad_norm": 1.305577483100902, "learning_rate": 8.16746827330887e-06, "loss": 0.6781, "step": 9892 }, { "epoch": 0.3032058354787299, "grad_norm": 1.4804592674737118, "learning_rate": 8.167084232826816e-06, "loss": 0.6095, "step": 9893 }, { "epoch": 0.30323648400147113, "grad_norm": 1.2748975369541555, "learning_rate": 8.16670016113867e-06, "loss": 0.7404, "step": 9894 }, { "epoch": 0.30326713252421234, "grad_norm": 1.0771931815689109, "learning_rate": 8.166316058248217e-06, "loss": 0.4536, "step": 9895 }, { "epoch": 0.30329778104695354, "grad_norm": 1.308800455782719, "learning_rate": 8.165931924159242e-06, "loss": 0.6839, "step": 9896 }, { "epoch": 0.30332842956969475, "grad_norm": 1.2126181621852903, "learning_rate": 8.165547758875529e-06, "loss": 0.6567, "step": 9897 }, { "epoch": 0.30335907809243595, "grad_norm": 1.3793210290090967, "learning_rate": 8.165163562400864e-06, "loss": 0.6441, "step": 9898 }, { "epoch": 0.30338972661517716, "grad_norm": 1.2074164234817248, "learning_rate": 8.164779334739033e-06, "loss": 0.6171, "step": 9899 }, { "epoch": 0.30342037513791836, "grad_norm": 0.5154877389645585, "learning_rate": 8.164395075893822e-06, "loss": 0.4458, "step": 9900 }, { "epoch": 0.30345102366065957, "grad_norm": 1.3613551474504466, "learning_rate": 8.164010785869016e-06, "loss": 0.6975, "step": 9901 }, { "epoch": 0.3034816721834008, "grad_norm": 1.26178307337842, "learning_rate": 8.1636264646684e-06, "loss": 0.6975, "step": 9902 }, { "epoch": 0.303512320706142, "grad_norm": 1.218456021316831, "learning_rate": 8.163242112295767e-06, "loss": 0.6752, "step": 9903 }, { "epoch": 0.3035429692288832, "grad_norm": 0.567434017624891, "learning_rate": 8.162857728754898e-06, "loss": 0.4501, "step": 9904 }, { "epoch": 0.3035736177516244, "grad_norm": 1.2101951239173891, "learning_rate": 8.162473314049584e-06, "loss": 0.6449, "step": 9905 }, { "epoch": 0.3036042662743656, "grad_norm": 1.294369460765717, "learning_rate": 8.16208886818361e-06, "loss": 0.664, "step": 9906 }, { "epoch": 0.3036349147971068, "grad_norm": 0.5259394979652242, "learning_rate": 8.161704391160765e-06, "loss": 0.4446, "step": 9907 }, { "epoch": 0.303665563319848, "grad_norm": 1.2681412847709304, "learning_rate": 8.16131988298484e-06, "loss": 0.6823, "step": 9908 }, { "epoch": 0.3036962118425892, "grad_norm": 1.2768705174206425, "learning_rate": 8.160935343659618e-06, "loss": 0.6958, "step": 9909 }, { "epoch": 0.30372686036533036, "grad_norm": 1.2719024340950584, "learning_rate": 8.160550773188894e-06, "loss": 0.6547, "step": 9910 }, { "epoch": 0.30375750888807157, "grad_norm": 1.3740826823547831, "learning_rate": 8.160166171576453e-06, "loss": 0.6569, "step": 9911 }, { "epoch": 0.3037881574108128, "grad_norm": 1.2448212728020192, "learning_rate": 8.159781538826087e-06, "loss": 0.6767, "step": 9912 }, { "epoch": 0.303818805933554, "grad_norm": 1.3931758308605686, "learning_rate": 8.159396874941584e-06, "loss": 0.6759, "step": 9913 }, { "epoch": 0.3038494544562952, "grad_norm": 1.3298581359568526, "learning_rate": 8.159012179926736e-06, "loss": 0.6308, "step": 9914 }, { "epoch": 0.3038801029790364, "grad_norm": 1.17301921206971, "learning_rate": 8.158627453785332e-06, "loss": 0.6547, "step": 9915 }, { "epoch": 0.3039107515017776, "grad_norm": 1.3103218268709287, "learning_rate": 8.158242696521165e-06, "loss": 0.7415, "step": 9916 }, { "epoch": 0.3039414000245188, "grad_norm": 1.2903191534425902, "learning_rate": 8.157857908138022e-06, "loss": 0.7055, "step": 9917 }, { "epoch": 0.30397204854726, "grad_norm": 1.3046963795371997, "learning_rate": 8.1574730886397e-06, "loss": 0.7291, "step": 9918 }, { "epoch": 0.3040026970700012, "grad_norm": 1.3022686017187872, "learning_rate": 8.157088238029986e-06, "loss": 0.7441, "step": 9919 }, { "epoch": 0.3040333455927424, "grad_norm": 1.0166751882301797, "learning_rate": 8.156703356312676e-06, "loss": 0.6017, "step": 9920 }, { "epoch": 0.3040639941154836, "grad_norm": 1.2451394418334392, "learning_rate": 8.156318443491558e-06, "loss": 0.7355, "step": 9921 }, { "epoch": 0.30409464263822483, "grad_norm": 1.1068669051515534, "learning_rate": 8.155933499570428e-06, "loss": 0.6634, "step": 9922 }, { "epoch": 0.30412529116096604, "grad_norm": 1.2643237775443372, "learning_rate": 8.155548524553076e-06, "loss": 0.724, "step": 9923 }, { "epoch": 0.30415593968370724, "grad_norm": 1.423142900973934, "learning_rate": 8.155163518443298e-06, "loss": 0.6219, "step": 9924 }, { "epoch": 0.30418658820644845, "grad_norm": 1.4638884852571956, "learning_rate": 8.154778481244888e-06, "loss": 0.675, "step": 9925 }, { "epoch": 0.30421723672918966, "grad_norm": 1.359251861571067, "learning_rate": 8.154393412961637e-06, "loss": 0.6588, "step": 9926 }, { "epoch": 0.30424788525193086, "grad_norm": 1.571368872509606, "learning_rate": 8.15400831359734e-06, "loss": 0.753, "step": 9927 }, { "epoch": 0.30427853377467207, "grad_norm": 1.4371638716462045, "learning_rate": 8.153623183155793e-06, "loss": 0.7521, "step": 9928 }, { "epoch": 0.30430918229741327, "grad_norm": 1.4270148195119343, "learning_rate": 8.15323802164079e-06, "loss": 0.6178, "step": 9929 }, { "epoch": 0.3043398308201545, "grad_norm": 1.2896834751527557, "learning_rate": 8.152852829056126e-06, "loss": 0.6948, "step": 9930 }, { "epoch": 0.3043704793428957, "grad_norm": 1.2507507740819195, "learning_rate": 8.152467605405596e-06, "loss": 0.6917, "step": 9931 }, { "epoch": 0.3044011278656369, "grad_norm": 1.5023419811341425, "learning_rate": 8.152082350692996e-06, "loss": 0.7494, "step": 9932 }, { "epoch": 0.3044317763883781, "grad_norm": 1.1886340035884115, "learning_rate": 8.151697064922121e-06, "loss": 0.6409, "step": 9933 }, { "epoch": 0.3044624249111193, "grad_norm": 1.4571323511079173, "learning_rate": 8.151311748096771e-06, "loss": 0.7766, "step": 9934 }, { "epoch": 0.3044930734338605, "grad_norm": 1.573180274152788, "learning_rate": 8.150926400220738e-06, "loss": 0.7973, "step": 9935 }, { "epoch": 0.3045237219566017, "grad_norm": 1.3155805298097842, "learning_rate": 8.150541021297822e-06, "loss": 0.7215, "step": 9936 }, { "epoch": 0.3045543704793429, "grad_norm": 0.5817974532626203, "learning_rate": 8.150155611331819e-06, "loss": 0.4527, "step": 9937 }, { "epoch": 0.3045850190020841, "grad_norm": 1.1291137793791883, "learning_rate": 8.149770170326527e-06, "loss": 0.6172, "step": 9938 }, { "epoch": 0.30461566752482533, "grad_norm": 1.2877278138926207, "learning_rate": 8.149384698285742e-06, "loss": 0.6856, "step": 9939 }, { "epoch": 0.30464631604756653, "grad_norm": 1.162783097762226, "learning_rate": 8.148999195213266e-06, "loss": 0.685, "step": 9940 }, { "epoch": 0.3046769645703077, "grad_norm": 1.0883817902078676, "learning_rate": 8.148613661112894e-06, "loss": 0.6086, "step": 9941 }, { "epoch": 0.3047076130930489, "grad_norm": 1.385979247810925, "learning_rate": 8.148228095988427e-06, "loss": 0.6929, "step": 9942 }, { "epoch": 0.3047382616157901, "grad_norm": 1.3780280366686972, "learning_rate": 8.147842499843663e-06, "loss": 0.6952, "step": 9943 }, { "epoch": 0.3047689101385313, "grad_norm": 1.4111784492228905, "learning_rate": 8.1474568726824e-06, "loss": 0.658, "step": 9944 }, { "epoch": 0.3047995586612725, "grad_norm": 1.2341468919481933, "learning_rate": 8.14707121450844e-06, "loss": 0.8049, "step": 9945 }, { "epoch": 0.3048302071840137, "grad_norm": 0.49368820731173335, "learning_rate": 8.146685525325582e-06, "loss": 0.4323, "step": 9946 }, { "epoch": 0.3048608557067549, "grad_norm": 1.2797036533048265, "learning_rate": 8.146299805137626e-06, "loss": 0.7075, "step": 9947 }, { "epoch": 0.3048915042294961, "grad_norm": 1.305148526928279, "learning_rate": 8.145914053948373e-06, "loss": 0.6595, "step": 9948 }, { "epoch": 0.30492215275223733, "grad_norm": 1.3510106375560755, "learning_rate": 8.145528271761624e-06, "loss": 0.7144, "step": 9949 }, { "epoch": 0.30495280127497854, "grad_norm": 1.2137684227874201, "learning_rate": 8.145142458581182e-06, "loss": 0.6859, "step": 9950 }, { "epoch": 0.30498344979771974, "grad_norm": 1.449097048733747, "learning_rate": 8.144756614410846e-06, "loss": 0.6591, "step": 9951 }, { "epoch": 0.30501409832046095, "grad_norm": 1.1864555347730383, "learning_rate": 8.144370739254418e-06, "loss": 0.653, "step": 9952 }, { "epoch": 0.30504474684320215, "grad_norm": 1.1407678929331078, "learning_rate": 8.1439848331157e-06, "loss": 0.6963, "step": 9953 }, { "epoch": 0.30507539536594336, "grad_norm": 1.3036974604294984, "learning_rate": 8.143598895998494e-06, "loss": 0.7375, "step": 9954 }, { "epoch": 0.30510604388868456, "grad_norm": 1.4740513564782827, "learning_rate": 8.143212927906606e-06, "loss": 0.7105, "step": 9955 }, { "epoch": 0.30513669241142577, "grad_norm": 1.2037321042189006, "learning_rate": 8.142826928843835e-06, "loss": 0.5876, "step": 9956 }, { "epoch": 0.305167340934167, "grad_norm": 0.5341777110965747, "learning_rate": 8.142440898813985e-06, "loss": 0.4331, "step": 9957 }, { "epoch": 0.3051979894569082, "grad_norm": 1.3282201239747453, "learning_rate": 8.142054837820865e-06, "loss": 0.6756, "step": 9958 }, { "epoch": 0.3052286379796494, "grad_norm": 1.1104986244332147, "learning_rate": 8.141668745868271e-06, "loss": 0.548, "step": 9959 }, { "epoch": 0.3052592865023906, "grad_norm": 1.4767963036073046, "learning_rate": 8.141282622960012e-06, "loss": 0.7448, "step": 9960 }, { "epoch": 0.3052899350251318, "grad_norm": 1.297424881656805, "learning_rate": 8.140896469099893e-06, "loss": 0.7228, "step": 9961 }, { "epoch": 0.305320583547873, "grad_norm": 0.4724756667232002, "learning_rate": 8.140510284291716e-06, "loss": 0.4325, "step": 9962 }, { "epoch": 0.3053512320706142, "grad_norm": 1.308909919947136, "learning_rate": 8.140124068539288e-06, "loss": 0.7037, "step": 9963 }, { "epoch": 0.3053818805933554, "grad_norm": 1.6293856446153676, "learning_rate": 8.139737821846415e-06, "loss": 0.5855, "step": 9964 }, { "epoch": 0.3054125291160966, "grad_norm": 1.3070733758770665, "learning_rate": 8.139351544216901e-06, "loss": 0.7261, "step": 9965 }, { "epoch": 0.3054431776388378, "grad_norm": 1.2280881824561576, "learning_rate": 8.138965235654553e-06, "loss": 0.7321, "step": 9966 }, { "epoch": 0.30547382616157903, "grad_norm": 1.3112107815970955, "learning_rate": 8.138578896163177e-06, "loss": 0.7401, "step": 9967 }, { "epoch": 0.30550447468432024, "grad_norm": 1.2556625291507766, "learning_rate": 8.138192525746582e-06, "loss": 0.5915, "step": 9968 }, { "epoch": 0.30553512320706144, "grad_norm": 1.3263236367299867, "learning_rate": 8.137806124408572e-06, "loss": 0.6369, "step": 9969 }, { "epoch": 0.30556577172980265, "grad_norm": 1.1588952248353857, "learning_rate": 8.137419692152954e-06, "loss": 0.6645, "step": 9970 }, { "epoch": 0.30559642025254385, "grad_norm": 0.5165019605848525, "learning_rate": 8.137033228983538e-06, "loss": 0.457, "step": 9971 }, { "epoch": 0.305627068775285, "grad_norm": 1.469034746913743, "learning_rate": 8.136646734904132e-06, "loss": 0.7117, "step": 9972 }, { "epoch": 0.3056577172980262, "grad_norm": 1.2181056091582507, "learning_rate": 8.136260209918541e-06, "loss": 0.6991, "step": 9973 }, { "epoch": 0.3056883658207674, "grad_norm": 1.2726866733993567, "learning_rate": 8.135873654030577e-06, "loss": 0.6833, "step": 9974 }, { "epoch": 0.3057190143435086, "grad_norm": 1.2887039262601312, "learning_rate": 8.135487067244048e-06, "loss": 0.6797, "step": 9975 }, { "epoch": 0.3057496628662498, "grad_norm": 1.3308608935064388, "learning_rate": 8.135100449562763e-06, "loss": 0.6291, "step": 9976 }, { "epoch": 0.30578031138899103, "grad_norm": 1.2971766478930569, "learning_rate": 8.13471380099053e-06, "loss": 0.7932, "step": 9977 }, { "epoch": 0.30581095991173224, "grad_norm": 1.486186653961261, "learning_rate": 8.13432712153116e-06, "loss": 0.7261, "step": 9978 }, { "epoch": 0.30584160843447344, "grad_norm": 1.1532005098582567, "learning_rate": 8.133940411188463e-06, "loss": 0.6534, "step": 9979 }, { "epoch": 0.30587225695721465, "grad_norm": 1.2613303077320932, "learning_rate": 8.13355366996625e-06, "loss": 0.6962, "step": 9980 }, { "epoch": 0.30590290547995586, "grad_norm": 1.2994322524390967, "learning_rate": 8.13316689786833e-06, "loss": 0.711, "step": 9981 }, { "epoch": 0.30593355400269706, "grad_norm": 1.334245547865637, "learning_rate": 8.132780094898515e-06, "loss": 0.6479, "step": 9982 }, { "epoch": 0.30596420252543827, "grad_norm": 1.0523414919047804, "learning_rate": 8.132393261060616e-06, "loss": 0.5991, "step": 9983 }, { "epoch": 0.3059948510481795, "grad_norm": 0.4733689109655217, "learning_rate": 8.132006396358447e-06, "loss": 0.4292, "step": 9984 }, { "epoch": 0.3060254995709207, "grad_norm": 1.1315988988890042, "learning_rate": 8.131619500795815e-06, "loss": 0.6264, "step": 9985 }, { "epoch": 0.3060561480936619, "grad_norm": 1.2331765893215083, "learning_rate": 8.131232574376535e-06, "loss": 0.6752, "step": 9986 }, { "epoch": 0.3060867966164031, "grad_norm": 1.3594634183417083, "learning_rate": 8.130845617104419e-06, "loss": 0.6866, "step": 9987 }, { "epoch": 0.3061174451391443, "grad_norm": 1.337353406490237, "learning_rate": 8.130458628983281e-06, "loss": 0.7263, "step": 9988 }, { "epoch": 0.3061480936618855, "grad_norm": 1.3132007874666984, "learning_rate": 8.130071610016934e-06, "loss": 0.6353, "step": 9989 }, { "epoch": 0.3061787421846267, "grad_norm": 1.3561455976224157, "learning_rate": 8.12968456020919e-06, "loss": 0.6312, "step": 9990 }, { "epoch": 0.3062093907073679, "grad_norm": 1.3389204657066487, "learning_rate": 8.129297479563863e-06, "loss": 0.666, "step": 9991 }, { "epoch": 0.3062400392301091, "grad_norm": 1.3250158476394929, "learning_rate": 8.128910368084767e-06, "loss": 0.7781, "step": 9992 }, { "epoch": 0.3062706877528503, "grad_norm": 1.2206179944621194, "learning_rate": 8.128523225775717e-06, "loss": 0.6915, "step": 9993 }, { "epoch": 0.30630133627559153, "grad_norm": 1.3837245289875766, "learning_rate": 8.128136052640526e-06, "loss": 0.7737, "step": 9994 }, { "epoch": 0.30633198479833273, "grad_norm": 0.481895410117802, "learning_rate": 8.12774884868301e-06, "loss": 0.4274, "step": 9995 }, { "epoch": 0.30636263332107394, "grad_norm": 1.3482820142412346, "learning_rate": 8.127361613906988e-06, "loss": 0.7507, "step": 9996 }, { "epoch": 0.30639328184381515, "grad_norm": 1.5539542047289878, "learning_rate": 8.126974348316268e-06, "loss": 0.7689, "step": 9997 }, { "epoch": 0.30642393036655635, "grad_norm": 1.2329645022231337, "learning_rate": 8.12658705191467e-06, "loss": 0.6564, "step": 9998 }, { "epoch": 0.30645457888929756, "grad_norm": 1.2380605580689859, "learning_rate": 8.126199724706012e-06, "loss": 0.7214, "step": 9999 }, { "epoch": 0.30648522741203876, "grad_norm": 1.2085756376146388, "learning_rate": 8.125812366694106e-06, "loss": 0.6313, "step": 10000 }, { "epoch": 0.30651587593477997, "grad_norm": 0.4428176263994913, "learning_rate": 8.125424977882772e-06, "loss": 0.4496, "step": 10001 }, { "epoch": 0.3065465244575212, "grad_norm": 0.4455412074979356, "learning_rate": 8.125037558275826e-06, "loss": 0.4258, "step": 10002 }, { "epoch": 0.3065771729802623, "grad_norm": 1.260617877966495, "learning_rate": 8.124650107877086e-06, "loss": 0.6145, "step": 10003 }, { "epoch": 0.30660782150300353, "grad_norm": 1.3175531826085534, "learning_rate": 8.124262626690367e-06, "loss": 0.6805, "step": 10004 }, { "epoch": 0.30663847002574474, "grad_norm": 1.345355115277243, "learning_rate": 8.123875114719491e-06, "loss": 0.6948, "step": 10005 }, { "epoch": 0.30666911854848594, "grad_norm": 1.1406065107151713, "learning_rate": 8.123487571968273e-06, "loss": 0.6294, "step": 10006 }, { "epoch": 0.30669976707122715, "grad_norm": 1.3162208813715437, "learning_rate": 8.123099998440535e-06, "loss": 0.7381, "step": 10007 }, { "epoch": 0.30673041559396835, "grad_norm": 1.1922952633725532, "learning_rate": 8.12271239414009e-06, "loss": 0.6311, "step": 10008 }, { "epoch": 0.30676106411670956, "grad_norm": 1.193325306491299, "learning_rate": 8.122324759070764e-06, "loss": 0.6808, "step": 10009 }, { "epoch": 0.30679171263945076, "grad_norm": 1.3865498540823291, "learning_rate": 8.121937093236371e-06, "loss": 0.6543, "step": 10010 }, { "epoch": 0.30682236116219197, "grad_norm": 1.3579278197814824, "learning_rate": 8.121549396640736e-06, "loss": 0.7011, "step": 10011 }, { "epoch": 0.3068530096849332, "grad_norm": 1.3404549187894854, "learning_rate": 8.121161669287674e-06, "loss": 0.6852, "step": 10012 }, { "epoch": 0.3068836582076744, "grad_norm": 1.1373607410925142, "learning_rate": 8.120773911181005e-06, "loss": 0.6607, "step": 10013 }, { "epoch": 0.3069143067304156, "grad_norm": 1.1751472807216892, "learning_rate": 8.120386122324556e-06, "loss": 0.7131, "step": 10014 }, { "epoch": 0.3069449552531568, "grad_norm": 1.2142241850794402, "learning_rate": 8.119998302722143e-06, "loss": 0.5877, "step": 10015 }, { "epoch": 0.306975603775898, "grad_norm": 1.1619993452918271, "learning_rate": 8.119610452377588e-06, "loss": 0.6633, "step": 10016 }, { "epoch": 0.3070062522986392, "grad_norm": 1.2857458612875676, "learning_rate": 8.119222571294714e-06, "loss": 0.615, "step": 10017 }, { "epoch": 0.3070369008213804, "grad_norm": 1.7681901292639632, "learning_rate": 8.118834659477341e-06, "loss": 0.5586, "step": 10018 }, { "epoch": 0.3070675493441216, "grad_norm": 1.389170822039941, "learning_rate": 8.118446716929294e-06, "loss": 0.6826, "step": 10019 }, { "epoch": 0.3070981978668628, "grad_norm": 1.305456562085776, "learning_rate": 8.118058743654392e-06, "loss": 0.6986, "step": 10020 }, { "epoch": 0.307128846389604, "grad_norm": 0.46099155306253126, "learning_rate": 8.117670739656457e-06, "loss": 0.4286, "step": 10021 }, { "epoch": 0.30715949491234523, "grad_norm": 1.2454448289135995, "learning_rate": 8.117282704939318e-06, "loss": 0.7116, "step": 10022 }, { "epoch": 0.30719014343508644, "grad_norm": 1.0954591438337868, "learning_rate": 8.116894639506794e-06, "loss": 0.679, "step": 10023 }, { "epoch": 0.30722079195782764, "grad_norm": 1.2592466016893769, "learning_rate": 8.11650654336271e-06, "loss": 0.7027, "step": 10024 }, { "epoch": 0.30725144048056885, "grad_norm": 1.2658349431710525, "learning_rate": 8.116118416510889e-06, "loss": 0.7104, "step": 10025 }, { "epoch": 0.30728208900331005, "grad_norm": 1.2923863496462653, "learning_rate": 8.115730258955156e-06, "loss": 0.704, "step": 10026 }, { "epoch": 0.30731273752605126, "grad_norm": 1.2777467358709382, "learning_rate": 8.115342070699335e-06, "loss": 0.7449, "step": 10027 }, { "epoch": 0.30734338604879247, "grad_norm": 1.2719881953430776, "learning_rate": 8.114953851747252e-06, "loss": 0.714, "step": 10028 }, { "epoch": 0.30737403457153367, "grad_norm": 1.2736432401081181, "learning_rate": 8.114565602102733e-06, "loss": 0.7127, "step": 10029 }, { "epoch": 0.3074046830942749, "grad_norm": 1.2157195381151396, "learning_rate": 8.1141773217696e-06, "loss": 0.6912, "step": 10030 }, { "epoch": 0.3074353316170161, "grad_norm": 0.48233986918802774, "learning_rate": 8.113789010751682e-06, "loss": 0.434, "step": 10031 }, { "epoch": 0.3074659801397573, "grad_norm": 1.2779816880495423, "learning_rate": 8.113400669052805e-06, "loss": 0.6697, "step": 10032 }, { "epoch": 0.3074966286624985, "grad_norm": 0.434130258577079, "learning_rate": 8.113012296676793e-06, "loss": 0.4482, "step": 10033 }, { "epoch": 0.30752727718523964, "grad_norm": 1.48935107262864, "learning_rate": 8.112623893627476e-06, "loss": 0.8315, "step": 10034 }, { "epoch": 0.30755792570798085, "grad_norm": 1.257818363727375, "learning_rate": 8.11223545990868e-06, "loss": 0.6925, "step": 10035 }, { "epoch": 0.30758857423072206, "grad_norm": 1.292306957775985, "learning_rate": 8.111846995524228e-06, "loss": 0.7051, "step": 10036 }, { "epoch": 0.30761922275346326, "grad_norm": 1.2600046910087854, "learning_rate": 8.111458500477955e-06, "loss": 0.668, "step": 10037 }, { "epoch": 0.30764987127620447, "grad_norm": 1.2861735143833444, "learning_rate": 8.111069974773684e-06, "loss": 0.6809, "step": 10038 }, { "epoch": 0.3076805197989457, "grad_norm": 1.4409175489601074, "learning_rate": 8.110681418415245e-06, "loss": 0.7197, "step": 10039 }, { "epoch": 0.3077111683216869, "grad_norm": 1.2939054937446737, "learning_rate": 8.110292831406466e-06, "loss": 0.6878, "step": 10040 }, { "epoch": 0.3077418168444281, "grad_norm": 0.4840931143203318, "learning_rate": 8.109904213751174e-06, "loss": 0.4573, "step": 10041 }, { "epoch": 0.3077724653671693, "grad_norm": 1.1817363818234656, "learning_rate": 8.109515565453202e-06, "loss": 0.639, "step": 10042 }, { "epoch": 0.3078031138899105, "grad_norm": 1.0393509955461893, "learning_rate": 8.109126886516376e-06, "loss": 0.6755, "step": 10043 }, { "epoch": 0.3078337624126517, "grad_norm": 1.237115667080974, "learning_rate": 8.108738176944529e-06, "loss": 0.792, "step": 10044 }, { "epoch": 0.3078644109353929, "grad_norm": 1.2057045675446265, "learning_rate": 8.10834943674149e-06, "loss": 0.614, "step": 10045 }, { "epoch": 0.3078950594581341, "grad_norm": 0.4887431693624977, "learning_rate": 8.107960665911087e-06, "loss": 0.4531, "step": 10046 }, { "epoch": 0.3079257079808753, "grad_norm": 1.2733404285336378, "learning_rate": 8.107571864457153e-06, "loss": 0.6955, "step": 10047 }, { "epoch": 0.3079563565036165, "grad_norm": 1.1541244476635644, "learning_rate": 8.107183032383517e-06, "loss": 0.6895, "step": 10048 }, { "epoch": 0.30798700502635773, "grad_norm": 1.2121951676946492, "learning_rate": 8.106794169694012e-06, "loss": 0.7114, "step": 10049 }, { "epoch": 0.30801765354909894, "grad_norm": 1.3361878475764963, "learning_rate": 8.106405276392471e-06, "loss": 0.7191, "step": 10050 }, { "epoch": 0.30804830207184014, "grad_norm": 1.2381154752267098, "learning_rate": 8.106016352482722e-06, "loss": 0.7198, "step": 10051 }, { "epoch": 0.30807895059458135, "grad_norm": 0.46391665543655264, "learning_rate": 8.105627397968601e-06, "loss": 0.4204, "step": 10052 }, { "epoch": 0.30810959911732255, "grad_norm": 0.4538689858940449, "learning_rate": 8.105238412853937e-06, "loss": 0.4264, "step": 10053 }, { "epoch": 0.30814024764006376, "grad_norm": 0.4516449889337437, "learning_rate": 8.104849397142566e-06, "loss": 0.4343, "step": 10054 }, { "epoch": 0.30817089616280496, "grad_norm": 0.4511844926794007, "learning_rate": 8.104460350838318e-06, "loss": 0.4497, "step": 10055 }, { "epoch": 0.30820154468554617, "grad_norm": 0.4423923453823242, "learning_rate": 8.104071273945029e-06, "loss": 0.4273, "step": 10056 }, { "epoch": 0.3082321932082874, "grad_norm": 1.304894305114033, "learning_rate": 8.10368216646653e-06, "loss": 0.7373, "step": 10057 }, { "epoch": 0.3082628417310286, "grad_norm": 1.208792780490871, "learning_rate": 8.103293028406658e-06, "loss": 0.5216, "step": 10058 }, { "epoch": 0.3082934902537698, "grad_norm": 1.3022629839231914, "learning_rate": 8.102903859769244e-06, "loss": 0.6814, "step": 10059 }, { "epoch": 0.308324138776511, "grad_norm": 1.2809503250072727, "learning_rate": 8.102514660558126e-06, "loss": 0.7051, "step": 10060 }, { "epoch": 0.3083547872992522, "grad_norm": 1.1865147937179574, "learning_rate": 8.102125430777138e-06, "loss": 0.6872, "step": 10061 }, { "epoch": 0.3083854358219934, "grad_norm": 1.168310909979173, "learning_rate": 8.101736170430113e-06, "loss": 0.7109, "step": 10062 }, { "epoch": 0.3084160843447346, "grad_norm": 1.2274550056756364, "learning_rate": 8.101346879520888e-06, "loss": 0.6585, "step": 10063 }, { "epoch": 0.3084467328674758, "grad_norm": 1.2768707435385884, "learning_rate": 8.100957558053298e-06, "loss": 0.7132, "step": 10064 }, { "epoch": 0.30847738139021696, "grad_norm": 1.4686067524720108, "learning_rate": 8.10056820603118e-06, "loss": 0.6348, "step": 10065 }, { "epoch": 0.30850802991295817, "grad_norm": 1.1972958451624804, "learning_rate": 8.100178823458373e-06, "loss": 0.7302, "step": 10066 }, { "epoch": 0.3085386784356994, "grad_norm": 1.2643218994280327, "learning_rate": 8.099789410338708e-06, "loss": 0.7811, "step": 10067 }, { "epoch": 0.3085693269584406, "grad_norm": 1.304979336521598, "learning_rate": 8.099399966676025e-06, "loss": 0.7363, "step": 10068 }, { "epoch": 0.3085999754811818, "grad_norm": 1.4379254601957867, "learning_rate": 8.099010492474162e-06, "loss": 0.6691, "step": 10069 }, { "epoch": 0.308630624003923, "grad_norm": 1.2366983096541606, "learning_rate": 8.098620987736953e-06, "loss": 0.6705, "step": 10070 }, { "epoch": 0.3086612725266642, "grad_norm": 0.6061099257329968, "learning_rate": 8.098231452468242e-06, "loss": 0.4426, "step": 10071 }, { "epoch": 0.3086919210494054, "grad_norm": 1.3578319804322845, "learning_rate": 8.097841886671863e-06, "loss": 0.6975, "step": 10072 }, { "epoch": 0.3087225695721466, "grad_norm": 1.2051367398153037, "learning_rate": 8.097452290351655e-06, "loss": 0.7097, "step": 10073 }, { "epoch": 0.3087532180948878, "grad_norm": 1.302202251826689, "learning_rate": 8.097062663511457e-06, "loss": 0.6982, "step": 10074 }, { "epoch": 0.308783866617629, "grad_norm": 1.3242534867132778, "learning_rate": 8.096673006155107e-06, "loss": 0.6092, "step": 10075 }, { "epoch": 0.3088145151403702, "grad_norm": 1.2806803209876658, "learning_rate": 8.096283318286446e-06, "loss": 0.6869, "step": 10076 }, { "epoch": 0.30884516366311143, "grad_norm": 1.3991909323606657, "learning_rate": 8.095893599909315e-06, "loss": 0.6484, "step": 10077 }, { "epoch": 0.30887581218585264, "grad_norm": 1.214452211349537, "learning_rate": 8.09550385102755e-06, "loss": 0.7574, "step": 10078 }, { "epoch": 0.30890646070859384, "grad_norm": 0.5183292089453279, "learning_rate": 8.095114071644996e-06, "loss": 0.42, "step": 10079 }, { "epoch": 0.30893710923133505, "grad_norm": 1.3082345016586299, "learning_rate": 8.09472426176549e-06, "loss": 0.693, "step": 10080 }, { "epoch": 0.30896775775407626, "grad_norm": 1.326679644705559, "learning_rate": 8.094334421392873e-06, "loss": 0.649, "step": 10081 }, { "epoch": 0.30899840627681746, "grad_norm": 1.2189312657901046, "learning_rate": 8.09394455053099e-06, "loss": 0.5789, "step": 10082 }, { "epoch": 0.30902905479955867, "grad_norm": 1.2982649185290702, "learning_rate": 8.093554649183677e-06, "loss": 0.714, "step": 10083 }, { "epoch": 0.3090597033222999, "grad_norm": 5.6010041509132416, "learning_rate": 8.093164717354779e-06, "loss": 0.6281, "step": 10084 }, { "epoch": 0.3090903518450411, "grad_norm": 0.46558585864391067, "learning_rate": 8.092774755048138e-06, "loss": 0.4265, "step": 10085 }, { "epoch": 0.3091210003677823, "grad_norm": 1.2276513103701383, "learning_rate": 8.092384762267596e-06, "loss": 0.6561, "step": 10086 }, { "epoch": 0.3091516488905235, "grad_norm": 1.296216495851575, "learning_rate": 8.091994739016995e-06, "loss": 0.665, "step": 10087 }, { "epoch": 0.3091822974132647, "grad_norm": 1.233892988709858, "learning_rate": 8.09160468530018e-06, "loss": 0.6607, "step": 10088 }, { "epoch": 0.3092129459360059, "grad_norm": 1.3736260271776222, "learning_rate": 8.091214601120992e-06, "loss": 0.7537, "step": 10089 }, { "epoch": 0.3092435944587471, "grad_norm": 1.5314344014038601, "learning_rate": 8.090824486483274e-06, "loss": 0.735, "step": 10090 }, { "epoch": 0.3092742429814883, "grad_norm": 0.5408742524515209, "learning_rate": 8.090434341390874e-06, "loss": 0.4467, "step": 10091 }, { "epoch": 0.3093048915042295, "grad_norm": 1.351843437438509, "learning_rate": 8.090044165847634e-06, "loss": 0.7141, "step": 10092 }, { "epoch": 0.3093355400269707, "grad_norm": 1.1005209632920951, "learning_rate": 8.089653959857398e-06, "loss": 0.6277, "step": 10093 }, { "epoch": 0.30936618854971193, "grad_norm": 1.255443919051051, "learning_rate": 8.08926372342401e-06, "loss": 0.607, "step": 10094 }, { "epoch": 0.30939683707245313, "grad_norm": 1.1707193201127222, "learning_rate": 8.088873456551317e-06, "loss": 0.6977, "step": 10095 }, { "epoch": 0.3094274855951943, "grad_norm": 1.236171642194746, "learning_rate": 8.088483159243164e-06, "loss": 0.6635, "step": 10096 }, { "epoch": 0.3094581341179355, "grad_norm": 1.1366366306678755, "learning_rate": 8.088092831503394e-06, "loss": 0.6156, "step": 10097 }, { "epoch": 0.3094887826406767, "grad_norm": 1.2221190885620283, "learning_rate": 8.087702473335858e-06, "loss": 0.6539, "step": 10098 }, { "epoch": 0.3095194311634179, "grad_norm": 1.41476255376286, "learning_rate": 8.087312084744397e-06, "loss": 0.5666, "step": 10099 }, { "epoch": 0.3095500796861591, "grad_norm": 1.195715785671408, "learning_rate": 8.086921665732861e-06, "loss": 0.5628, "step": 10100 }, { "epoch": 0.3095807282089003, "grad_norm": 1.1509316933656661, "learning_rate": 8.086531216305095e-06, "loss": 0.6442, "step": 10101 }, { "epoch": 0.3096113767316415, "grad_norm": 1.260438207678889, "learning_rate": 8.086140736464949e-06, "loss": 0.6796, "step": 10102 }, { "epoch": 0.3096420252543827, "grad_norm": 1.1721480877616117, "learning_rate": 8.085750226216267e-06, "loss": 0.611, "step": 10103 }, { "epoch": 0.30967267377712393, "grad_norm": 1.1144116203861794, "learning_rate": 8.0853596855629e-06, "loss": 0.6207, "step": 10104 }, { "epoch": 0.30970332229986514, "grad_norm": 1.3416494674678003, "learning_rate": 8.084969114508693e-06, "loss": 0.6822, "step": 10105 }, { "epoch": 0.30973397082260634, "grad_norm": 1.3638037778673044, "learning_rate": 8.084578513057499e-06, "loss": 0.6979, "step": 10106 }, { "epoch": 0.30976461934534755, "grad_norm": 0.4721681895590589, "learning_rate": 8.084187881213162e-06, "loss": 0.4086, "step": 10107 }, { "epoch": 0.30979526786808875, "grad_norm": 0.4687966523739319, "learning_rate": 8.083797218979532e-06, "loss": 0.4307, "step": 10108 }, { "epoch": 0.30982591639082996, "grad_norm": 1.2592852410720283, "learning_rate": 8.083406526360459e-06, "loss": 0.6171, "step": 10109 }, { "epoch": 0.30985656491357116, "grad_norm": 1.2479564208313945, "learning_rate": 8.083015803359793e-06, "loss": 0.6855, "step": 10110 }, { "epoch": 0.30988721343631237, "grad_norm": 0.5236044795606218, "learning_rate": 8.082625049981383e-06, "loss": 0.4212, "step": 10111 }, { "epoch": 0.3099178619590536, "grad_norm": 1.245242609989165, "learning_rate": 8.08223426622908e-06, "loss": 0.6435, "step": 10112 }, { "epoch": 0.3099485104817948, "grad_norm": 1.238643464493382, "learning_rate": 8.081843452106735e-06, "loss": 0.6174, "step": 10113 }, { "epoch": 0.309979159004536, "grad_norm": 1.1885662458904576, "learning_rate": 8.081452607618196e-06, "loss": 0.5843, "step": 10114 }, { "epoch": 0.3100098075272772, "grad_norm": 0.45674219196986593, "learning_rate": 8.08106173276732e-06, "loss": 0.4408, "step": 10115 }, { "epoch": 0.3100404560500184, "grad_norm": 1.348003453217751, "learning_rate": 8.08067082755795e-06, "loss": 0.6366, "step": 10116 }, { "epoch": 0.3100711045727596, "grad_norm": 1.3025514343252034, "learning_rate": 8.080279891993943e-06, "loss": 0.7502, "step": 10117 }, { "epoch": 0.3101017530955008, "grad_norm": 1.2974300290068719, "learning_rate": 8.079888926079152e-06, "loss": 0.7322, "step": 10118 }, { "epoch": 0.310132401618242, "grad_norm": 1.1967943356930273, "learning_rate": 8.079497929817426e-06, "loss": 0.7276, "step": 10119 }, { "epoch": 0.3101630501409832, "grad_norm": 1.1398273727450479, "learning_rate": 8.07910690321262e-06, "loss": 0.5461, "step": 10120 }, { "epoch": 0.3101936986637244, "grad_norm": 0.47189568954301164, "learning_rate": 8.078715846268583e-06, "loss": 0.4395, "step": 10121 }, { "epoch": 0.31022434718646563, "grad_norm": 1.153409877423181, "learning_rate": 8.078324758989174e-06, "loss": 0.6703, "step": 10122 }, { "epoch": 0.31025499570920684, "grad_norm": 0.4749468198091147, "learning_rate": 8.077933641378243e-06, "loss": 0.4288, "step": 10123 }, { "epoch": 0.31028564423194804, "grad_norm": 1.356542458422647, "learning_rate": 8.077542493439643e-06, "loss": 0.6485, "step": 10124 }, { "epoch": 0.31031629275468925, "grad_norm": 1.2109053764176936, "learning_rate": 8.077151315177232e-06, "loss": 0.7202, "step": 10125 }, { "epoch": 0.31034694127743045, "grad_norm": 1.3267647453519813, "learning_rate": 8.076760106594859e-06, "loss": 0.6527, "step": 10126 }, { "epoch": 0.3103775898001716, "grad_norm": 1.4660627536500683, "learning_rate": 8.076368867696382e-06, "loss": 0.6825, "step": 10127 }, { "epoch": 0.3104082383229128, "grad_norm": 1.3201507956444558, "learning_rate": 8.075977598485656e-06, "loss": 0.6793, "step": 10128 }, { "epoch": 0.310438886845654, "grad_norm": 1.2310180755040643, "learning_rate": 8.075586298966536e-06, "loss": 0.6374, "step": 10129 }, { "epoch": 0.3104695353683952, "grad_norm": 0.5403974104983149, "learning_rate": 8.075194969142876e-06, "loss": 0.4341, "step": 10130 }, { "epoch": 0.3105001838911364, "grad_norm": 1.2741518443747548, "learning_rate": 8.074803609018535e-06, "loss": 0.7386, "step": 10131 }, { "epoch": 0.31053083241387763, "grad_norm": 1.2033544346734193, "learning_rate": 8.074412218597367e-06, "loss": 0.6972, "step": 10132 }, { "epoch": 0.31056148093661884, "grad_norm": 1.2130874812009351, "learning_rate": 8.07402079788323e-06, "loss": 0.7441, "step": 10133 }, { "epoch": 0.31059212945936004, "grad_norm": 1.2270881929375173, "learning_rate": 8.073629346879976e-06, "loss": 0.7201, "step": 10134 }, { "epoch": 0.31062277798210125, "grad_norm": 1.375841159078268, "learning_rate": 8.073237865591468e-06, "loss": 0.7638, "step": 10135 }, { "epoch": 0.31065342650484246, "grad_norm": 1.3627757384228891, "learning_rate": 8.07284635402156e-06, "loss": 0.6614, "step": 10136 }, { "epoch": 0.31068407502758366, "grad_norm": 1.1252260767926328, "learning_rate": 8.072454812174111e-06, "loss": 0.6918, "step": 10137 }, { "epoch": 0.31071472355032487, "grad_norm": 1.2734069152209733, "learning_rate": 8.072063240052978e-06, "loss": 0.5911, "step": 10138 }, { "epoch": 0.3107453720730661, "grad_norm": 1.1446485501781352, "learning_rate": 8.071671637662022e-06, "loss": 0.608, "step": 10139 }, { "epoch": 0.3107760205958073, "grad_norm": 1.182347381085354, "learning_rate": 8.071280005005098e-06, "loss": 0.6866, "step": 10140 }, { "epoch": 0.3108066691185485, "grad_norm": 1.2990053272959725, "learning_rate": 8.070888342086065e-06, "loss": 0.6678, "step": 10141 }, { "epoch": 0.3108373176412897, "grad_norm": 0.5569790571118444, "learning_rate": 8.070496648908786e-06, "loss": 0.4235, "step": 10142 }, { "epoch": 0.3108679661640309, "grad_norm": 1.2943197345173034, "learning_rate": 8.070104925477116e-06, "loss": 0.5738, "step": 10143 }, { "epoch": 0.3108986146867721, "grad_norm": 0.5008896663021754, "learning_rate": 8.069713171794918e-06, "loss": 0.4367, "step": 10144 }, { "epoch": 0.3109292632095133, "grad_norm": 1.2289467433921033, "learning_rate": 8.06932138786605e-06, "loss": 0.6954, "step": 10145 }, { "epoch": 0.3109599117322545, "grad_norm": 1.0725600934825372, "learning_rate": 8.068929573694373e-06, "loss": 0.5416, "step": 10146 }, { "epoch": 0.3109905602549957, "grad_norm": 1.4188431444094773, "learning_rate": 8.068537729283748e-06, "loss": 0.6804, "step": 10147 }, { "epoch": 0.3110212087777369, "grad_norm": 1.251923684160638, "learning_rate": 8.068145854638034e-06, "loss": 0.7439, "step": 10148 }, { "epoch": 0.31105185730047813, "grad_norm": 1.1521412503108168, "learning_rate": 8.067753949761095e-06, "loss": 0.6568, "step": 10149 }, { "epoch": 0.31108250582321934, "grad_norm": 1.3375156152486696, "learning_rate": 8.067362014656792e-06, "loss": 0.665, "step": 10150 }, { "epoch": 0.31111315434596054, "grad_norm": 1.1970639306717628, "learning_rate": 8.066970049328985e-06, "loss": 0.5397, "step": 10151 }, { "epoch": 0.31114380286870175, "grad_norm": 1.3051029109367187, "learning_rate": 8.06657805378154e-06, "loss": 0.7131, "step": 10152 }, { "epoch": 0.31117445139144295, "grad_norm": 1.2529234607732718, "learning_rate": 8.066186028018314e-06, "loss": 0.622, "step": 10153 }, { "epoch": 0.31120509991418416, "grad_norm": 1.2543775446517482, "learning_rate": 8.065793972043175e-06, "loss": 0.6952, "step": 10154 }, { "epoch": 0.31123574843692536, "grad_norm": 1.3940841013424454, "learning_rate": 8.06540188585998e-06, "loss": 0.6676, "step": 10155 }, { "epoch": 0.31126639695966657, "grad_norm": 1.2409182417499691, "learning_rate": 8.065009769472598e-06, "loss": 0.6713, "step": 10156 }, { "epoch": 0.3112970454824078, "grad_norm": 1.1838606342106008, "learning_rate": 8.064617622884892e-06, "loss": 0.7112, "step": 10157 }, { "epoch": 0.3113276940051489, "grad_norm": 1.1933058271684245, "learning_rate": 8.064225446100723e-06, "loss": 0.6065, "step": 10158 }, { "epoch": 0.31135834252789013, "grad_norm": 1.2688075350684929, "learning_rate": 8.063833239123958e-06, "loss": 0.7009, "step": 10159 }, { "epoch": 0.31138899105063134, "grad_norm": 1.2808166564824208, "learning_rate": 8.063441001958456e-06, "loss": 0.6796, "step": 10160 }, { "epoch": 0.31141963957337254, "grad_norm": 1.3620381686652527, "learning_rate": 8.06304873460809e-06, "loss": 0.7528, "step": 10161 }, { "epoch": 0.31145028809611375, "grad_norm": 1.224107313187035, "learning_rate": 8.06265643707672e-06, "loss": 0.5818, "step": 10162 }, { "epoch": 0.31148093661885495, "grad_norm": 1.227089734223045, "learning_rate": 8.062264109368214e-06, "loss": 0.6707, "step": 10163 }, { "epoch": 0.31151158514159616, "grad_norm": 1.22231604261314, "learning_rate": 8.061871751486434e-06, "loss": 0.7034, "step": 10164 }, { "epoch": 0.31154223366433736, "grad_norm": 1.3080092029323005, "learning_rate": 8.06147936343525e-06, "loss": 0.6269, "step": 10165 }, { "epoch": 0.31157288218707857, "grad_norm": 1.321700989604664, "learning_rate": 8.061086945218523e-06, "loss": 0.708, "step": 10166 }, { "epoch": 0.3116035307098198, "grad_norm": 1.36183668589753, "learning_rate": 8.060694496840127e-06, "loss": 0.7235, "step": 10167 }, { "epoch": 0.311634179232561, "grad_norm": 1.368253828289131, "learning_rate": 8.060302018303923e-06, "loss": 0.6629, "step": 10168 }, { "epoch": 0.3116648277553022, "grad_norm": 1.2987616023895565, "learning_rate": 8.059909509613781e-06, "loss": 0.7224, "step": 10169 }, { "epoch": 0.3116954762780434, "grad_norm": 1.120199902406105, "learning_rate": 8.059516970773566e-06, "loss": 0.7138, "step": 10170 }, { "epoch": 0.3117261248007846, "grad_norm": 1.2992522925904382, "learning_rate": 8.05912440178715e-06, "loss": 0.6521, "step": 10171 }, { "epoch": 0.3117567733235258, "grad_norm": 1.1283772815689765, "learning_rate": 8.058731802658397e-06, "loss": 0.5874, "step": 10172 }, { "epoch": 0.311787421846267, "grad_norm": 1.286713852011776, "learning_rate": 8.058339173391179e-06, "loss": 0.578, "step": 10173 }, { "epoch": 0.3118180703690082, "grad_norm": 1.4145843757550343, "learning_rate": 8.05794651398936e-06, "loss": 0.7128, "step": 10174 }, { "epoch": 0.3118487188917494, "grad_norm": 1.163116646325579, "learning_rate": 8.057553824456812e-06, "loss": 0.6691, "step": 10175 }, { "epoch": 0.3118793674144906, "grad_norm": 1.5270932416109249, "learning_rate": 8.057161104797404e-06, "loss": 0.7052, "step": 10176 }, { "epoch": 0.31191001593723183, "grad_norm": 1.251250867411388, "learning_rate": 8.056768355015008e-06, "loss": 0.7122, "step": 10177 }, { "epoch": 0.31194066445997304, "grad_norm": 1.380558881340668, "learning_rate": 8.056375575113489e-06, "loss": 0.7232, "step": 10178 }, { "epoch": 0.31197131298271424, "grad_norm": 1.27413373248588, "learning_rate": 8.055982765096719e-06, "loss": 0.6261, "step": 10179 }, { "epoch": 0.31200196150545545, "grad_norm": 1.251145472216047, "learning_rate": 8.05558992496857e-06, "loss": 0.7042, "step": 10180 }, { "epoch": 0.31203261002819666, "grad_norm": 1.13330368035184, "learning_rate": 8.05519705473291e-06, "loss": 0.6476, "step": 10181 }, { "epoch": 0.31206325855093786, "grad_norm": 1.2189874727651158, "learning_rate": 8.054804154393614e-06, "loss": 0.6302, "step": 10182 }, { "epoch": 0.31209390707367907, "grad_norm": 1.1874385845377973, "learning_rate": 8.05441122395455e-06, "loss": 0.7394, "step": 10183 }, { "epoch": 0.3121245555964203, "grad_norm": 1.487609144087838, "learning_rate": 8.054018263419591e-06, "loss": 0.6829, "step": 10184 }, { "epoch": 0.3121552041191615, "grad_norm": 1.327281353685737, "learning_rate": 8.053625272792609e-06, "loss": 0.6477, "step": 10185 }, { "epoch": 0.3121858526419027, "grad_norm": 1.2431352931403894, "learning_rate": 8.053232252077475e-06, "loss": 0.6323, "step": 10186 }, { "epoch": 0.3122165011646439, "grad_norm": 1.0854540018501055, "learning_rate": 8.052839201278063e-06, "loss": 0.7352, "step": 10187 }, { "epoch": 0.3122471496873851, "grad_norm": 0.7234397406546378, "learning_rate": 8.052446120398246e-06, "loss": 0.4439, "step": 10188 }, { "epoch": 0.31227779821012625, "grad_norm": 1.4395731724785195, "learning_rate": 8.052053009441893e-06, "loss": 0.8172, "step": 10189 }, { "epoch": 0.31230844673286745, "grad_norm": 1.3726064364928303, "learning_rate": 8.051659868412885e-06, "loss": 0.747, "step": 10190 }, { "epoch": 0.31233909525560866, "grad_norm": 1.3668262252000158, "learning_rate": 8.051266697315087e-06, "loss": 0.6086, "step": 10191 }, { "epoch": 0.31236974377834986, "grad_norm": 1.1642343740842203, "learning_rate": 8.050873496152382e-06, "loss": 0.7054, "step": 10192 }, { "epoch": 0.31240039230109107, "grad_norm": 1.2221955109204654, "learning_rate": 8.050480264928637e-06, "loss": 0.6797, "step": 10193 }, { "epoch": 0.3124310408238323, "grad_norm": 1.2851336365853958, "learning_rate": 8.050087003647731e-06, "loss": 0.6805, "step": 10194 }, { "epoch": 0.3124616893465735, "grad_norm": 1.2954293119604423, "learning_rate": 8.049693712313537e-06, "loss": 0.6319, "step": 10195 }, { "epoch": 0.3124923378693147, "grad_norm": 1.3375820324327778, "learning_rate": 8.049300390929931e-06, "loss": 0.7506, "step": 10196 }, { "epoch": 0.3125229863920559, "grad_norm": 1.2656837071302274, "learning_rate": 8.048907039500786e-06, "loss": 0.7046, "step": 10197 }, { "epoch": 0.3125536349147971, "grad_norm": 1.2112338091298704, "learning_rate": 8.048513658029981e-06, "loss": 0.7122, "step": 10198 }, { "epoch": 0.3125842834375383, "grad_norm": 1.2849335206315426, "learning_rate": 8.048120246521392e-06, "loss": 0.6328, "step": 10199 }, { "epoch": 0.3126149319602795, "grad_norm": 1.1744763893803758, "learning_rate": 8.047726804978893e-06, "loss": 0.6137, "step": 10200 }, { "epoch": 0.3126455804830207, "grad_norm": 1.3029545957030173, "learning_rate": 8.047333333406363e-06, "loss": 0.6702, "step": 10201 }, { "epoch": 0.3126762290057619, "grad_norm": 1.1537791554129864, "learning_rate": 8.046939831807678e-06, "loss": 0.7513, "step": 10202 }, { "epoch": 0.3127068775285031, "grad_norm": 1.2257703897612329, "learning_rate": 8.046546300186714e-06, "loss": 0.6684, "step": 10203 }, { "epoch": 0.31273752605124433, "grad_norm": 1.2012181087785718, "learning_rate": 8.04615273854735e-06, "loss": 0.6444, "step": 10204 }, { "epoch": 0.31276817457398554, "grad_norm": 1.3964449083989832, "learning_rate": 8.045759146893465e-06, "loss": 0.6783, "step": 10205 }, { "epoch": 0.31279882309672674, "grad_norm": 1.3513874200947225, "learning_rate": 8.045365525228934e-06, "loss": 0.7048, "step": 10206 }, { "epoch": 0.31282947161946795, "grad_norm": 1.3617544853570094, "learning_rate": 8.044971873557639e-06, "loss": 0.7117, "step": 10207 }, { "epoch": 0.31286012014220915, "grad_norm": 1.1546590860109416, "learning_rate": 8.044578191883456e-06, "loss": 0.6482, "step": 10208 }, { "epoch": 0.31289076866495036, "grad_norm": 1.2204411493381615, "learning_rate": 8.044184480210267e-06, "loss": 0.5939, "step": 10209 }, { "epoch": 0.31292141718769156, "grad_norm": 1.1731066756403465, "learning_rate": 8.043790738541947e-06, "loss": 0.7066, "step": 10210 }, { "epoch": 0.31295206571043277, "grad_norm": 1.2346507010396395, "learning_rate": 8.043396966882377e-06, "loss": 0.6535, "step": 10211 }, { "epoch": 0.312982714233174, "grad_norm": 0.5490443208301371, "learning_rate": 8.04300316523544e-06, "loss": 0.4621, "step": 10212 }, { "epoch": 0.3130133627559152, "grad_norm": 0.5608020339970083, "learning_rate": 8.042609333605017e-06, "loss": 0.4474, "step": 10213 }, { "epoch": 0.3130440112786564, "grad_norm": 1.247801240475141, "learning_rate": 8.042215471994981e-06, "loss": 0.6991, "step": 10214 }, { "epoch": 0.3130746598013976, "grad_norm": 1.239734082439945, "learning_rate": 8.04182158040922e-06, "loss": 0.7106, "step": 10215 }, { "epoch": 0.3131053083241388, "grad_norm": 1.3287077238061082, "learning_rate": 8.041427658851613e-06, "loss": 0.5931, "step": 10216 }, { "epoch": 0.31313595684688, "grad_norm": 0.5061492334841573, "learning_rate": 8.04103370732604e-06, "loss": 0.4536, "step": 10217 }, { "epoch": 0.3131666053696212, "grad_norm": 0.4837757304352321, "learning_rate": 8.040639725836384e-06, "loss": 0.4432, "step": 10218 }, { "epoch": 0.3131972538923624, "grad_norm": 1.2283213135773463, "learning_rate": 8.040245714386528e-06, "loss": 0.6988, "step": 10219 }, { "epoch": 0.31322790241510357, "grad_norm": 1.260568625467334, "learning_rate": 8.03985167298035e-06, "loss": 0.7147, "step": 10220 }, { "epoch": 0.31325855093784477, "grad_norm": 1.157639333263762, "learning_rate": 8.039457601621738e-06, "loss": 0.7403, "step": 10221 }, { "epoch": 0.313289199460586, "grad_norm": 1.3828470148785965, "learning_rate": 8.039063500314572e-06, "loss": 0.6654, "step": 10222 }, { "epoch": 0.3133198479833272, "grad_norm": 1.2345241218148422, "learning_rate": 8.038669369062736e-06, "loss": 0.7514, "step": 10223 }, { "epoch": 0.3133504965060684, "grad_norm": 0.4951400120014033, "learning_rate": 8.038275207870114e-06, "loss": 0.4351, "step": 10224 }, { "epoch": 0.3133811450288096, "grad_norm": 1.4388004013133697, "learning_rate": 8.037881016740587e-06, "loss": 0.7351, "step": 10225 }, { "epoch": 0.3134117935515508, "grad_norm": 1.3225096771924103, "learning_rate": 8.037486795678042e-06, "loss": 0.7531, "step": 10226 }, { "epoch": 0.313442442074292, "grad_norm": 1.3346683424190176, "learning_rate": 8.037092544686364e-06, "loss": 0.7391, "step": 10227 }, { "epoch": 0.3134730905970332, "grad_norm": 1.2063616056144097, "learning_rate": 8.036698263769434e-06, "loss": 0.6694, "step": 10228 }, { "epoch": 0.3135037391197744, "grad_norm": 1.309092400272816, "learning_rate": 8.03630395293114e-06, "loss": 0.6918, "step": 10229 }, { "epoch": 0.3135343876425156, "grad_norm": 1.1015215722641432, "learning_rate": 8.035909612175366e-06, "loss": 0.6018, "step": 10230 }, { "epoch": 0.3135650361652568, "grad_norm": 1.3029124048352396, "learning_rate": 8.035515241505999e-06, "loss": 0.714, "step": 10231 }, { "epoch": 0.31359568468799803, "grad_norm": 1.3935695934276464, "learning_rate": 8.035120840926922e-06, "loss": 0.723, "step": 10232 }, { "epoch": 0.31362633321073924, "grad_norm": 1.1373685673374307, "learning_rate": 8.034726410442024e-06, "loss": 0.5958, "step": 10233 }, { "epoch": 0.31365698173348044, "grad_norm": 1.2340479062050957, "learning_rate": 8.034331950055188e-06, "loss": 0.6335, "step": 10234 }, { "epoch": 0.31368763025622165, "grad_norm": 1.3275913949989946, "learning_rate": 8.033937459770306e-06, "loss": 0.6837, "step": 10235 }, { "epoch": 0.31371827877896286, "grad_norm": 1.2754810538429788, "learning_rate": 8.03354293959126e-06, "loss": 0.6236, "step": 10236 }, { "epoch": 0.31374892730170406, "grad_norm": 1.2157453005326213, "learning_rate": 8.033148389521939e-06, "loss": 0.7834, "step": 10237 }, { "epoch": 0.31377957582444527, "grad_norm": 1.2606782714146818, "learning_rate": 8.032753809566232e-06, "loss": 0.6548, "step": 10238 }, { "epoch": 0.3138102243471865, "grad_norm": 1.4432293128991383, "learning_rate": 8.032359199728025e-06, "loss": 0.8069, "step": 10239 }, { "epoch": 0.3138408728699277, "grad_norm": 1.46310757747456, "learning_rate": 8.031964560011207e-06, "loss": 0.6368, "step": 10240 }, { "epoch": 0.3138715213926689, "grad_norm": 1.2614872185003232, "learning_rate": 8.031569890419667e-06, "loss": 0.6477, "step": 10241 }, { "epoch": 0.3139021699154101, "grad_norm": 1.5083842146744793, "learning_rate": 8.031175190957295e-06, "loss": 0.7789, "step": 10242 }, { "epoch": 0.3139328184381513, "grad_norm": 1.192967285928218, "learning_rate": 8.030780461627975e-06, "loss": 0.6232, "step": 10243 }, { "epoch": 0.3139634669608925, "grad_norm": 1.0947752373857706, "learning_rate": 8.0303857024356e-06, "loss": 0.6767, "step": 10244 }, { "epoch": 0.3139941154836337, "grad_norm": 1.2553724031878548, "learning_rate": 8.02999091338406e-06, "loss": 0.7402, "step": 10245 }, { "epoch": 0.3140247640063749, "grad_norm": 1.2425756392249319, "learning_rate": 8.029596094477246e-06, "loss": 0.7125, "step": 10246 }, { "epoch": 0.3140554125291161, "grad_norm": 1.4032064337013834, "learning_rate": 8.029201245719046e-06, "loss": 0.7183, "step": 10247 }, { "epoch": 0.3140860610518573, "grad_norm": 1.373450014104975, "learning_rate": 8.028806367113349e-06, "loss": 0.6939, "step": 10248 }, { "epoch": 0.31411670957459853, "grad_norm": 1.4157218141426982, "learning_rate": 8.028411458664047e-06, "loss": 0.7183, "step": 10249 }, { "epoch": 0.31414735809733974, "grad_norm": 1.4316254564858049, "learning_rate": 8.028016520375036e-06, "loss": 0.6894, "step": 10250 }, { "epoch": 0.3141780066200809, "grad_norm": 0.6461731040570846, "learning_rate": 8.0276215522502e-06, "loss": 0.443, "step": 10251 }, { "epoch": 0.3142086551428221, "grad_norm": 0.5198870292569139, "learning_rate": 8.027226554293435e-06, "loss": 0.4536, "step": 10252 }, { "epoch": 0.3142393036655633, "grad_norm": 1.1675193228691219, "learning_rate": 8.026831526508633e-06, "loss": 0.6697, "step": 10253 }, { "epoch": 0.3142699521883045, "grad_norm": 1.2616026209093318, "learning_rate": 8.026436468899686e-06, "loss": 0.6812, "step": 10254 }, { "epoch": 0.3143006007110457, "grad_norm": 1.143483082952483, "learning_rate": 8.026041381470486e-06, "loss": 0.6718, "step": 10255 }, { "epoch": 0.3143312492337869, "grad_norm": 1.258809026906852, "learning_rate": 8.025646264224924e-06, "loss": 0.5861, "step": 10256 }, { "epoch": 0.3143618977565281, "grad_norm": 0.5729104697161461, "learning_rate": 8.025251117166896e-06, "loss": 0.4321, "step": 10257 }, { "epoch": 0.3143925462792693, "grad_norm": 1.2015177806893136, "learning_rate": 8.024855940300298e-06, "loss": 0.7113, "step": 10258 }, { "epoch": 0.31442319480201053, "grad_norm": 1.306553480554251, "learning_rate": 8.024460733629017e-06, "loss": 0.6902, "step": 10259 }, { "epoch": 0.31445384332475174, "grad_norm": 0.5263827743862417, "learning_rate": 8.024065497156951e-06, "loss": 0.4283, "step": 10260 }, { "epoch": 0.31448449184749294, "grad_norm": 1.2638512657681251, "learning_rate": 8.023670230887995e-06, "loss": 0.6479, "step": 10261 }, { "epoch": 0.31451514037023415, "grad_norm": 1.3584580311744128, "learning_rate": 8.023274934826042e-06, "loss": 0.7677, "step": 10262 }, { "epoch": 0.31454578889297535, "grad_norm": 1.088038224315568, "learning_rate": 8.022879608974988e-06, "loss": 0.665, "step": 10263 }, { "epoch": 0.31457643741571656, "grad_norm": 0.4675734955805233, "learning_rate": 8.022484253338726e-06, "loss": 0.4371, "step": 10264 }, { "epoch": 0.31460708593845776, "grad_norm": 1.3542848047681386, "learning_rate": 8.022088867921157e-06, "loss": 0.631, "step": 10265 }, { "epoch": 0.31463773446119897, "grad_norm": 1.1622892356223686, "learning_rate": 8.02169345272617e-06, "loss": 0.7472, "step": 10266 }, { "epoch": 0.3146683829839402, "grad_norm": 1.1454903440338742, "learning_rate": 8.021298007757663e-06, "loss": 0.6111, "step": 10267 }, { "epoch": 0.3146990315066814, "grad_norm": 1.1635731790473665, "learning_rate": 8.020902533019536e-06, "loss": 0.6664, "step": 10268 }, { "epoch": 0.3147296800294226, "grad_norm": 1.2712544237805403, "learning_rate": 8.020507028515684e-06, "loss": 0.8641, "step": 10269 }, { "epoch": 0.3147603285521638, "grad_norm": 1.2131976874212171, "learning_rate": 8.020111494250003e-06, "loss": 0.6874, "step": 10270 }, { "epoch": 0.314790977074905, "grad_norm": 1.3067488328069956, "learning_rate": 8.019715930226389e-06, "loss": 0.7388, "step": 10271 }, { "epoch": 0.3148216255976462, "grad_norm": 0.5041452125762554, "learning_rate": 8.019320336448743e-06, "loss": 0.4276, "step": 10272 }, { "epoch": 0.3148522741203874, "grad_norm": 1.2060133133370765, "learning_rate": 8.018924712920961e-06, "loss": 0.7416, "step": 10273 }, { "epoch": 0.3148829226431286, "grad_norm": 1.2468948434326588, "learning_rate": 8.018529059646941e-06, "loss": 0.6215, "step": 10274 }, { "epoch": 0.3149135711658698, "grad_norm": 0.5226405098012602, "learning_rate": 8.018133376630582e-06, "loss": 0.4393, "step": 10275 }, { "epoch": 0.314944219688611, "grad_norm": 1.2339781857122396, "learning_rate": 8.017737663875782e-06, "loss": 0.7864, "step": 10276 }, { "epoch": 0.31497486821135223, "grad_norm": 1.3130482924539653, "learning_rate": 8.01734192138644e-06, "loss": 0.7397, "step": 10277 }, { "epoch": 0.31500551673409344, "grad_norm": 1.0706563418671373, "learning_rate": 8.016946149166458e-06, "loss": 0.5121, "step": 10278 }, { "epoch": 0.31503616525683464, "grad_norm": 1.179063608963751, "learning_rate": 8.016550347219734e-06, "loss": 0.6659, "step": 10279 }, { "epoch": 0.31506681377957585, "grad_norm": 1.3021235951962573, "learning_rate": 8.016154515550165e-06, "loss": 0.6302, "step": 10280 }, { "epoch": 0.31509746230231706, "grad_norm": 1.1704685542798314, "learning_rate": 8.015758654161657e-06, "loss": 0.6121, "step": 10281 }, { "epoch": 0.3151281108250582, "grad_norm": 1.1947027981250817, "learning_rate": 8.015362763058105e-06, "loss": 0.7101, "step": 10282 }, { "epoch": 0.3151587593477994, "grad_norm": 1.2385222483150182, "learning_rate": 8.014966842243414e-06, "loss": 0.6787, "step": 10283 }, { "epoch": 0.3151894078705406, "grad_norm": 1.2017733652494793, "learning_rate": 8.014570891721481e-06, "loss": 0.6632, "step": 10284 }, { "epoch": 0.3152200563932818, "grad_norm": 1.290214142696104, "learning_rate": 8.014174911496213e-06, "loss": 0.7206, "step": 10285 }, { "epoch": 0.31525070491602303, "grad_norm": 1.1394555087445615, "learning_rate": 8.013778901571506e-06, "loss": 0.6717, "step": 10286 }, { "epoch": 0.31528135343876423, "grad_norm": 1.2356803109312555, "learning_rate": 8.013382861951264e-06, "loss": 0.6487, "step": 10287 }, { "epoch": 0.31531200196150544, "grad_norm": 1.3226586770362192, "learning_rate": 8.01298679263939e-06, "loss": 0.7134, "step": 10288 }, { "epoch": 0.31534265048424664, "grad_norm": 1.173382677981328, "learning_rate": 8.012590693639786e-06, "loss": 0.6443, "step": 10289 }, { "epoch": 0.31537329900698785, "grad_norm": 1.3086376278575529, "learning_rate": 8.012194564956357e-06, "loss": 0.7483, "step": 10290 }, { "epoch": 0.31540394752972906, "grad_norm": 1.2579975032248027, "learning_rate": 8.011798406593004e-06, "loss": 0.7786, "step": 10291 }, { "epoch": 0.31543459605247026, "grad_norm": 1.205414269526742, "learning_rate": 8.011402218553628e-06, "loss": 0.7137, "step": 10292 }, { "epoch": 0.31546524457521147, "grad_norm": 1.1098270145788167, "learning_rate": 8.011006000842137e-06, "loss": 0.7696, "step": 10293 }, { "epoch": 0.3154958930979527, "grad_norm": 1.1866194832557246, "learning_rate": 8.010609753462433e-06, "loss": 0.6295, "step": 10294 }, { "epoch": 0.3155265416206939, "grad_norm": 0.5796434766788479, "learning_rate": 8.010213476418422e-06, "loss": 0.434, "step": 10295 }, { "epoch": 0.3155571901434351, "grad_norm": 1.279151187494814, "learning_rate": 8.009817169714007e-06, "loss": 0.6273, "step": 10296 }, { "epoch": 0.3155878386661763, "grad_norm": 1.2382676511892792, "learning_rate": 8.009420833353094e-06, "loss": 0.714, "step": 10297 }, { "epoch": 0.3156184871889175, "grad_norm": 1.2575407500816205, "learning_rate": 8.009024467339586e-06, "loss": 0.6801, "step": 10298 }, { "epoch": 0.3156491357116587, "grad_norm": 1.299247580262184, "learning_rate": 8.00862807167739e-06, "loss": 0.645, "step": 10299 }, { "epoch": 0.3156797842343999, "grad_norm": 0.45144466253708293, "learning_rate": 8.008231646370412e-06, "loss": 0.4504, "step": 10300 }, { "epoch": 0.3157104327571411, "grad_norm": 1.1560665910081918, "learning_rate": 8.007835191422559e-06, "loss": 0.6617, "step": 10301 }, { "epoch": 0.3157410812798823, "grad_norm": 1.1779523396372877, "learning_rate": 8.007438706837735e-06, "loss": 0.708, "step": 10302 }, { "epoch": 0.3157717298026235, "grad_norm": 1.1778420273757488, "learning_rate": 8.007042192619849e-06, "loss": 0.6903, "step": 10303 }, { "epoch": 0.31580237832536473, "grad_norm": 1.3082484153939855, "learning_rate": 8.006645648772806e-06, "loss": 0.7344, "step": 10304 }, { "epoch": 0.31583302684810594, "grad_norm": 1.22272926351463, "learning_rate": 8.006249075300515e-06, "loss": 0.6191, "step": 10305 }, { "epoch": 0.31586367537084714, "grad_norm": 1.4047141638770908, "learning_rate": 8.005852472206883e-06, "loss": 0.6576, "step": 10306 }, { "epoch": 0.31589432389358835, "grad_norm": 1.3099568153077876, "learning_rate": 8.005455839495816e-06, "loss": 0.5974, "step": 10307 }, { "epoch": 0.31592497241632955, "grad_norm": 1.17855535144257, "learning_rate": 8.005059177171225e-06, "loss": 0.6289, "step": 10308 }, { "epoch": 0.31595562093907076, "grad_norm": 1.333437830820264, "learning_rate": 8.004662485237016e-06, "loss": 0.6077, "step": 10309 }, { "epoch": 0.31598626946181196, "grad_norm": 1.3398435362676435, "learning_rate": 8.004265763697099e-06, "loss": 0.6993, "step": 10310 }, { "epoch": 0.31601691798455317, "grad_norm": 1.2354837107627041, "learning_rate": 8.003869012555383e-06, "loss": 0.6795, "step": 10311 }, { "epoch": 0.3160475665072944, "grad_norm": 1.2980938881526016, "learning_rate": 8.00347223181578e-06, "loss": 0.6864, "step": 10312 }, { "epoch": 0.3160782150300355, "grad_norm": 0.5528508345809882, "learning_rate": 8.003075421482191e-06, "loss": 0.4216, "step": 10313 }, { "epoch": 0.31610886355277673, "grad_norm": 1.3301333022100177, "learning_rate": 8.002678581558534e-06, "loss": 0.7184, "step": 10314 }, { "epoch": 0.31613951207551794, "grad_norm": 1.1486677137166892, "learning_rate": 8.002281712048717e-06, "loss": 0.6932, "step": 10315 }, { "epoch": 0.31617016059825914, "grad_norm": 1.203177180334929, "learning_rate": 8.00188481295665e-06, "loss": 0.6532, "step": 10316 }, { "epoch": 0.31620080912100035, "grad_norm": 1.3029810208648323, "learning_rate": 8.001487884286245e-06, "loss": 0.6589, "step": 10317 }, { "epoch": 0.31623145764374155, "grad_norm": 1.160910252552714, "learning_rate": 8.00109092604141e-06, "loss": 0.6016, "step": 10318 }, { "epoch": 0.31626210616648276, "grad_norm": 1.2148837782842212, "learning_rate": 8.00069393822606e-06, "loss": 0.6572, "step": 10319 }, { "epoch": 0.31629275468922396, "grad_norm": 0.4554687447700743, "learning_rate": 8.000296920844102e-06, "loss": 0.4388, "step": 10320 }, { "epoch": 0.31632340321196517, "grad_norm": 1.3788143955354797, "learning_rate": 7.999899873899453e-06, "loss": 0.7115, "step": 10321 }, { "epoch": 0.3163540517347064, "grad_norm": 1.3252404320463336, "learning_rate": 7.999502797396024e-06, "loss": 0.7633, "step": 10322 }, { "epoch": 0.3163847002574476, "grad_norm": 1.08619943800102, "learning_rate": 7.999105691337725e-06, "loss": 0.6197, "step": 10323 }, { "epoch": 0.3164153487801888, "grad_norm": 1.256711292811607, "learning_rate": 7.99870855572847e-06, "loss": 0.7047, "step": 10324 }, { "epoch": 0.31644599730293, "grad_norm": 1.4462909011959262, "learning_rate": 7.998311390572173e-06, "loss": 0.8623, "step": 10325 }, { "epoch": 0.3164766458256712, "grad_norm": 1.342730408751489, "learning_rate": 7.997914195872746e-06, "loss": 0.757, "step": 10326 }, { "epoch": 0.3165072943484124, "grad_norm": 1.0590707099198782, "learning_rate": 7.997516971634106e-06, "loss": 0.695, "step": 10327 }, { "epoch": 0.3165379428711536, "grad_norm": 1.3884697147122216, "learning_rate": 7.99711971786016e-06, "loss": 0.8532, "step": 10328 }, { "epoch": 0.3165685913938948, "grad_norm": 1.1584379911744718, "learning_rate": 7.996722434554828e-06, "loss": 0.6428, "step": 10329 }, { "epoch": 0.316599239916636, "grad_norm": 1.2156018893956184, "learning_rate": 7.996325121722024e-06, "loss": 0.6016, "step": 10330 }, { "epoch": 0.3166298884393772, "grad_norm": 0.49012621655081656, "learning_rate": 7.995927779365662e-06, "loss": 0.4349, "step": 10331 }, { "epoch": 0.31666053696211843, "grad_norm": 1.171592045876744, "learning_rate": 7.995530407489659e-06, "loss": 0.599, "step": 10332 }, { "epoch": 0.31669118548485964, "grad_norm": 1.1564598916806421, "learning_rate": 7.995133006097923e-06, "loss": 0.7055, "step": 10333 }, { "epoch": 0.31672183400760084, "grad_norm": 1.5009196109850296, "learning_rate": 7.99473557519438e-06, "loss": 0.652, "step": 10334 }, { "epoch": 0.31675248253034205, "grad_norm": 0.46625154089969956, "learning_rate": 7.99433811478294e-06, "loss": 0.4323, "step": 10335 }, { "epoch": 0.31678313105308326, "grad_norm": 1.215009256784411, "learning_rate": 7.99394062486752e-06, "loss": 0.731, "step": 10336 }, { "epoch": 0.31681377957582446, "grad_norm": 1.2917593458718657, "learning_rate": 7.993543105452036e-06, "loss": 0.6486, "step": 10337 }, { "epoch": 0.31684442809856567, "grad_norm": 1.3793154684692297, "learning_rate": 7.993145556540407e-06, "loss": 0.6246, "step": 10338 }, { "epoch": 0.3168750766213069, "grad_norm": 0.465831648659385, "learning_rate": 7.99274797813655e-06, "loss": 0.4264, "step": 10339 }, { "epoch": 0.3169057251440481, "grad_norm": 1.376645333606266, "learning_rate": 7.99235037024438e-06, "loss": 0.7003, "step": 10340 }, { "epoch": 0.3169363736667893, "grad_norm": 1.455190761025933, "learning_rate": 7.991952732867817e-06, "loss": 0.7301, "step": 10341 }, { "epoch": 0.3169670221895305, "grad_norm": 2.1958120051666343, "learning_rate": 7.991555066010777e-06, "loss": 0.6768, "step": 10342 }, { "epoch": 0.3169976707122717, "grad_norm": 1.165910251803104, "learning_rate": 7.99115736967718e-06, "loss": 0.5864, "step": 10343 }, { "epoch": 0.31702831923501285, "grad_norm": 1.151243437993046, "learning_rate": 7.990759643870944e-06, "loss": 0.6123, "step": 10344 }, { "epoch": 0.31705896775775405, "grad_norm": 0.4684286881265136, "learning_rate": 7.990361888595987e-06, "loss": 0.4411, "step": 10345 }, { "epoch": 0.31708961628049526, "grad_norm": 1.2060969478942822, "learning_rate": 7.989964103856232e-06, "loss": 0.6258, "step": 10346 }, { "epoch": 0.31712026480323646, "grad_norm": 1.7299823992880596, "learning_rate": 7.989566289655596e-06, "loss": 0.6838, "step": 10347 }, { "epoch": 0.31715091332597767, "grad_norm": 1.2063348836733814, "learning_rate": 7.989168445997994e-06, "loss": 0.7261, "step": 10348 }, { "epoch": 0.3171815618487189, "grad_norm": 1.3058556355838697, "learning_rate": 7.988770572887353e-06, "loss": 0.6287, "step": 10349 }, { "epoch": 0.3172122103714601, "grad_norm": 1.266897164154356, "learning_rate": 7.988372670327591e-06, "loss": 0.6515, "step": 10350 }, { "epoch": 0.3172428588942013, "grad_norm": 1.3109959013930679, "learning_rate": 7.987974738322629e-06, "loss": 0.6311, "step": 10351 }, { "epoch": 0.3172735074169425, "grad_norm": 1.5143564664614162, "learning_rate": 7.987576776876387e-06, "loss": 0.6618, "step": 10352 }, { "epoch": 0.3173041559396837, "grad_norm": 1.2381216607947494, "learning_rate": 7.987178785992787e-06, "loss": 0.7239, "step": 10353 }, { "epoch": 0.3173348044624249, "grad_norm": 1.0801507615962467, "learning_rate": 7.98678076567575e-06, "loss": 0.638, "step": 10354 }, { "epoch": 0.3173654529851661, "grad_norm": 1.5647076775915771, "learning_rate": 7.986382715929196e-06, "loss": 0.8361, "step": 10355 }, { "epoch": 0.3173961015079073, "grad_norm": 1.3306066354567196, "learning_rate": 7.985984636757051e-06, "loss": 0.7329, "step": 10356 }, { "epoch": 0.3174267500306485, "grad_norm": 1.2565034199295206, "learning_rate": 7.985586528163234e-06, "loss": 0.7008, "step": 10357 }, { "epoch": 0.3174573985533897, "grad_norm": 1.4126911709461714, "learning_rate": 7.98518839015167e-06, "loss": 0.821, "step": 10358 }, { "epoch": 0.31748804707613093, "grad_norm": 1.2307880187817135, "learning_rate": 7.984790222726281e-06, "loss": 0.6638, "step": 10359 }, { "epoch": 0.31751869559887214, "grad_norm": 1.3205994012756197, "learning_rate": 7.984392025890991e-06, "loss": 0.7546, "step": 10360 }, { "epoch": 0.31754934412161334, "grad_norm": 1.2774832740786757, "learning_rate": 7.98399379964972e-06, "loss": 0.6277, "step": 10361 }, { "epoch": 0.31757999264435455, "grad_norm": 1.462459971283194, "learning_rate": 7.983595544006398e-06, "loss": 0.7631, "step": 10362 }, { "epoch": 0.31761064116709575, "grad_norm": 1.1647906235802987, "learning_rate": 7.983197258964943e-06, "loss": 0.645, "step": 10363 }, { "epoch": 0.31764128968983696, "grad_norm": 1.348777566851076, "learning_rate": 7.982798944529284e-06, "loss": 0.7085, "step": 10364 }, { "epoch": 0.31767193821257816, "grad_norm": 1.2906207667345155, "learning_rate": 7.982400600703344e-06, "loss": 0.652, "step": 10365 }, { "epoch": 0.31770258673531937, "grad_norm": 1.1607551453539597, "learning_rate": 7.982002227491045e-06, "loss": 0.6682, "step": 10366 }, { "epoch": 0.3177332352580606, "grad_norm": 1.1822974106021682, "learning_rate": 7.981603824896319e-06, "loss": 0.6052, "step": 10367 }, { "epoch": 0.3177638837808018, "grad_norm": 1.060954705558918, "learning_rate": 7.981205392923085e-06, "loss": 0.63, "step": 10368 }, { "epoch": 0.317794532303543, "grad_norm": 1.2405021155762022, "learning_rate": 7.980806931575273e-06, "loss": 0.6129, "step": 10369 }, { "epoch": 0.3178251808262842, "grad_norm": 1.2280859086680405, "learning_rate": 7.980408440856806e-06, "loss": 0.6602, "step": 10370 }, { "epoch": 0.3178558293490254, "grad_norm": 1.2280301581208677, "learning_rate": 7.980009920771613e-06, "loss": 0.6979, "step": 10371 }, { "epoch": 0.3178864778717666, "grad_norm": 1.2427695384473447, "learning_rate": 7.979611371323619e-06, "loss": 0.6757, "step": 10372 }, { "epoch": 0.3179171263945078, "grad_norm": 1.0267330821534633, "learning_rate": 7.979212792516752e-06, "loss": 0.6141, "step": 10373 }, { "epoch": 0.317947774917249, "grad_norm": 1.3584067437337066, "learning_rate": 7.978814184354941e-06, "loss": 0.6991, "step": 10374 }, { "epoch": 0.31797842343999017, "grad_norm": 1.1914611168417326, "learning_rate": 7.978415546842108e-06, "loss": 0.5357, "step": 10375 }, { "epoch": 0.31800907196273137, "grad_norm": 1.201230182674995, "learning_rate": 7.978016879982188e-06, "loss": 0.7155, "step": 10376 }, { "epoch": 0.3180397204854726, "grad_norm": 0.497883184266813, "learning_rate": 7.977618183779106e-06, "loss": 0.4199, "step": 10377 }, { "epoch": 0.3180703690082138, "grad_norm": 1.3107880179268245, "learning_rate": 7.977219458236787e-06, "loss": 0.7306, "step": 10378 }, { "epoch": 0.318101017530955, "grad_norm": 1.3894753928289505, "learning_rate": 7.976820703359166e-06, "loss": 0.6669, "step": 10379 }, { "epoch": 0.3181316660536962, "grad_norm": 1.3592626251881055, "learning_rate": 7.976421919150165e-06, "loss": 0.6788, "step": 10380 }, { "epoch": 0.3181623145764374, "grad_norm": 1.3347796316523661, "learning_rate": 7.976023105613722e-06, "loss": 0.6297, "step": 10381 }, { "epoch": 0.3181929630991786, "grad_norm": 1.3927942045903199, "learning_rate": 7.975624262753758e-06, "loss": 0.7394, "step": 10382 }, { "epoch": 0.3182236116219198, "grad_norm": 1.1967325878542232, "learning_rate": 7.97522539057421e-06, "loss": 0.6222, "step": 10383 }, { "epoch": 0.318254260144661, "grad_norm": 1.1952965055194384, "learning_rate": 7.974826489079002e-06, "loss": 0.6316, "step": 10384 }, { "epoch": 0.3182849086674022, "grad_norm": 1.4078258016789151, "learning_rate": 7.97442755827207e-06, "loss": 0.8076, "step": 10385 }, { "epoch": 0.31831555719014343, "grad_norm": 1.2451478886169087, "learning_rate": 7.97402859815734e-06, "loss": 0.7049, "step": 10386 }, { "epoch": 0.31834620571288463, "grad_norm": 1.2083362865933696, "learning_rate": 7.973629608738746e-06, "loss": 0.6878, "step": 10387 }, { "epoch": 0.31837685423562584, "grad_norm": 1.289599358142908, "learning_rate": 7.97323059002022e-06, "loss": 0.6151, "step": 10388 }, { "epoch": 0.31840750275836704, "grad_norm": 1.3090171217211124, "learning_rate": 7.972831542005692e-06, "loss": 0.7057, "step": 10389 }, { "epoch": 0.31843815128110825, "grad_norm": 1.394369103791647, "learning_rate": 7.972432464699093e-06, "loss": 0.7303, "step": 10390 }, { "epoch": 0.31846879980384946, "grad_norm": 1.4297329461050692, "learning_rate": 7.972033358104355e-06, "loss": 0.7101, "step": 10391 }, { "epoch": 0.31849944832659066, "grad_norm": 1.3791566598662572, "learning_rate": 7.971634222225416e-06, "loss": 0.6448, "step": 10392 }, { "epoch": 0.31853009684933187, "grad_norm": 1.2715602023030523, "learning_rate": 7.971235057066202e-06, "loss": 0.7435, "step": 10393 }, { "epoch": 0.3185607453720731, "grad_norm": 1.3168331653671026, "learning_rate": 7.97083586263065e-06, "loss": 0.6659, "step": 10394 }, { "epoch": 0.3185913938948143, "grad_norm": 0.4703435881779382, "learning_rate": 7.970436638922691e-06, "loss": 0.4443, "step": 10395 }, { "epoch": 0.3186220424175555, "grad_norm": 0.44895819593397507, "learning_rate": 7.97003738594626e-06, "loss": 0.4375, "step": 10396 }, { "epoch": 0.3186526909402967, "grad_norm": 1.2588210682622465, "learning_rate": 7.969638103705291e-06, "loss": 0.7178, "step": 10397 }, { "epoch": 0.3186833394630379, "grad_norm": 1.3183485378276585, "learning_rate": 7.969238792203719e-06, "loss": 0.7203, "step": 10398 }, { "epoch": 0.3187139879857791, "grad_norm": 1.3054875714189937, "learning_rate": 7.968839451445477e-06, "loss": 0.6209, "step": 10399 }, { "epoch": 0.3187446365085203, "grad_norm": 1.1207579476814296, "learning_rate": 7.968440081434499e-06, "loss": 0.7025, "step": 10400 }, { "epoch": 0.3187752850312615, "grad_norm": 1.3716092475843784, "learning_rate": 7.96804068217472e-06, "loss": 0.7322, "step": 10401 }, { "epoch": 0.3188059335540027, "grad_norm": 1.2778286723300805, "learning_rate": 7.96764125367008e-06, "loss": 0.6837, "step": 10402 }, { "epoch": 0.3188365820767439, "grad_norm": 1.2079496469865996, "learning_rate": 7.96724179592451e-06, "loss": 0.6529, "step": 10403 }, { "epoch": 0.31886723059948513, "grad_norm": 1.297816580183321, "learning_rate": 7.966842308941948e-06, "loss": 0.809, "step": 10404 }, { "epoch": 0.31889787912222634, "grad_norm": 1.3233303361648012, "learning_rate": 7.966442792726328e-06, "loss": 0.6308, "step": 10405 }, { "epoch": 0.3189285276449675, "grad_norm": 0.5473010427025213, "learning_rate": 7.96604324728159e-06, "loss": 0.4702, "step": 10406 }, { "epoch": 0.3189591761677087, "grad_norm": 1.2975856052816617, "learning_rate": 7.965643672611667e-06, "loss": 0.7084, "step": 10407 }, { "epoch": 0.3189898246904499, "grad_norm": 1.2997928630572744, "learning_rate": 7.965244068720501e-06, "loss": 0.5831, "step": 10408 }, { "epoch": 0.3190204732131911, "grad_norm": 1.2053206848810596, "learning_rate": 7.964844435612025e-06, "loss": 0.7056, "step": 10409 }, { "epoch": 0.3190511217359323, "grad_norm": 0.4566410609786508, "learning_rate": 7.964444773290177e-06, "loss": 0.4453, "step": 10410 }, { "epoch": 0.3190817702586735, "grad_norm": 1.1995388686418953, "learning_rate": 7.964045081758898e-06, "loss": 0.6992, "step": 10411 }, { "epoch": 0.3191124187814147, "grad_norm": 1.5258809501381128, "learning_rate": 7.963645361022123e-06, "loss": 0.6944, "step": 10412 }, { "epoch": 0.3191430673041559, "grad_norm": 0.4751276697480437, "learning_rate": 7.963245611083792e-06, "loss": 0.4345, "step": 10413 }, { "epoch": 0.31917371582689713, "grad_norm": 1.2466238403086067, "learning_rate": 7.962845831947845e-06, "loss": 0.5978, "step": 10414 }, { "epoch": 0.31920436434963834, "grad_norm": 1.1957012263450069, "learning_rate": 7.96244602361822e-06, "loss": 0.6624, "step": 10415 }, { "epoch": 0.31923501287237954, "grad_norm": 1.028091008111202, "learning_rate": 7.962046186098854e-06, "loss": 0.6514, "step": 10416 }, { "epoch": 0.31926566139512075, "grad_norm": 0.4965463157463059, "learning_rate": 7.961646319393693e-06, "loss": 0.4605, "step": 10417 }, { "epoch": 0.31929630991786195, "grad_norm": 1.069593883428117, "learning_rate": 7.96124642350667e-06, "loss": 0.526, "step": 10418 }, { "epoch": 0.31932695844060316, "grad_norm": 1.270601011944556, "learning_rate": 7.96084649844173e-06, "loss": 0.6797, "step": 10419 }, { "epoch": 0.31935760696334436, "grad_norm": 1.3356229502885246, "learning_rate": 7.96044654420281e-06, "loss": 0.6552, "step": 10420 }, { "epoch": 0.31938825548608557, "grad_norm": 1.0648583526097812, "learning_rate": 7.960046560793854e-06, "loss": 0.6388, "step": 10421 }, { "epoch": 0.3194189040088268, "grad_norm": 1.3254870418666163, "learning_rate": 7.959646548218802e-06, "loss": 0.7527, "step": 10422 }, { "epoch": 0.319449552531568, "grad_norm": 0.4863549596378163, "learning_rate": 7.959246506481595e-06, "loss": 0.4165, "step": 10423 }, { "epoch": 0.3194802010543092, "grad_norm": 1.1429540826017979, "learning_rate": 7.958846435586175e-06, "loss": 0.6181, "step": 10424 }, { "epoch": 0.3195108495770504, "grad_norm": 1.427950238206476, "learning_rate": 7.958446335536484e-06, "loss": 0.6385, "step": 10425 }, { "epoch": 0.3195414980997916, "grad_norm": 1.219667275344693, "learning_rate": 7.958046206336463e-06, "loss": 0.6484, "step": 10426 }, { "epoch": 0.3195721466225328, "grad_norm": 1.273144246043749, "learning_rate": 7.957646047990058e-06, "loss": 0.5577, "step": 10427 }, { "epoch": 0.319602795145274, "grad_norm": 1.220324532578421, "learning_rate": 7.957245860501209e-06, "loss": 0.6065, "step": 10428 }, { "epoch": 0.3196334436680152, "grad_norm": 1.0730930418493845, "learning_rate": 7.956845643873861e-06, "loss": 0.5772, "step": 10429 }, { "epoch": 0.3196640921907564, "grad_norm": 1.8099683573469718, "learning_rate": 7.956445398111954e-06, "loss": 0.7045, "step": 10430 }, { "epoch": 0.3196947407134976, "grad_norm": 1.2629277276639883, "learning_rate": 7.956045123219436e-06, "loss": 0.789, "step": 10431 }, { "epoch": 0.31972538923623883, "grad_norm": 1.4815555791615285, "learning_rate": 7.955644819200248e-06, "loss": 0.6497, "step": 10432 }, { "epoch": 0.31975603775898004, "grad_norm": 1.288326223813294, "learning_rate": 7.955244486058335e-06, "loss": 0.6761, "step": 10433 }, { "epoch": 0.31978668628172124, "grad_norm": 1.2422200304034328, "learning_rate": 7.954844123797642e-06, "loss": 0.6778, "step": 10434 }, { "epoch": 0.31981733480446245, "grad_norm": 0.49124627524788994, "learning_rate": 7.954443732422116e-06, "loss": 0.4329, "step": 10435 }, { "epoch": 0.31984798332720366, "grad_norm": 0.4875901486060125, "learning_rate": 7.954043311935697e-06, "loss": 0.4233, "step": 10436 }, { "epoch": 0.3198786318499448, "grad_norm": 1.095734739464761, "learning_rate": 7.953642862342335e-06, "loss": 0.6201, "step": 10437 }, { "epoch": 0.319909280372686, "grad_norm": 1.2895753811348474, "learning_rate": 7.953242383645974e-06, "loss": 0.7429, "step": 10438 }, { "epoch": 0.3199399288954272, "grad_norm": 1.2106451724622362, "learning_rate": 7.952841875850562e-06, "loss": 0.6091, "step": 10439 }, { "epoch": 0.3199705774181684, "grad_norm": 1.2701341390500536, "learning_rate": 7.95244133896004e-06, "loss": 0.5752, "step": 10440 }, { "epoch": 0.32000122594090963, "grad_norm": 1.1668601883472673, "learning_rate": 7.95204077297836e-06, "loss": 0.6431, "step": 10441 }, { "epoch": 0.32003187446365083, "grad_norm": 1.1529708666410656, "learning_rate": 7.951640177909467e-06, "loss": 0.6729, "step": 10442 }, { "epoch": 0.32006252298639204, "grad_norm": 1.5042274260505826, "learning_rate": 7.951239553757308e-06, "loss": 0.8029, "step": 10443 }, { "epoch": 0.32009317150913325, "grad_norm": 1.3070388688956787, "learning_rate": 7.95083890052583e-06, "loss": 0.6853, "step": 10444 }, { "epoch": 0.32012382003187445, "grad_norm": 1.2412501806206138, "learning_rate": 7.95043821821898e-06, "loss": 0.6387, "step": 10445 }, { "epoch": 0.32015446855461566, "grad_norm": 1.169991399234905, "learning_rate": 7.95003750684071e-06, "loss": 0.6848, "step": 10446 }, { "epoch": 0.32018511707735686, "grad_norm": 1.2750730284039056, "learning_rate": 7.949636766394966e-06, "loss": 0.6487, "step": 10447 }, { "epoch": 0.32021576560009807, "grad_norm": 1.113404374239898, "learning_rate": 7.949235996885694e-06, "loss": 0.5911, "step": 10448 }, { "epoch": 0.3202464141228393, "grad_norm": 1.367566424261089, "learning_rate": 7.948835198316845e-06, "loss": 0.796, "step": 10449 }, { "epoch": 0.3202770626455805, "grad_norm": 1.2272710891405492, "learning_rate": 7.94843437069237e-06, "loss": 0.5934, "step": 10450 }, { "epoch": 0.3203077111683217, "grad_norm": 1.2616625090860356, "learning_rate": 7.948033514016216e-06, "loss": 0.5903, "step": 10451 }, { "epoch": 0.3203383596910629, "grad_norm": 0.5871393051774398, "learning_rate": 7.947632628292334e-06, "loss": 0.4265, "step": 10452 }, { "epoch": 0.3203690082138041, "grad_norm": 1.2914002121564978, "learning_rate": 7.947231713524672e-06, "loss": 0.6827, "step": 10453 }, { "epoch": 0.3203996567365453, "grad_norm": 0.47253274249993354, "learning_rate": 7.946830769717184e-06, "loss": 0.4316, "step": 10454 }, { "epoch": 0.3204303052592865, "grad_norm": 1.344368941659791, "learning_rate": 7.946429796873816e-06, "loss": 0.6675, "step": 10455 }, { "epoch": 0.3204609537820277, "grad_norm": 1.3148556695388574, "learning_rate": 7.946028794998524e-06, "loss": 0.7521, "step": 10456 }, { "epoch": 0.3204916023047689, "grad_norm": 0.4607874361935077, "learning_rate": 7.945627764095253e-06, "loss": 0.4366, "step": 10457 }, { "epoch": 0.3205222508275101, "grad_norm": 1.252970088028585, "learning_rate": 7.945226704167963e-06, "loss": 0.6014, "step": 10458 }, { "epoch": 0.32055289935025133, "grad_norm": 1.1836482346104338, "learning_rate": 7.944825615220598e-06, "loss": 0.7135, "step": 10459 }, { "epoch": 0.32058354787299254, "grad_norm": 1.220615451489076, "learning_rate": 7.944424497257111e-06, "loss": 0.7753, "step": 10460 }, { "epoch": 0.32061419639573374, "grad_norm": 1.1450810251841412, "learning_rate": 7.944023350281458e-06, "loss": 0.6961, "step": 10461 }, { "epoch": 0.32064484491847495, "grad_norm": 1.2312606987205623, "learning_rate": 7.94362217429759e-06, "loss": 0.6773, "step": 10462 }, { "epoch": 0.32067549344121615, "grad_norm": 0.5299784589104264, "learning_rate": 7.943220969309458e-06, "loss": 0.4183, "step": 10463 }, { "epoch": 0.32070614196395736, "grad_norm": 0.4877601774551149, "learning_rate": 7.94281973532102e-06, "loss": 0.46, "step": 10464 }, { "epoch": 0.32073679048669856, "grad_norm": 1.278267464161068, "learning_rate": 7.942418472336222e-06, "loss": 0.7277, "step": 10465 }, { "epoch": 0.32076743900943977, "grad_norm": 0.4251178519134276, "learning_rate": 7.942017180359025e-06, "loss": 0.4533, "step": 10466 }, { "epoch": 0.320798087532181, "grad_norm": 1.2870373787045772, "learning_rate": 7.941615859393379e-06, "loss": 0.6686, "step": 10467 }, { "epoch": 0.3208287360549221, "grad_norm": 1.2563064934536872, "learning_rate": 7.941214509443237e-06, "loss": 0.6795, "step": 10468 }, { "epoch": 0.32085938457766333, "grad_norm": 1.204559720907132, "learning_rate": 7.940813130512559e-06, "loss": 0.6102, "step": 10469 }, { "epoch": 0.32089003310040454, "grad_norm": 1.2535876307693485, "learning_rate": 7.940411722605296e-06, "loss": 0.6761, "step": 10470 }, { "epoch": 0.32092068162314574, "grad_norm": 0.5552590454143692, "learning_rate": 7.940010285725403e-06, "loss": 0.4368, "step": 10471 }, { "epoch": 0.32095133014588695, "grad_norm": 1.358649620617282, "learning_rate": 7.939608819876837e-06, "loss": 0.7587, "step": 10472 }, { "epoch": 0.32098197866862815, "grad_norm": 1.077674799238062, "learning_rate": 7.939207325063553e-06, "loss": 0.6589, "step": 10473 }, { "epoch": 0.32101262719136936, "grad_norm": 1.1906758042110315, "learning_rate": 7.938805801289509e-06, "loss": 0.7787, "step": 10474 }, { "epoch": 0.32104327571411057, "grad_norm": 0.46451886283738214, "learning_rate": 7.938404248558658e-06, "loss": 0.4319, "step": 10475 }, { "epoch": 0.32107392423685177, "grad_norm": 0.46704802530933764, "learning_rate": 7.938002666874958e-06, "loss": 0.4417, "step": 10476 }, { "epoch": 0.321104572759593, "grad_norm": 1.1761889749969194, "learning_rate": 7.937601056242365e-06, "loss": 0.6944, "step": 10477 }, { "epoch": 0.3211352212823342, "grad_norm": 1.267936631688847, "learning_rate": 7.937199416664839e-06, "loss": 0.6461, "step": 10478 }, { "epoch": 0.3211658698050754, "grad_norm": 1.2208743753668163, "learning_rate": 7.936797748146335e-06, "loss": 0.6716, "step": 10479 }, { "epoch": 0.3211965183278166, "grad_norm": 1.2474467985076407, "learning_rate": 7.936396050690812e-06, "loss": 0.6713, "step": 10480 }, { "epoch": 0.3212271668505578, "grad_norm": 0.5762650329526224, "learning_rate": 7.935994324302226e-06, "loss": 0.4399, "step": 10481 }, { "epoch": 0.321257815373299, "grad_norm": 1.169743426526108, "learning_rate": 7.935592568984537e-06, "loss": 0.704, "step": 10482 }, { "epoch": 0.3212884638960402, "grad_norm": 1.252326871669237, "learning_rate": 7.935190784741705e-06, "loss": 0.7069, "step": 10483 }, { "epoch": 0.3213191124187814, "grad_norm": 1.2622020715141828, "learning_rate": 7.934788971577685e-06, "loss": 0.6901, "step": 10484 }, { "epoch": 0.3213497609415226, "grad_norm": 0.4737134980000125, "learning_rate": 7.93438712949644e-06, "loss": 0.4409, "step": 10485 }, { "epoch": 0.3213804094642638, "grad_norm": 0.4420094142293267, "learning_rate": 7.933985258501926e-06, "loss": 0.4293, "step": 10486 }, { "epoch": 0.32141105798700503, "grad_norm": 1.2609919534038514, "learning_rate": 7.933583358598107e-06, "loss": 0.6796, "step": 10487 }, { "epoch": 0.32144170650974624, "grad_norm": 1.175400768033989, "learning_rate": 7.933181429788937e-06, "loss": 0.6874, "step": 10488 }, { "epoch": 0.32147235503248744, "grad_norm": 1.394444900342321, "learning_rate": 7.932779472078384e-06, "loss": 0.6558, "step": 10489 }, { "epoch": 0.32150300355522865, "grad_norm": 1.167206950475265, "learning_rate": 7.932377485470402e-06, "loss": 0.6803, "step": 10490 }, { "epoch": 0.32153365207796986, "grad_norm": 0.5301698698309367, "learning_rate": 7.931975469968956e-06, "loss": 0.4154, "step": 10491 }, { "epoch": 0.32156430060071106, "grad_norm": 0.5361905854811313, "learning_rate": 7.931573425578003e-06, "loss": 0.4214, "step": 10492 }, { "epoch": 0.32159494912345227, "grad_norm": 1.3357435316584982, "learning_rate": 7.93117135230151e-06, "loss": 0.7648, "step": 10493 }, { "epoch": 0.3216255976461935, "grad_norm": 1.3251302183676117, "learning_rate": 7.930769250143433e-06, "loss": 0.6743, "step": 10494 }, { "epoch": 0.3216562461689347, "grad_norm": 1.113159176447759, "learning_rate": 7.930367119107738e-06, "loss": 0.6332, "step": 10495 }, { "epoch": 0.3216868946916759, "grad_norm": 1.2242726793674739, "learning_rate": 7.929964959198387e-06, "loss": 0.7699, "step": 10496 }, { "epoch": 0.3217175432144171, "grad_norm": 1.2597302855127839, "learning_rate": 7.92956277041934e-06, "loss": 0.6671, "step": 10497 }, { "epoch": 0.3217481917371583, "grad_norm": 1.148453946081623, "learning_rate": 7.929160552774561e-06, "loss": 0.6368, "step": 10498 }, { "epoch": 0.32177884025989945, "grad_norm": 1.2988435657079176, "learning_rate": 7.928758306268014e-06, "loss": 0.7126, "step": 10499 }, { "epoch": 0.32180948878264065, "grad_norm": 1.6824520322395142, "learning_rate": 7.928356030903663e-06, "loss": 0.6935, "step": 10500 }, { "epoch": 0.32184013730538186, "grad_norm": 1.0204577650535183, "learning_rate": 7.927953726685472e-06, "loss": 0.6773, "step": 10501 }, { "epoch": 0.32187078582812306, "grad_norm": 0.7257095391706427, "learning_rate": 7.927551393617401e-06, "loss": 0.4432, "step": 10502 }, { "epoch": 0.32190143435086427, "grad_norm": 1.2770353421778315, "learning_rate": 7.927149031703418e-06, "loss": 0.7186, "step": 10503 }, { "epoch": 0.3219320828736055, "grad_norm": 1.2011746159945909, "learning_rate": 7.926746640947487e-06, "loss": 0.6288, "step": 10504 }, { "epoch": 0.3219627313963467, "grad_norm": 1.3640740281921597, "learning_rate": 7.926344221353573e-06, "loss": 0.7006, "step": 10505 }, { "epoch": 0.3219933799190879, "grad_norm": 1.159146028214944, "learning_rate": 7.925941772925639e-06, "loss": 0.7076, "step": 10506 }, { "epoch": 0.3220240284418291, "grad_norm": 1.3415893903973053, "learning_rate": 7.925539295667654e-06, "loss": 0.7625, "step": 10507 }, { "epoch": 0.3220546769645703, "grad_norm": 1.1783462800757172, "learning_rate": 7.925136789583581e-06, "loss": 0.6683, "step": 10508 }, { "epoch": 0.3220853254873115, "grad_norm": 1.290259295050479, "learning_rate": 7.924734254677386e-06, "loss": 0.7232, "step": 10509 }, { "epoch": 0.3221159740100527, "grad_norm": 1.1055628678039469, "learning_rate": 7.924331690953038e-06, "loss": 0.6678, "step": 10510 }, { "epoch": 0.3221466225327939, "grad_norm": 1.1806927851195077, "learning_rate": 7.9239290984145e-06, "loss": 0.6094, "step": 10511 }, { "epoch": 0.3221772710555351, "grad_norm": 1.4275635077532918, "learning_rate": 7.92352647706574e-06, "loss": 0.7033, "step": 10512 }, { "epoch": 0.3222079195782763, "grad_norm": 0.509183721221722, "learning_rate": 7.923123826910726e-06, "loss": 0.3878, "step": 10513 }, { "epoch": 0.32223856810101753, "grad_norm": 1.329205176071575, "learning_rate": 7.922721147953425e-06, "loss": 0.7179, "step": 10514 }, { "epoch": 0.32226921662375874, "grad_norm": 1.2264764301742714, "learning_rate": 7.922318440197805e-06, "loss": 0.6678, "step": 10515 }, { "epoch": 0.32229986514649994, "grad_norm": 0.4731244640465623, "learning_rate": 7.921915703647836e-06, "loss": 0.4308, "step": 10516 }, { "epoch": 0.32233051366924115, "grad_norm": 1.22703856599072, "learning_rate": 7.921512938307481e-06, "loss": 0.6974, "step": 10517 }, { "epoch": 0.32236116219198235, "grad_norm": 1.0228245732275905, "learning_rate": 7.921110144180712e-06, "loss": 0.6861, "step": 10518 }, { "epoch": 0.32239181071472356, "grad_norm": 1.2430556302358162, "learning_rate": 7.920707321271497e-06, "loss": 0.6883, "step": 10519 }, { "epoch": 0.32242245923746476, "grad_norm": 0.4638404883470302, "learning_rate": 7.920304469583808e-06, "loss": 0.4272, "step": 10520 }, { "epoch": 0.32245310776020597, "grad_norm": 1.2007160542409985, "learning_rate": 7.91990158912161e-06, "loss": 0.642, "step": 10521 }, { "epoch": 0.3224837562829472, "grad_norm": 0.49052393363334984, "learning_rate": 7.919498679888873e-06, "loss": 0.4446, "step": 10522 }, { "epoch": 0.3225144048056884, "grad_norm": 1.3568888009069702, "learning_rate": 7.919095741889572e-06, "loss": 0.7052, "step": 10523 }, { "epoch": 0.3225450533284296, "grad_norm": 1.0721592481967288, "learning_rate": 7.91869277512767e-06, "loss": 0.669, "step": 10524 }, { "epoch": 0.3225757018511708, "grad_norm": 1.2868707003575575, "learning_rate": 7.918289779607144e-06, "loss": 0.6988, "step": 10525 }, { "epoch": 0.322606350373912, "grad_norm": 1.4242612296358415, "learning_rate": 7.91788675533196e-06, "loss": 0.7015, "step": 10526 }, { "epoch": 0.3226369988966532, "grad_norm": 1.249261770069547, "learning_rate": 7.917483702306094e-06, "loss": 0.6634, "step": 10527 }, { "epoch": 0.3226676474193944, "grad_norm": 2.0475732803358886, "learning_rate": 7.917080620533513e-06, "loss": 0.6972, "step": 10528 }, { "epoch": 0.3226982959421356, "grad_norm": 1.1579880063853933, "learning_rate": 7.91667751001819e-06, "loss": 0.5403, "step": 10529 }, { "epoch": 0.32272894446487677, "grad_norm": 1.3321455811459544, "learning_rate": 7.916274370764098e-06, "loss": 0.7427, "step": 10530 }, { "epoch": 0.32275959298761797, "grad_norm": 1.197239196646212, "learning_rate": 7.915871202775209e-06, "loss": 0.7119, "step": 10531 }, { "epoch": 0.3227902415103592, "grad_norm": 0.5032948394081324, "learning_rate": 7.915468006055493e-06, "loss": 0.4157, "step": 10532 }, { "epoch": 0.3228208900331004, "grad_norm": 1.1763691084356391, "learning_rate": 7.915064780608926e-06, "loss": 0.7146, "step": 10533 }, { "epoch": 0.3228515385558416, "grad_norm": 1.3249218419922544, "learning_rate": 7.91466152643948e-06, "loss": 0.6784, "step": 10534 }, { "epoch": 0.3228821870785828, "grad_norm": 1.3421564391422012, "learning_rate": 7.914258243551129e-06, "loss": 0.7219, "step": 10535 }, { "epoch": 0.322912835601324, "grad_norm": 1.1818337725069563, "learning_rate": 7.913854931947844e-06, "loss": 0.6817, "step": 10536 }, { "epoch": 0.3229434841240652, "grad_norm": 1.2743940132534903, "learning_rate": 7.913451591633602e-06, "loss": 0.6794, "step": 10537 }, { "epoch": 0.3229741326468064, "grad_norm": 1.3979748629872717, "learning_rate": 7.913048222612376e-06, "loss": 0.6951, "step": 10538 }, { "epoch": 0.3230047811695476, "grad_norm": 1.192426584597537, "learning_rate": 7.91264482488814e-06, "loss": 0.6614, "step": 10539 }, { "epoch": 0.3230354296922888, "grad_norm": 1.1458921176916317, "learning_rate": 7.91224139846487e-06, "loss": 0.7277, "step": 10540 }, { "epoch": 0.32306607821503003, "grad_norm": 1.2494515704308438, "learning_rate": 7.911837943346538e-06, "loss": 0.6572, "step": 10541 }, { "epoch": 0.32309672673777123, "grad_norm": 1.2853358168457056, "learning_rate": 7.911434459537124e-06, "loss": 0.6099, "step": 10542 }, { "epoch": 0.32312737526051244, "grad_norm": 1.178858602421056, "learning_rate": 7.911030947040602e-06, "loss": 0.6589, "step": 10543 }, { "epoch": 0.32315802378325365, "grad_norm": 1.383912870361646, "learning_rate": 7.910627405860947e-06, "loss": 0.6052, "step": 10544 }, { "epoch": 0.32318867230599485, "grad_norm": 1.1708364554561088, "learning_rate": 7.910223836002133e-06, "loss": 0.6308, "step": 10545 }, { "epoch": 0.32321932082873606, "grad_norm": 1.2719376604891481, "learning_rate": 7.909820237468141e-06, "loss": 0.704, "step": 10546 }, { "epoch": 0.32324996935147726, "grad_norm": 1.2529005175626333, "learning_rate": 7.909416610262945e-06, "loss": 0.6324, "step": 10547 }, { "epoch": 0.32328061787421847, "grad_norm": 1.2571703311651916, "learning_rate": 7.909012954390526e-06, "loss": 0.6833, "step": 10548 }, { "epoch": 0.3233112663969597, "grad_norm": 1.356636383501665, "learning_rate": 7.908609269854852e-06, "loss": 0.5869, "step": 10549 }, { "epoch": 0.3233419149197009, "grad_norm": 1.2011704801995886, "learning_rate": 7.908205556659911e-06, "loss": 0.6762, "step": 10550 }, { "epoch": 0.3233725634424421, "grad_norm": 1.3049824628063516, "learning_rate": 7.907801814809674e-06, "loss": 0.6755, "step": 10551 }, { "epoch": 0.3234032119651833, "grad_norm": 1.2570088040379848, "learning_rate": 7.907398044308123e-06, "loss": 0.7474, "step": 10552 }, { "epoch": 0.3234338604879245, "grad_norm": 1.2477341089095038, "learning_rate": 7.906994245159235e-06, "loss": 0.6424, "step": 10553 }, { "epoch": 0.3234645090106657, "grad_norm": 1.3974731934644906, "learning_rate": 7.90659041736699e-06, "loss": 0.7736, "step": 10554 }, { "epoch": 0.3234951575334069, "grad_norm": 1.2888393996804244, "learning_rate": 7.906186560935366e-06, "loss": 0.6644, "step": 10555 }, { "epoch": 0.3235258060561481, "grad_norm": 1.2573636564631108, "learning_rate": 7.905782675868341e-06, "loss": 0.6354, "step": 10556 }, { "epoch": 0.3235564545788893, "grad_norm": 1.233403256226581, "learning_rate": 7.905378762169896e-06, "loss": 0.7324, "step": 10557 }, { "epoch": 0.3235871031016305, "grad_norm": 1.2300002647471069, "learning_rate": 7.904974819844012e-06, "loss": 0.6509, "step": 10558 }, { "epoch": 0.32361775162437173, "grad_norm": 1.0739732301182512, "learning_rate": 7.904570848894666e-06, "loss": 0.6357, "step": 10559 }, { "epoch": 0.32364840014711294, "grad_norm": 1.0718122261659548, "learning_rate": 7.90416684932584e-06, "loss": 0.7249, "step": 10560 }, { "epoch": 0.3236790486698541, "grad_norm": 0.49088375501088827, "learning_rate": 7.903762821141516e-06, "loss": 0.4244, "step": 10561 }, { "epoch": 0.3237096971925953, "grad_norm": 1.2256441088192758, "learning_rate": 7.903358764345674e-06, "loss": 0.6415, "step": 10562 }, { "epoch": 0.3237403457153365, "grad_norm": 1.1689378952918057, "learning_rate": 7.902954678942296e-06, "loss": 0.615, "step": 10563 }, { "epoch": 0.3237709942380777, "grad_norm": 1.3173282064418956, "learning_rate": 7.902550564935363e-06, "loss": 0.6331, "step": 10564 }, { "epoch": 0.3238016427608189, "grad_norm": 1.3555710510622012, "learning_rate": 7.902146422328853e-06, "loss": 0.6184, "step": 10565 }, { "epoch": 0.3238322912835601, "grad_norm": 1.3317422596413295, "learning_rate": 7.901742251126755e-06, "loss": 0.7184, "step": 10566 }, { "epoch": 0.3238629398063013, "grad_norm": 1.4781183609939266, "learning_rate": 7.901338051333047e-06, "loss": 0.6675, "step": 10567 }, { "epoch": 0.3238935883290425, "grad_norm": 1.222967356905346, "learning_rate": 7.900933822951714e-06, "loss": 0.7292, "step": 10568 }, { "epoch": 0.32392423685178373, "grad_norm": 1.2366977408288187, "learning_rate": 7.900529565986737e-06, "loss": 0.7166, "step": 10569 }, { "epoch": 0.32395488537452494, "grad_norm": 1.285631411210493, "learning_rate": 7.9001252804421e-06, "loss": 0.7309, "step": 10570 }, { "epoch": 0.32398553389726614, "grad_norm": 1.2063645297183974, "learning_rate": 7.899720966321786e-06, "loss": 0.7843, "step": 10571 }, { "epoch": 0.32401618242000735, "grad_norm": 1.2244076675653066, "learning_rate": 7.89931662362978e-06, "loss": 0.6545, "step": 10572 }, { "epoch": 0.32404683094274855, "grad_norm": 1.2008114892853035, "learning_rate": 7.898912252370066e-06, "loss": 0.7537, "step": 10573 }, { "epoch": 0.32407747946548976, "grad_norm": 1.3125801265761798, "learning_rate": 7.898507852546628e-06, "loss": 0.7267, "step": 10574 }, { "epoch": 0.32410812798823097, "grad_norm": 1.3484041650050793, "learning_rate": 7.89810342416345e-06, "loss": 0.6427, "step": 10575 }, { "epoch": 0.32413877651097217, "grad_norm": 1.41933108951626, "learning_rate": 7.897698967224517e-06, "loss": 0.6574, "step": 10576 }, { "epoch": 0.3241694250337134, "grad_norm": 1.2447721025449756, "learning_rate": 7.897294481733816e-06, "loss": 0.7004, "step": 10577 }, { "epoch": 0.3242000735564546, "grad_norm": 1.2641666693031566, "learning_rate": 7.896889967695329e-06, "loss": 0.7228, "step": 10578 }, { "epoch": 0.3242307220791958, "grad_norm": 1.177663060758689, "learning_rate": 7.896485425113045e-06, "loss": 0.644, "step": 10579 }, { "epoch": 0.324261370601937, "grad_norm": 1.2552687120558694, "learning_rate": 7.896080853990951e-06, "loss": 0.6972, "step": 10580 }, { "epoch": 0.3242920191246782, "grad_norm": 1.4934248516247357, "learning_rate": 7.895676254333029e-06, "loss": 0.6853, "step": 10581 }, { "epoch": 0.3243226676474194, "grad_norm": 1.2135537439918196, "learning_rate": 7.895271626143268e-06, "loss": 0.6541, "step": 10582 }, { "epoch": 0.3243533161701606, "grad_norm": 1.1705969842354944, "learning_rate": 7.894866969425656e-06, "loss": 0.7331, "step": 10583 }, { "epoch": 0.3243839646929018, "grad_norm": 1.1628913005979216, "learning_rate": 7.894462284184178e-06, "loss": 0.6871, "step": 10584 }, { "epoch": 0.324414613215643, "grad_norm": 1.1877523407397528, "learning_rate": 7.894057570422824e-06, "loss": 0.6066, "step": 10585 }, { "epoch": 0.3244452617383842, "grad_norm": 1.3494770229180406, "learning_rate": 7.893652828145579e-06, "loss": 0.7357, "step": 10586 }, { "epoch": 0.32447591026112543, "grad_norm": 1.2767462202626845, "learning_rate": 7.893248057356433e-06, "loss": 0.7289, "step": 10587 }, { "epoch": 0.32450655878386664, "grad_norm": 1.232180368804434, "learning_rate": 7.892843258059373e-06, "loss": 0.7117, "step": 10588 }, { "epoch": 0.32453720730660784, "grad_norm": 1.1531375575853986, "learning_rate": 7.892438430258388e-06, "loss": 0.7661, "step": 10589 }, { "epoch": 0.32456785582934905, "grad_norm": 1.307609823951853, "learning_rate": 7.892033573957467e-06, "loss": 0.6491, "step": 10590 }, { "epoch": 0.32459850435209026, "grad_norm": 1.2838177144173948, "learning_rate": 7.8916286891606e-06, "loss": 0.7491, "step": 10591 }, { "epoch": 0.3246291528748314, "grad_norm": 1.1316540269694468, "learning_rate": 7.891223775871776e-06, "loss": 0.6941, "step": 10592 }, { "epoch": 0.3246598013975726, "grad_norm": 1.052559100628433, "learning_rate": 7.890818834094985e-06, "loss": 0.639, "step": 10593 }, { "epoch": 0.3246904499203138, "grad_norm": 1.3532770713752917, "learning_rate": 7.890413863834214e-06, "loss": 0.7141, "step": 10594 }, { "epoch": 0.324721098443055, "grad_norm": 0.48169167334230484, "learning_rate": 7.890008865093458e-06, "loss": 0.452, "step": 10595 }, { "epoch": 0.32475174696579623, "grad_norm": 1.302020883113378, "learning_rate": 7.889603837876702e-06, "loss": 0.6563, "step": 10596 }, { "epoch": 0.32478239548853743, "grad_norm": 1.2623685445532915, "learning_rate": 7.889198782187944e-06, "loss": 0.6268, "step": 10597 }, { "epoch": 0.32481304401127864, "grad_norm": 1.4373956180254013, "learning_rate": 7.888793698031167e-06, "loss": 0.8246, "step": 10598 }, { "epoch": 0.32484369253401985, "grad_norm": 1.4155458461506163, "learning_rate": 7.88838858541037e-06, "loss": 0.7219, "step": 10599 }, { "epoch": 0.32487434105676105, "grad_norm": 1.2512777998724214, "learning_rate": 7.88798344432954e-06, "loss": 0.6103, "step": 10600 }, { "epoch": 0.32490498957950226, "grad_norm": 0.4696973240006153, "learning_rate": 7.88757827479267e-06, "loss": 0.4339, "step": 10601 }, { "epoch": 0.32493563810224346, "grad_norm": 1.2335207582192191, "learning_rate": 7.887173076803753e-06, "loss": 0.6453, "step": 10602 }, { "epoch": 0.32496628662498467, "grad_norm": 1.3194737638685379, "learning_rate": 7.886767850366781e-06, "loss": 0.6559, "step": 10603 }, { "epoch": 0.3249969351477259, "grad_norm": 0.4622596898463914, "learning_rate": 7.886362595485747e-06, "loss": 0.4366, "step": 10604 }, { "epoch": 0.3250275836704671, "grad_norm": 1.309540517852968, "learning_rate": 7.885957312164643e-06, "loss": 0.7802, "step": 10605 }, { "epoch": 0.3250582321932083, "grad_norm": 1.1470855039547512, "learning_rate": 7.885552000407463e-06, "loss": 0.6001, "step": 10606 }, { "epoch": 0.3250888807159495, "grad_norm": 1.2409170413864732, "learning_rate": 7.885146660218202e-06, "loss": 0.6791, "step": 10607 }, { "epoch": 0.3251195292386907, "grad_norm": 1.3593801787651665, "learning_rate": 7.884741291600853e-06, "loss": 0.7114, "step": 10608 }, { "epoch": 0.3251501777614319, "grad_norm": 1.3962803395231391, "learning_rate": 7.884335894559408e-06, "loss": 0.7559, "step": 10609 }, { "epoch": 0.3251808262841731, "grad_norm": 1.2984922908106433, "learning_rate": 7.883930469097864e-06, "loss": 0.7391, "step": 10610 }, { "epoch": 0.3252114748069143, "grad_norm": 1.1306975822590262, "learning_rate": 7.883525015220215e-06, "loss": 0.6561, "step": 10611 }, { "epoch": 0.3252421233296555, "grad_norm": 1.0747285712226067, "learning_rate": 7.883119532930458e-06, "loss": 0.6109, "step": 10612 }, { "epoch": 0.3252727718523967, "grad_norm": 1.3004234119290177, "learning_rate": 7.882714022232585e-06, "loss": 0.706, "step": 10613 }, { "epoch": 0.32530342037513793, "grad_norm": 1.287207439173189, "learning_rate": 7.882308483130594e-06, "loss": 0.7178, "step": 10614 }, { "epoch": 0.32533406889787914, "grad_norm": 1.2518388454242408, "learning_rate": 7.88190291562848e-06, "loss": 0.6192, "step": 10615 }, { "epoch": 0.32536471742062034, "grad_norm": 0.5287183212014344, "learning_rate": 7.881497319730239e-06, "loss": 0.4542, "step": 10616 }, { "epoch": 0.32539536594336155, "grad_norm": 1.2994124931005975, "learning_rate": 7.881091695439867e-06, "loss": 0.7131, "step": 10617 }, { "epoch": 0.32542601446610275, "grad_norm": 1.0999856797298517, "learning_rate": 7.880686042761363e-06, "loss": 0.6049, "step": 10618 }, { "epoch": 0.32545666298884396, "grad_norm": 1.0417242325557718, "learning_rate": 7.88028036169872e-06, "loss": 0.5914, "step": 10619 }, { "epoch": 0.32548731151158516, "grad_norm": 1.2285093951885566, "learning_rate": 7.879874652255938e-06, "loss": 0.7087, "step": 10620 }, { "epoch": 0.32551796003432637, "grad_norm": 1.154325943039858, "learning_rate": 7.879468914437016e-06, "loss": 0.6568, "step": 10621 }, { "epoch": 0.3255486085570676, "grad_norm": 1.2220780677849823, "learning_rate": 7.879063148245949e-06, "loss": 0.7037, "step": 10622 }, { "epoch": 0.3255792570798087, "grad_norm": 1.299089383061952, "learning_rate": 7.878657353686736e-06, "loss": 0.6752, "step": 10623 }, { "epoch": 0.32560990560254993, "grad_norm": 1.1186982107170331, "learning_rate": 7.878251530763377e-06, "loss": 0.6176, "step": 10624 }, { "epoch": 0.32564055412529114, "grad_norm": 1.1032938605714937, "learning_rate": 7.877845679479868e-06, "loss": 0.6806, "step": 10625 }, { "epoch": 0.32567120264803234, "grad_norm": 1.3450825650341425, "learning_rate": 7.87743979984021e-06, "loss": 0.684, "step": 10626 }, { "epoch": 0.32570185117077355, "grad_norm": 1.2747865992989995, "learning_rate": 7.8770338918484e-06, "loss": 0.6607, "step": 10627 }, { "epoch": 0.32573249969351475, "grad_norm": 1.3565265391379728, "learning_rate": 7.87662795550844e-06, "loss": 0.6513, "step": 10628 }, { "epoch": 0.32576314821625596, "grad_norm": 1.1424852108660866, "learning_rate": 7.876221990824329e-06, "loss": 0.6685, "step": 10629 }, { "epoch": 0.32579379673899717, "grad_norm": 0.5020190676349418, "learning_rate": 7.875815997800064e-06, "loss": 0.449, "step": 10630 }, { "epoch": 0.32582444526173837, "grad_norm": 1.3348605373794444, "learning_rate": 7.875409976439651e-06, "loss": 0.6938, "step": 10631 }, { "epoch": 0.3258550937844796, "grad_norm": 1.1438983989635836, "learning_rate": 7.875003926747087e-06, "loss": 0.6809, "step": 10632 }, { "epoch": 0.3258857423072208, "grad_norm": 1.1353438442493333, "learning_rate": 7.874597848726375e-06, "loss": 0.6529, "step": 10633 }, { "epoch": 0.325916390829962, "grad_norm": 1.116471308965278, "learning_rate": 7.874191742381514e-06, "loss": 0.7096, "step": 10634 }, { "epoch": 0.3259470393527032, "grad_norm": 0.45871857306065894, "learning_rate": 7.873785607716507e-06, "loss": 0.4392, "step": 10635 }, { "epoch": 0.3259776878754444, "grad_norm": 1.4345451205562154, "learning_rate": 7.873379444735354e-06, "loss": 0.761, "step": 10636 }, { "epoch": 0.3260083363981856, "grad_norm": 1.2389027307584417, "learning_rate": 7.872973253442058e-06, "loss": 0.7736, "step": 10637 }, { "epoch": 0.3260389849209268, "grad_norm": 0.5011569301805954, "learning_rate": 7.872567033840621e-06, "loss": 0.4485, "step": 10638 }, { "epoch": 0.326069633443668, "grad_norm": 1.1821723453729385, "learning_rate": 7.872160785935047e-06, "loss": 0.6741, "step": 10639 }, { "epoch": 0.3261002819664092, "grad_norm": 1.2437138425995073, "learning_rate": 7.87175450972934e-06, "loss": 0.689, "step": 10640 }, { "epoch": 0.32613093048915043, "grad_norm": 1.3094170139899255, "learning_rate": 7.871348205227498e-06, "loss": 0.719, "step": 10641 }, { "epoch": 0.32616157901189163, "grad_norm": 1.4498157068915725, "learning_rate": 7.870941872433527e-06, "loss": 0.6303, "step": 10642 }, { "epoch": 0.32619222753463284, "grad_norm": 1.1866683334109858, "learning_rate": 7.870535511351433e-06, "loss": 0.6758, "step": 10643 }, { "epoch": 0.32622287605737404, "grad_norm": 1.215626176876729, "learning_rate": 7.870129121985218e-06, "loss": 0.5952, "step": 10644 }, { "epoch": 0.32625352458011525, "grad_norm": 1.2640523672571053, "learning_rate": 7.869722704338887e-06, "loss": 0.6898, "step": 10645 }, { "epoch": 0.32628417310285646, "grad_norm": 1.158730629715728, "learning_rate": 7.869316258416442e-06, "loss": 0.6218, "step": 10646 }, { "epoch": 0.32631482162559766, "grad_norm": 1.4003496190963804, "learning_rate": 7.868909784221891e-06, "loss": 0.7647, "step": 10647 }, { "epoch": 0.32634547014833887, "grad_norm": 1.189062438116954, "learning_rate": 7.868503281759238e-06, "loss": 0.6218, "step": 10648 }, { "epoch": 0.3263761186710801, "grad_norm": 1.1619605150465422, "learning_rate": 7.868096751032489e-06, "loss": 0.7447, "step": 10649 }, { "epoch": 0.3264067671938213, "grad_norm": 1.271054985761126, "learning_rate": 7.867690192045646e-06, "loss": 0.7564, "step": 10650 }, { "epoch": 0.3264374157165625, "grad_norm": 1.5006392035703866, "learning_rate": 7.86728360480272e-06, "loss": 0.7398, "step": 10651 }, { "epoch": 0.3264680642393037, "grad_norm": 1.281944485810855, "learning_rate": 7.866876989307715e-06, "loss": 0.6671, "step": 10652 }, { "epoch": 0.3264987127620449, "grad_norm": 1.2118157114493735, "learning_rate": 7.866470345564636e-06, "loss": 0.6552, "step": 10653 }, { "epoch": 0.32652936128478605, "grad_norm": 0.5376601686317769, "learning_rate": 7.866063673577492e-06, "loss": 0.4337, "step": 10654 }, { "epoch": 0.32656000980752725, "grad_norm": 1.2865241671029994, "learning_rate": 7.865656973350291e-06, "loss": 0.6792, "step": 10655 }, { "epoch": 0.32659065833026846, "grad_norm": 1.237936109010734, "learning_rate": 7.865250244887038e-06, "loss": 0.7075, "step": 10656 }, { "epoch": 0.32662130685300966, "grad_norm": 1.3363932677598431, "learning_rate": 7.86484348819174e-06, "loss": 0.7443, "step": 10657 }, { "epoch": 0.32665195537575087, "grad_norm": 1.190808627418972, "learning_rate": 7.864436703268407e-06, "loss": 0.5522, "step": 10658 }, { "epoch": 0.3266826038984921, "grad_norm": 1.4868485078274583, "learning_rate": 7.864029890121045e-06, "loss": 0.6943, "step": 10659 }, { "epoch": 0.3267132524212333, "grad_norm": 1.202197977267171, "learning_rate": 7.863623048753665e-06, "loss": 0.6456, "step": 10660 }, { "epoch": 0.3267439009439745, "grad_norm": 1.1524312183023158, "learning_rate": 7.863216179170274e-06, "loss": 0.7564, "step": 10661 }, { "epoch": 0.3267745494667157, "grad_norm": 1.3042287061670799, "learning_rate": 7.862809281374882e-06, "loss": 0.6943, "step": 10662 }, { "epoch": 0.3268051979894569, "grad_norm": 1.406423203565765, "learning_rate": 7.862402355371496e-06, "loss": 0.7336, "step": 10663 }, { "epoch": 0.3268358465121981, "grad_norm": 1.2849533073858386, "learning_rate": 7.861995401164128e-06, "loss": 0.7302, "step": 10664 }, { "epoch": 0.3268664950349393, "grad_norm": 1.1244930857247455, "learning_rate": 7.861588418756787e-06, "loss": 0.6166, "step": 10665 }, { "epoch": 0.3268971435576805, "grad_norm": 1.1234121426830446, "learning_rate": 7.861181408153485e-06, "loss": 0.6057, "step": 10666 }, { "epoch": 0.3269277920804217, "grad_norm": 0.4897048749377236, "learning_rate": 7.860774369358229e-06, "loss": 0.4204, "step": 10667 }, { "epoch": 0.3269584406031629, "grad_norm": 1.2170466013341836, "learning_rate": 7.86036730237503e-06, "loss": 0.5679, "step": 10668 }, { "epoch": 0.32698908912590413, "grad_norm": 1.2786858283342708, "learning_rate": 7.859960207207901e-06, "loss": 0.7325, "step": 10669 }, { "epoch": 0.32701973764864534, "grad_norm": 1.2611445358495372, "learning_rate": 7.859553083860854e-06, "loss": 0.6639, "step": 10670 }, { "epoch": 0.32705038617138654, "grad_norm": 0.46151385753390683, "learning_rate": 7.859145932337897e-06, "loss": 0.4414, "step": 10671 }, { "epoch": 0.32708103469412775, "grad_norm": 0.44539028288466936, "learning_rate": 7.858738752643043e-06, "loss": 0.4226, "step": 10672 }, { "epoch": 0.32711168321686895, "grad_norm": 1.2213585203308004, "learning_rate": 7.858331544780306e-06, "loss": 0.6812, "step": 10673 }, { "epoch": 0.32714233173961016, "grad_norm": 1.4630851818299055, "learning_rate": 7.857924308753698e-06, "loss": 0.6283, "step": 10674 }, { "epoch": 0.32717298026235136, "grad_norm": 1.4018534759528352, "learning_rate": 7.857517044567228e-06, "loss": 0.7838, "step": 10675 }, { "epoch": 0.32720362878509257, "grad_norm": 1.1804943859578974, "learning_rate": 7.857109752224911e-06, "loss": 0.5748, "step": 10676 }, { "epoch": 0.3272342773078338, "grad_norm": 0.46661823626521326, "learning_rate": 7.856702431730763e-06, "loss": 0.4419, "step": 10677 }, { "epoch": 0.327264925830575, "grad_norm": 0.4994594339038064, "learning_rate": 7.856295083088793e-06, "loss": 0.449, "step": 10678 }, { "epoch": 0.3272955743533162, "grad_norm": 1.1316987365383018, "learning_rate": 7.85588770630302e-06, "loss": 0.646, "step": 10679 }, { "epoch": 0.3273262228760574, "grad_norm": 0.4476521969022276, "learning_rate": 7.855480301377451e-06, "loss": 0.4425, "step": 10680 }, { "epoch": 0.3273568713987986, "grad_norm": 1.3248510834792928, "learning_rate": 7.855072868316107e-06, "loss": 0.6816, "step": 10681 }, { "epoch": 0.3273875199215398, "grad_norm": 1.3256594811985538, "learning_rate": 7.854665407122998e-06, "loss": 0.721, "step": 10682 }, { "epoch": 0.327418168444281, "grad_norm": 1.3811864108984062, "learning_rate": 7.854257917802141e-06, "loss": 0.7899, "step": 10683 }, { "epoch": 0.3274488169670222, "grad_norm": 0.5030309270265227, "learning_rate": 7.85385040035755e-06, "loss": 0.4584, "step": 10684 }, { "epoch": 0.32747946548976337, "grad_norm": 1.2123335838189333, "learning_rate": 7.853442854793241e-06, "loss": 0.7469, "step": 10685 }, { "epoch": 0.32751011401250457, "grad_norm": 1.2716047900809286, "learning_rate": 7.853035281113228e-06, "loss": 0.6317, "step": 10686 }, { "epoch": 0.3275407625352458, "grad_norm": 1.1003625390173162, "learning_rate": 7.852627679321529e-06, "loss": 0.6039, "step": 10687 }, { "epoch": 0.327571411057987, "grad_norm": 1.2480558932630106, "learning_rate": 7.85222004942216e-06, "loss": 0.6945, "step": 10688 }, { "epoch": 0.3276020595807282, "grad_norm": 1.1716532515951377, "learning_rate": 7.851812391419139e-06, "loss": 0.6268, "step": 10689 }, { "epoch": 0.3276327081034694, "grad_norm": 1.2351219115169638, "learning_rate": 7.851404705316478e-06, "loss": 0.682, "step": 10690 }, { "epoch": 0.3276633566262106, "grad_norm": 0.49174311053703373, "learning_rate": 7.850996991118199e-06, "loss": 0.455, "step": 10691 }, { "epoch": 0.3276940051489518, "grad_norm": 1.0511189798391807, "learning_rate": 7.850589248828316e-06, "loss": 0.5769, "step": 10692 }, { "epoch": 0.327724653671693, "grad_norm": 1.1374346695197537, "learning_rate": 7.850181478450847e-06, "loss": 0.7185, "step": 10693 }, { "epoch": 0.3277553021944342, "grad_norm": 1.2937017344433264, "learning_rate": 7.849773679989814e-06, "loss": 0.6907, "step": 10694 }, { "epoch": 0.3277859507171754, "grad_norm": 1.169517242012755, "learning_rate": 7.849365853449228e-06, "loss": 0.654, "step": 10695 }, { "epoch": 0.32781659923991663, "grad_norm": 1.2907901142958156, "learning_rate": 7.848957998833113e-06, "loss": 0.715, "step": 10696 }, { "epoch": 0.32784724776265783, "grad_norm": 1.2174003582535433, "learning_rate": 7.848550116145486e-06, "loss": 0.6605, "step": 10697 }, { "epoch": 0.32787789628539904, "grad_norm": 0.4523445204264603, "learning_rate": 7.848142205390364e-06, "loss": 0.4323, "step": 10698 }, { "epoch": 0.32790854480814025, "grad_norm": 1.2198602667633878, "learning_rate": 7.847734266571769e-06, "loss": 0.6493, "step": 10699 }, { "epoch": 0.32793919333088145, "grad_norm": 1.178806157256582, "learning_rate": 7.847326299693721e-06, "loss": 0.723, "step": 10700 }, { "epoch": 0.32796984185362266, "grad_norm": 1.213385644004742, "learning_rate": 7.846918304760239e-06, "loss": 0.6499, "step": 10701 }, { "epoch": 0.32800049037636386, "grad_norm": 0.4679124340453637, "learning_rate": 7.84651028177534e-06, "loss": 0.4186, "step": 10702 }, { "epoch": 0.32803113889910507, "grad_norm": 1.192723747702269, "learning_rate": 7.846102230743049e-06, "loss": 0.7183, "step": 10703 }, { "epoch": 0.3280617874218463, "grad_norm": 1.3128430028595015, "learning_rate": 7.845694151667382e-06, "loss": 0.6543, "step": 10704 }, { "epoch": 0.3280924359445875, "grad_norm": 1.225360074079184, "learning_rate": 7.845286044552365e-06, "loss": 0.678, "step": 10705 }, { "epoch": 0.3281230844673287, "grad_norm": 1.379132122077682, "learning_rate": 7.844877909402015e-06, "loss": 0.6319, "step": 10706 }, { "epoch": 0.3281537329900699, "grad_norm": 0.4983012910017784, "learning_rate": 7.844469746220356e-06, "loss": 0.4518, "step": 10707 }, { "epoch": 0.3281843815128111, "grad_norm": 1.3796463970433737, "learning_rate": 7.844061555011408e-06, "loss": 0.7274, "step": 10708 }, { "epoch": 0.3282150300355523, "grad_norm": 1.2334432807064832, "learning_rate": 7.843653335779194e-06, "loss": 0.6147, "step": 10709 }, { "epoch": 0.3282456785582935, "grad_norm": 0.45355732976930213, "learning_rate": 7.843245088527736e-06, "loss": 0.4254, "step": 10710 }, { "epoch": 0.3282763270810347, "grad_norm": 0.4487024003491734, "learning_rate": 7.842836813261057e-06, "loss": 0.4448, "step": 10711 }, { "epoch": 0.3283069756037759, "grad_norm": 1.2481387555867622, "learning_rate": 7.84242850998318e-06, "loss": 0.67, "step": 10712 }, { "epoch": 0.3283376241265171, "grad_norm": 1.1991186756569594, "learning_rate": 7.842020178698126e-06, "loss": 0.6857, "step": 10713 }, { "epoch": 0.32836827264925833, "grad_norm": 1.2654838647173465, "learning_rate": 7.841611819409922e-06, "loss": 0.6876, "step": 10714 }, { "epoch": 0.32839892117199954, "grad_norm": 1.4140851017348908, "learning_rate": 7.841203432122588e-06, "loss": 0.6474, "step": 10715 }, { "epoch": 0.3284295696947407, "grad_norm": 1.0676580152288528, "learning_rate": 7.840795016840151e-06, "loss": 0.6601, "step": 10716 }, { "epoch": 0.3284602182174819, "grad_norm": 1.2319099028143479, "learning_rate": 7.840386573566634e-06, "loss": 0.686, "step": 10717 }, { "epoch": 0.3284908667402231, "grad_norm": 1.236203746841551, "learning_rate": 7.83997810230606e-06, "loss": 0.5731, "step": 10718 }, { "epoch": 0.3285215152629643, "grad_norm": 1.561153322301643, "learning_rate": 7.839569603062456e-06, "loss": 0.6472, "step": 10719 }, { "epoch": 0.3285521637857055, "grad_norm": 0.5588447263748723, "learning_rate": 7.839161075839846e-06, "loss": 0.4691, "step": 10720 }, { "epoch": 0.3285828123084467, "grad_norm": 1.501310954256428, "learning_rate": 7.838752520642256e-06, "loss": 0.7432, "step": 10721 }, { "epoch": 0.3286134608311879, "grad_norm": 1.1683675655937893, "learning_rate": 7.83834393747371e-06, "loss": 0.6107, "step": 10722 }, { "epoch": 0.3286441093539291, "grad_norm": 1.431727547301519, "learning_rate": 7.837935326338236e-06, "loss": 0.6954, "step": 10723 }, { "epoch": 0.32867475787667033, "grad_norm": 1.3246383807681572, "learning_rate": 7.837526687239858e-06, "loss": 0.6614, "step": 10724 }, { "epoch": 0.32870540639941154, "grad_norm": 1.4497768671959497, "learning_rate": 7.837118020182606e-06, "loss": 0.781, "step": 10725 }, { "epoch": 0.32873605492215274, "grad_norm": 1.149665096786497, "learning_rate": 7.8367093251705e-06, "loss": 0.6271, "step": 10726 }, { "epoch": 0.32876670344489395, "grad_norm": 0.44961606037700175, "learning_rate": 7.836300602207574e-06, "loss": 0.4301, "step": 10727 }, { "epoch": 0.32879735196763515, "grad_norm": 1.1661945642856333, "learning_rate": 7.835891851297852e-06, "loss": 0.6729, "step": 10728 }, { "epoch": 0.32882800049037636, "grad_norm": 1.323768396132092, "learning_rate": 7.835483072445363e-06, "loss": 0.6028, "step": 10729 }, { "epoch": 0.32885864901311757, "grad_norm": 0.46738236062032945, "learning_rate": 7.835074265654133e-06, "loss": 0.45, "step": 10730 }, { "epoch": 0.32888929753585877, "grad_norm": 1.320181187443418, "learning_rate": 7.83466543092819e-06, "loss": 0.6325, "step": 10731 }, { "epoch": 0.3289199460586, "grad_norm": 1.19410148990801, "learning_rate": 7.834256568271564e-06, "loss": 0.6064, "step": 10732 }, { "epoch": 0.3289505945813412, "grad_norm": 1.4020405372921196, "learning_rate": 7.833847677688282e-06, "loss": 0.7792, "step": 10733 }, { "epoch": 0.3289812431040824, "grad_norm": 0.463825933214378, "learning_rate": 7.833438759182375e-06, "loss": 0.4361, "step": 10734 }, { "epoch": 0.3290118916268236, "grad_norm": 1.2937298923223974, "learning_rate": 7.833029812757871e-06, "loss": 0.5429, "step": 10735 }, { "epoch": 0.3290425401495648, "grad_norm": 1.2103340471147337, "learning_rate": 7.832620838418798e-06, "loss": 0.7339, "step": 10736 }, { "epoch": 0.329073188672306, "grad_norm": 0.4600972905958834, "learning_rate": 7.832211836169188e-06, "loss": 0.4164, "step": 10737 }, { "epoch": 0.3291038371950472, "grad_norm": 1.1862718398669914, "learning_rate": 7.831802806013072e-06, "loss": 0.6288, "step": 10738 }, { "epoch": 0.3291344857177884, "grad_norm": 1.3319092931794378, "learning_rate": 7.831393747954477e-06, "loss": 0.748, "step": 10739 }, { "epoch": 0.3291651342405296, "grad_norm": 1.2174250170442784, "learning_rate": 7.830984661997434e-06, "loss": 0.6814, "step": 10740 }, { "epoch": 0.32919578276327083, "grad_norm": 1.2380354329411887, "learning_rate": 7.830575548145975e-06, "loss": 0.7125, "step": 10741 }, { "epoch": 0.32922643128601203, "grad_norm": 1.383854909353046, "learning_rate": 7.83016640640413e-06, "loss": 0.6551, "step": 10742 }, { "epoch": 0.32925707980875324, "grad_norm": 1.267744544750974, "learning_rate": 7.829757236775934e-06, "loss": 0.7152, "step": 10743 }, { "epoch": 0.32928772833149444, "grad_norm": 0.4894985185660591, "learning_rate": 7.829348039265413e-06, "loss": 0.4333, "step": 10744 }, { "epoch": 0.32931837685423565, "grad_norm": 1.2057013306792312, "learning_rate": 7.828938813876603e-06, "loss": 0.6611, "step": 10745 }, { "epoch": 0.32934902537697686, "grad_norm": 1.031508038958969, "learning_rate": 7.828529560613536e-06, "loss": 0.6247, "step": 10746 }, { "epoch": 0.329379673899718, "grad_norm": 1.313037382942955, "learning_rate": 7.828120279480242e-06, "loss": 0.6703, "step": 10747 }, { "epoch": 0.3294103224224592, "grad_norm": 1.3079835201981058, "learning_rate": 7.827710970480757e-06, "loss": 0.7201, "step": 10748 }, { "epoch": 0.3294409709452004, "grad_norm": 1.3369694611091123, "learning_rate": 7.827301633619112e-06, "loss": 0.6605, "step": 10749 }, { "epoch": 0.3294716194679416, "grad_norm": 1.2718648719063537, "learning_rate": 7.826892268899338e-06, "loss": 0.6096, "step": 10750 }, { "epoch": 0.32950226799068283, "grad_norm": 1.2890560425522082, "learning_rate": 7.826482876325474e-06, "loss": 0.7045, "step": 10751 }, { "epoch": 0.32953291651342403, "grad_norm": 1.3178832922213326, "learning_rate": 7.82607345590155e-06, "loss": 0.6851, "step": 10752 }, { "epoch": 0.32956356503616524, "grad_norm": 1.0509426371697104, "learning_rate": 7.825664007631601e-06, "loss": 0.6713, "step": 10753 }, { "epoch": 0.32959421355890645, "grad_norm": 1.262258377253102, "learning_rate": 7.825254531519663e-06, "loss": 0.7202, "step": 10754 }, { "epoch": 0.32962486208164765, "grad_norm": 1.2474987897083272, "learning_rate": 7.824845027569769e-06, "loss": 0.701, "step": 10755 }, { "epoch": 0.32965551060438886, "grad_norm": 1.3133028814183312, "learning_rate": 7.824435495785953e-06, "loss": 0.6513, "step": 10756 }, { "epoch": 0.32968615912713006, "grad_norm": 1.2465931771395062, "learning_rate": 7.82402593617225e-06, "loss": 0.6469, "step": 10757 }, { "epoch": 0.32971680764987127, "grad_norm": 1.4102124766053217, "learning_rate": 7.8236163487327e-06, "loss": 0.6783, "step": 10758 }, { "epoch": 0.3297474561726125, "grad_norm": 1.0931623423810395, "learning_rate": 7.823206733471333e-06, "loss": 0.5162, "step": 10759 }, { "epoch": 0.3297781046953537, "grad_norm": 1.2497684046101953, "learning_rate": 7.82279709039219e-06, "loss": 0.5997, "step": 10760 }, { "epoch": 0.3298087532180949, "grad_norm": 0.5298731441745675, "learning_rate": 7.822387419499304e-06, "loss": 0.4314, "step": 10761 }, { "epoch": 0.3298394017408361, "grad_norm": 1.158908134577596, "learning_rate": 7.821977720796713e-06, "loss": 0.6082, "step": 10762 }, { "epoch": 0.3298700502635773, "grad_norm": 1.1891874529342283, "learning_rate": 7.821567994288452e-06, "loss": 0.6682, "step": 10763 }, { "epoch": 0.3299006987863185, "grad_norm": 1.5556684472267976, "learning_rate": 7.821158239978561e-06, "loss": 0.8277, "step": 10764 }, { "epoch": 0.3299313473090597, "grad_norm": 1.214887076325954, "learning_rate": 7.820748457871077e-06, "loss": 0.6042, "step": 10765 }, { "epoch": 0.3299619958318009, "grad_norm": 1.2401342670891635, "learning_rate": 7.820338647970036e-06, "loss": 0.7075, "step": 10766 }, { "epoch": 0.3299926443545421, "grad_norm": 1.2927856386324779, "learning_rate": 7.819928810279476e-06, "loss": 0.676, "step": 10767 }, { "epoch": 0.3300232928772833, "grad_norm": 0.47635593611451876, "learning_rate": 7.819518944803434e-06, "loss": 0.4483, "step": 10768 }, { "epoch": 0.33005394140002453, "grad_norm": 1.231305377682773, "learning_rate": 7.819109051545955e-06, "loss": 0.6789, "step": 10769 }, { "epoch": 0.33008458992276574, "grad_norm": 1.2498098867688179, "learning_rate": 7.81869913051107e-06, "loss": 0.6886, "step": 10770 }, { "epoch": 0.33011523844550694, "grad_norm": 1.2577746865237756, "learning_rate": 7.818289181702822e-06, "loss": 0.7071, "step": 10771 }, { "epoch": 0.33014588696824815, "grad_norm": 1.1908576956054222, "learning_rate": 7.81787920512525e-06, "loss": 0.6334, "step": 10772 }, { "epoch": 0.33017653549098935, "grad_norm": 0.46757592458667785, "learning_rate": 7.817469200782394e-06, "loss": 0.4361, "step": 10773 }, { "epoch": 0.33020718401373056, "grad_norm": 1.3320184878941193, "learning_rate": 7.81705916867829e-06, "loss": 0.6239, "step": 10774 }, { "epoch": 0.33023783253647176, "grad_norm": 0.4557714935244385, "learning_rate": 7.816649108816982e-06, "loss": 0.4283, "step": 10775 }, { "epoch": 0.33026848105921297, "grad_norm": 1.1096779375128099, "learning_rate": 7.816239021202512e-06, "loss": 0.6314, "step": 10776 }, { "epoch": 0.3302991295819542, "grad_norm": 1.2402291106367418, "learning_rate": 7.815828905838917e-06, "loss": 0.6227, "step": 10777 }, { "epoch": 0.3303297781046953, "grad_norm": 1.3189929800925997, "learning_rate": 7.81541876273024e-06, "loss": 0.6882, "step": 10778 }, { "epoch": 0.33036042662743653, "grad_norm": 1.1659036866412595, "learning_rate": 7.81500859188052e-06, "loss": 0.6448, "step": 10779 }, { "epoch": 0.33039107515017774, "grad_norm": 1.1901999316905414, "learning_rate": 7.814598393293802e-06, "loss": 0.6166, "step": 10780 }, { "epoch": 0.33042172367291894, "grad_norm": 1.1869711795323754, "learning_rate": 7.814188166974125e-06, "loss": 0.6807, "step": 10781 }, { "epoch": 0.33045237219566015, "grad_norm": 1.3574729025454504, "learning_rate": 7.813777912925533e-06, "loss": 0.7217, "step": 10782 }, { "epoch": 0.33048302071840135, "grad_norm": 1.228853962971427, "learning_rate": 7.813367631152066e-06, "loss": 0.7307, "step": 10783 }, { "epoch": 0.33051366924114256, "grad_norm": 1.1534747875897078, "learning_rate": 7.812957321657769e-06, "loss": 0.6601, "step": 10784 }, { "epoch": 0.33054431776388377, "grad_norm": 1.3946400815305118, "learning_rate": 7.812546984446681e-06, "loss": 0.6707, "step": 10785 }, { "epoch": 0.33057496628662497, "grad_norm": 1.2585032104768321, "learning_rate": 7.81213661952285e-06, "loss": 0.6869, "step": 10786 }, { "epoch": 0.3306056148093662, "grad_norm": 0.56012852321826, "learning_rate": 7.811726226890317e-06, "loss": 0.4572, "step": 10787 }, { "epoch": 0.3306362633321074, "grad_norm": 1.2292062039611305, "learning_rate": 7.811315806553126e-06, "loss": 0.6469, "step": 10788 }, { "epoch": 0.3306669118548486, "grad_norm": 1.1949013901359393, "learning_rate": 7.810905358515323e-06, "loss": 0.6493, "step": 10789 }, { "epoch": 0.3306975603775898, "grad_norm": 1.358147102791242, "learning_rate": 7.810494882780947e-06, "loss": 0.7264, "step": 10790 }, { "epoch": 0.330728208900331, "grad_norm": 0.469135354338558, "learning_rate": 7.810084379354049e-06, "loss": 0.4341, "step": 10791 }, { "epoch": 0.3307588574230722, "grad_norm": 0.44419522299558145, "learning_rate": 7.809673848238668e-06, "loss": 0.4257, "step": 10792 }, { "epoch": 0.3307895059458134, "grad_norm": 1.2815912995063972, "learning_rate": 7.809263289438855e-06, "loss": 0.6722, "step": 10793 }, { "epoch": 0.3308201544685546, "grad_norm": 1.2246701454269422, "learning_rate": 7.80885270295865e-06, "loss": 0.6285, "step": 10794 }, { "epoch": 0.3308508029912958, "grad_norm": 1.1864269612894631, "learning_rate": 7.8084420888021e-06, "loss": 0.7115, "step": 10795 }, { "epoch": 0.33088145151403703, "grad_norm": 1.2231571835097113, "learning_rate": 7.80803144697325e-06, "loss": 0.7287, "step": 10796 }, { "epoch": 0.33091210003677823, "grad_norm": 1.3385893843975836, "learning_rate": 7.807620777476151e-06, "loss": 0.7429, "step": 10797 }, { "epoch": 0.33094274855951944, "grad_norm": 1.110136735133154, "learning_rate": 7.807210080314844e-06, "loss": 0.5639, "step": 10798 }, { "epoch": 0.33097339708226065, "grad_norm": 1.3886263857803272, "learning_rate": 7.80679935549338e-06, "loss": 0.6881, "step": 10799 }, { "epoch": 0.33100404560500185, "grad_norm": 1.4900291840620465, "learning_rate": 7.806388603015802e-06, "loss": 0.7334, "step": 10800 }, { "epoch": 0.33103469412774306, "grad_norm": 1.3459228179704672, "learning_rate": 7.805977822886159e-06, "loss": 0.7101, "step": 10801 }, { "epoch": 0.33106534265048426, "grad_norm": 1.2752628832569561, "learning_rate": 7.8055670151085e-06, "loss": 0.6476, "step": 10802 }, { "epoch": 0.33109599117322547, "grad_norm": 1.3005996891727736, "learning_rate": 7.80515617968687e-06, "loss": 0.6374, "step": 10803 }, { "epoch": 0.3311266396959667, "grad_norm": 1.4989836314159082, "learning_rate": 7.80474531662532e-06, "loss": 0.7928, "step": 10804 }, { "epoch": 0.3311572882187079, "grad_norm": 1.3765856507919572, "learning_rate": 7.804334425927896e-06, "loss": 0.6506, "step": 10805 }, { "epoch": 0.3311879367414491, "grad_norm": 0.5312584654987215, "learning_rate": 7.803923507598645e-06, "loss": 0.4036, "step": 10806 }, { "epoch": 0.3312185852641903, "grad_norm": 1.3429239006320899, "learning_rate": 7.803512561641622e-06, "loss": 0.6666, "step": 10807 }, { "epoch": 0.3312492337869315, "grad_norm": 1.1821638732975073, "learning_rate": 7.803101588060871e-06, "loss": 0.6365, "step": 10808 }, { "epoch": 0.33127988230967265, "grad_norm": 1.331218103762544, "learning_rate": 7.802690586860442e-06, "loss": 0.7892, "step": 10809 }, { "epoch": 0.33131053083241385, "grad_norm": 1.3085670372762073, "learning_rate": 7.802279558044385e-06, "loss": 0.6696, "step": 10810 }, { "epoch": 0.33134117935515506, "grad_norm": 1.2265141439870393, "learning_rate": 7.801868501616752e-06, "loss": 0.6678, "step": 10811 }, { "epoch": 0.33137182787789626, "grad_norm": 1.1624012675734934, "learning_rate": 7.801457417581592e-06, "loss": 0.6945, "step": 10812 }, { "epoch": 0.33140247640063747, "grad_norm": 0.4703358698824132, "learning_rate": 7.801046305942954e-06, "loss": 0.4429, "step": 10813 }, { "epoch": 0.3314331249233787, "grad_norm": 1.2947572944004067, "learning_rate": 7.80063516670489e-06, "loss": 0.669, "step": 10814 }, { "epoch": 0.3314637734461199, "grad_norm": 0.4874022845639821, "learning_rate": 7.800223999871452e-06, "loss": 0.4356, "step": 10815 }, { "epoch": 0.3314944219688611, "grad_norm": 1.3620114673999726, "learning_rate": 7.799812805446691e-06, "loss": 0.7211, "step": 10816 }, { "epoch": 0.3315250704916023, "grad_norm": 1.2973762435428247, "learning_rate": 7.799401583434659e-06, "loss": 0.5974, "step": 10817 }, { "epoch": 0.3315557190143435, "grad_norm": 1.3471205627475649, "learning_rate": 7.798990333839405e-06, "loss": 0.7817, "step": 10818 }, { "epoch": 0.3315863675370847, "grad_norm": 1.174901406677855, "learning_rate": 7.798579056664984e-06, "loss": 0.7227, "step": 10819 }, { "epoch": 0.3316170160598259, "grad_norm": 1.2403889465256057, "learning_rate": 7.798167751915446e-06, "loss": 0.6974, "step": 10820 }, { "epoch": 0.3316476645825671, "grad_norm": 1.216226616285098, "learning_rate": 7.797756419594846e-06, "loss": 0.7467, "step": 10821 }, { "epoch": 0.3316783131053083, "grad_norm": 1.3292582382755613, "learning_rate": 7.797345059707236e-06, "loss": 0.6384, "step": 10822 }, { "epoch": 0.3317089616280495, "grad_norm": 1.377250724341159, "learning_rate": 7.79693367225667e-06, "loss": 0.6981, "step": 10823 }, { "epoch": 0.33173961015079073, "grad_norm": 1.326540599652534, "learning_rate": 7.7965222572472e-06, "loss": 0.6744, "step": 10824 }, { "epoch": 0.33177025867353194, "grad_norm": 0.4678293438871121, "learning_rate": 7.796110814682882e-06, "loss": 0.4155, "step": 10825 }, { "epoch": 0.33180090719627314, "grad_norm": 1.4676922357041482, "learning_rate": 7.79569934456777e-06, "loss": 0.6814, "step": 10826 }, { "epoch": 0.33183155571901435, "grad_norm": 1.3748913836573484, "learning_rate": 7.795287846905912e-06, "loss": 0.6899, "step": 10827 }, { "epoch": 0.33186220424175555, "grad_norm": 1.1753283461498105, "learning_rate": 7.794876321701372e-06, "loss": 0.6788, "step": 10828 }, { "epoch": 0.33189285276449676, "grad_norm": 1.3268543653817229, "learning_rate": 7.794464768958198e-06, "loss": 0.655, "step": 10829 }, { "epoch": 0.33192350128723797, "grad_norm": 1.2002721739277034, "learning_rate": 7.79405318868045e-06, "loss": 0.6393, "step": 10830 }, { "epoch": 0.33195414980997917, "grad_norm": 1.2391592426877451, "learning_rate": 7.79364158087218e-06, "loss": 0.7045, "step": 10831 }, { "epoch": 0.3319847983327204, "grad_norm": 0.4924639762387126, "learning_rate": 7.793229945537444e-06, "loss": 0.4357, "step": 10832 }, { "epoch": 0.3320154468554616, "grad_norm": 1.1143264304130445, "learning_rate": 7.792818282680299e-06, "loss": 0.5693, "step": 10833 }, { "epoch": 0.3320460953782028, "grad_norm": 1.2084674767876336, "learning_rate": 7.792406592304802e-06, "loss": 0.6627, "step": 10834 }, { "epoch": 0.332076743900944, "grad_norm": 1.3571455405892223, "learning_rate": 7.791994874415008e-06, "loss": 0.7144, "step": 10835 }, { "epoch": 0.3321073924236852, "grad_norm": 1.2310617355963012, "learning_rate": 7.791583129014973e-06, "loss": 0.6395, "step": 10836 }, { "epoch": 0.3321380409464264, "grad_norm": 1.2677078576889445, "learning_rate": 7.791171356108755e-06, "loss": 0.6572, "step": 10837 }, { "epoch": 0.3321686894691676, "grad_norm": 1.1040358901004135, "learning_rate": 7.790759555700413e-06, "loss": 0.6556, "step": 10838 }, { "epoch": 0.3321993379919088, "grad_norm": 1.5803911592348907, "learning_rate": 7.790347727794003e-06, "loss": 0.7378, "step": 10839 }, { "epoch": 0.33222998651464997, "grad_norm": 1.080518934672692, "learning_rate": 7.789935872393582e-06, "loss": 0.572, "step": 10840 }, { "epoch": 0.33226063503739117, "grad_norm": 1.22214849262787, "learning_rate": 7.78952398950321e-06, "loss": 0.6659, "step": 10841 }, { "epoch": 0.3322912835601324, "grad_norm": 0.4974055990240088, "learning_rate": 7.789112079126942e-06, "loss": 0.425, "step": 10842 }, { "epoch": 0.3323219320828736, "grad_norm": 0.47547365108659656, "learning_rate": 7.788700141268842e-06, "loss": 0.4461, "step": 10843 }, { "epoch": 0.3323525806056148, "grad_norm": 1.3788189854212636, "learning_rate": 7.788288175932965e-06, "loss": 0.7117, "step": 10844 }, { "epoch": 0.332383229128356, "grad_norm": 0.42613228561172944, "learning_rate": 7.787876183123371e-06, "loss": 0.4362, "step": 10845 }, { "epoch": 0.3324138776510972, "grad_norm": 1.1494816396497527, "learning_rate": 7.787464162844118e-06, "loss": 0.5682, "step": 10846 }, { "epoch": 0.3324445261738384, "grad_norm": 1.2134476155142209, "learning_rate": 7.78705211509927e-06, "loss": 0.7483, "step": 10847 }, { "epoch": 0.3324751746965796, "grad_norm": 1.3311621769547133, "learning_rate": 7.786640039892884e-06, "loss": 0.6437, "step": 10848 }, { "epoch": 0.3325058232193208, "grad_norm": 0.5386546305611544, "learning_rate": 7.78622793722902e-06, "loss": 0.4491, "step": 10849 }, { "epoch": 0.332536471742062, "grad_norm": 1.207577816677862, "learning_rate": 7.78581580711174e-06, "loss": 0.7067, "step": 10850 }, { "epoch": 0.33256712026480323, "grad_norm": 1.2726514673886178, "learning_rate": 7.785403649545103e-06, "loss": 0.6748, "step": 10851 }, { "epoch": 0.33259776878754443, "grad_norm": 0.5281508259924723, "learning_rate": 7.784991464533171e-06, "loss": 0.438, "step": 10852 }, { "epoch": 0.33262841731028564, "grad_norm": 1.0595686326971123, "learning_rate": 7.784579252080006e-06, "loss": 0.6758, "step": 10853 }, { "epoch": 0.33265906583302685, "grad_norm": 1.1948975590726547, "learning_rate": 7.78416701218967e-06, "loss": 0.6316, "step": 10854 }, { "epoch": 0.33268971435576805, "grad_norm": 1.3027081100056306, "learning_rate": 7.783754744866223e-06, "loss": 0.6623, "step": 10855 }, { "epoch": 0.33272036287850926, "grad_norm": 0.4792177549055398, "learning_rate": 7.783342450113727e-06, "loss": 0.4328, "step": 10856 }, { "epoch": 0.33275101140125046, "grad_norm": 1.2710848207262866, "learning_rate": 7.782930127936248e-06, "loss": 0.7044, "step": 10857 }, { "epoch": 0.33278165992399167, "grad_norm": 1.2455010576060368, "learning_rate": 7.782517778337845e-06, "loss": 0.6101, "step": 10858 }, { "epoch": 0.3328123084467329, "grad_norm": 1.279444379446432, "learning_rate": 7.782105401322584e-06, "loss": 0.6726, "step": 10859 }, { "epoch": 0.3328429569694741, "grad_norm": 1.2629209441561762, "learning_rate": 7.781692996894526e-06, "loss": 0.6514, "step": 10860 }, { "epoch": 0.3328736054922153, "grad_norm": 1.2894490019483587, "learning_rate": 7.781280565057734e-06, "loss": 0.7506, "step": 10861 }, { "epoch": 0.3329042540149565, "grad_norm": 0.5438680628852615, "learning_rate": 7.780868105816275e-06, "loss": 0.4568, "step": 10862 }, { "epoch": 0.3329349025376977, "grad_norm": 1.2866249629051774, "learning_rate": 7.78045561917421e-06, "loss": 0.6593, "step": 10863 }, { "epoch": 0.3329655510604389, "grad_norm": 0.48421340502913923, "learning_rate": 7.780043105135604e-06, "loss": 0.4643, "step": 10864 }, { "epoch": 0.3329961995831801, "grad_norm": 0.45903799329127515, "learning_rate": 7.779630563704522e-06, "loss": 0.431, "step": 10865 }, { "epoch": 0.3330268481059213, "grad_norm": 1.301721409303311, "learning_rate": 7.779217994885028e-06, "loss": 0.7696, "step": 10866 }, { "epoch": 0.3330574966286625, "grad_norm": 1.2941297046542948, "learning_rate": 7.778805398681191e-06, "loss": 0.5642, "step": 10867 }, { "epoch": 0.3330881451514037, "grad_norm": 1.2728622798303788, "learning_rate": 7.77839277509707e-06, "loss": 0.7146, "step": 10868 }, { "epoch": 0.33311879367414493, "grad_norm": 1.3068958660575158, "learning_rate": 7.777980124136735e-06, "loss": 0.6918, "step": 10869 }, { "epoch": 0.33314944219688614, "grad_norm": 0.496710908125753, "learning_rate": 7.777567445804253e-06, "loss": 0.4339, "step": 10870 }, { "epoch": 0.3331800907196273, "grad_norm": 0.5085636319958126, "learning_rate": 7.777154740103687e-06, "loss": 0.4347, "step": 10871 }, { "epoch": 0.3332107392423685, "grad_norm": 1.2419028150044236, "learning_rate": 7.776742007039104e-06, "loss": 0.6641, "step": 10872 }, { "epoch": 0.3332413877651097, "grad_norm": 1.476755368186847, "learning_rate": 7.776329246614574e-06, "loss": 0.7051, "step": 10873 }, { "epoch": 0.3332720362878509, "grad_norm": 1.4413703243303073, "learning_rate": 7.77591645883416e-06, "loss": 0.6158, "step": 10874 }, { "epoch": 0.3333026848105921, "grad_norm": 1.3307637010867077, "learning_rate": 7.77550364370193e-06, "loss": 0.6933, "step": 10875 }, { "epoch": 0.3333333333333333, "grad_norm": 1.2698177222933498, "learning_rate": 7.775090801221953e-06, "loss": 0.6606, "step": 10876 }, { "epoch": 0.3333639818560745, "grad_norm": 1.2586039382853846, "learning_rate": 7.774677931398295e-06, "loss": 0.6424, "step": 10877 }, { "epoch": 0.3333946303788157, "grad_norm": 1.474229900694958, "learning_rate": 7.774265034235029e-06, "loss": 0.695, "step": 10878 }, { "epoch": 0.33342527890155693, "grad_norm": 1.4122630546752157, "learning_rate": 7.773852109736217e-06, "loss": 0.663, "step": 10879 }, { "epoch": 0.33345592742429814, "grad_norm": 0.646025383000072, "learning_rate": 7.773439157905931e-06, "loss": 0.4492, "step": 10880 }, { "epoch": 0.33348657594703934, "grad_norm": 1.2982042234626334, "learning_rate": 7.773026178748239e-06, "loss": 0.6452, "step": 10881 }, { "epoch": 0.33351722446978055, "grad_norm": 1.380951165023459, "learning_rate": 7.77261317226721e-06, "loss": 0.6923, "step": 10882 }, { "epoch": 0.33354787299252175, "grad_norm": 0.46414951227871654, "learning_rate": 7.772200138466917e-06, "loss": 0.4212, "step": 10883 }, { "epoch": 0.33357852151526296, "grad_norm": 1.3959094274297077, "learning_rate": 7.771787077351425e-06, "loss": 0.6823, "step": 10884 }, { "epoch": 0.33360917003800417, "grad_norm": 0.4457913743721687, "learning_rate": 7.771373988924806e-06, "loss": 0.4155, "step": 10885 }, { "epoch": 0.33363981856074537, "grad_norm": 1.2673430557504963, "learning_rate": 7.770960873191128e-06, "loss": 0.7138, "step": 10886 }, { "epoch": 0.3336704670834866, "grad_norm": 1.1596801174291487, "learning_rate": 7.770547730154465e-06, "loss": 0.6197, "step": 10887 }, { "epoch": 0.3337011156062278, "grad_norm": 1.3705838858002453, "learning_rate": 7.770134559818888e-06, "loss": 0.7617, "step": 10888 }, { "epoch": 0.333731764128969, "grad_norm": 1.1444937465924265, "learning_rate": 7.769721362188465e-06, "loss": 0.6214, "step": 10889 }, { "epoch": 0.3337624126517102, "grad_norm": 1.2457608852680055, "learning_rate": 7.769308137267268e-06, "loss": 0.6575, "step": 10890 }, { "epoch": 0.3337930611744514, "grad_norm": 1.459953004801463, "learning_rate": 7.76889488505937e-06, "loss": 0.6665, "step": 10891 }, { "epoch": 0.3338237096971926, "grad_norm": 1.2372581480626588, "learning_rate": 7.768481605568843e-06, "loss": 0.6316, "step": 10892 }, { "epoch": 0.3338543582199338, "grad_norm": 1.2158058973831147, "learning_rate": 7.768068298799758e-06, "loss": 0.6529, "step": 10893 }, { "epoch": 0.333885006742675, "grad_norm": 0.5517374807057484, "learning_rate": 7.767654964756186e-06, "loss": 0.4125, "step": 10894 }, { "epoch": 0.3339156552654162, "grad_norm": 1.178590529338833, "learning_rate": 7.767241603442204e-06, "loss": 0.6167, "step": 10895 }, { "epoch": 0.33394630378815743, "grad_norm": 1.3717751163787915, "learning_rate": 7.76682821486188e-06, "loss": 0.6391, "step": 10896 }, { "epoch": 0.33397695231089863, "grad_norm": 1.3771189809646123, "learning_rate": 7.766414799019294e-06, "loss": 0.6907, "step": 10897 }, { "epoch": 0.33400760083363984, "grad_norm": 1.0987732532611514, "learning_rate": 7.76600135591851e-06, "loss": 0.7087, "step": 10898 }, { "epoch": 0.33403824935638105, "grad_norm": 0.4554563155622439, "learning_rate": 7.765587885563609e-06, "loss": 0.45, "step": 10899 }, { "epoch": 0.33406889787912225, "grad_norm": 1.235291491586151, "learning_rate": 7.765174387958663e-06, "loss": 0.6463, "step": 10900 }, { "epoch": 0.33409954640186346, "grad_norm": 1.2589607606744382, "learning_rate": 7.764760863107748e-06, "loss": 0.6457, "step": 10901 }, { "epoch": 0.3341301949246046, "grad_norm": 1.3321237342145675, "learning_rate": 7.764347311014935e-06, "loss": 0.663, "step": 10902 }, { "epoch": 0.3341608434473458, "grad_norm": 1.148212856254185, "learning_rate": 7.7639337316843e-06, "loss": 0.5643, "step": 10903 }, { "epoch": 0.334191491970087, "grad_norm": 1.405442163778176, "learning_rate": 7.763520125119918e-06, "loss": 0.6706, "step": 10904 }, { "epoch": 0.3342221404928282, "grad_norm": 1.3617006617351468, "learning_rate": 7.763106491325869e-06, "loss": 0.7469, "step": 10905 }, { "epoch": 0.33425278901556943, "grad_norm": 1.144457841521919, "learning_rate": 7.762692830306223e-06, "loss": 0.7299, "step": 10906 }, { "epoch": 0.33428343753831063, "grad_norm": 1.5676071440264818, "learning_rate": 7.762279142065055e-06, "loss": 0.7351, "step": 10907 }, { "epoch": 0.33431408606105184, "grad_norm": 1.2805086990902237, "learning_rate": 7.761865426606447e-06, "loss": 0.6409, "step": 10908 }, { "epoch": 0.33434473458379305, "grad_norm": 1.2478719928217503, "learning_rate": 7.76145168393447e-06, "loss": 0.6385, "step": 10909 }, { "epoch": 0.33437538310653425, "grad_norm": 0.6402293333007182, "learning_rate": 7.761037914053205e-06, "loss": 0.4287, "step": 10910 }, { "epoch": 0.33440603162927546, "grad_norm": 1.4316668586670447, "learning_rate": 7.760624116966726e-06, "loss": 0.68, "step": 10911 }, { "epoch": 0.33443668015201666, "grad_norm": 1.3036217077694179, "learning_rate": 7.760210292679114e-06, "loss": 0.6517, "step": 10912 }, { "epoch": 0.33446732867475787, "grad_norm": 1.4149386340017092, "learning_rate": 7.75979644119444e-06, "loss": 0.6969, "step": 10913 }, { "epoch": 0.3344979771974991, "grad_norm": 1.2654695917221648, "learning_rate": 7.759382562516786e-06, "loss": 0.7537, "step": 10914 }, { "epoch": 0.3345286257202403, "grad_norm": 1.344281579214011, "learning_rate": 7.758968656650231e-06, "loss": 0.6727, "step": 10915 }, { "epoch": 0.3345592742429815, "grad_norm": 1.1857851853043464, "learning_rate": 7.758554723598852e-06, "loss": 0.5747, "step": 10916 }, { "epoch": 0.3345899227657227, "grad_norm": 1.3855650696907114, "learning_rate": 7.758140763366726e-06, "loss": 0.6922, "step": 10917 }, { "epoch": 0.3346205712884639, "grad_norm": 1.4730476635675505, "learning_rate": 7.757726775957935e-06, "loss": 0.7596, "step": 10918 }, { "epoch": 0.3346512198112051, "grad_norm": 1.2162424275816446, "learning_rate": 7.757312761376555e-06, "loss": 0.6116, "step": 10919 }, { "epoch": 0.3346818683339463, "grad_norm": 1.0570274182237496, "learning_rate": 7.75689871962667e-06, "loss": 0.5752, "step": 10920 }, { "epoch": 0.3347125168566875, "grad_norm": 1.2959375594819103, "learning_rate": 7.756484650712352e-06, "loss": 0.7093, "step": 10921 }, { "epoch": 0.3347431653794287, "grad_norm": 1.1866253305595482, "learning_rate": 7.756070554637689e-06, "loss": 0.7244, "step": 10922 }, { "epoch": 0.3347738139021699, "grad_norm": 1.2083485727041783, "learning_rate": 7.755656431406756e-06, "loss": 0.6817, "step": 10923 }, { "epoch": 0.33480446242491113, "grad_norm": 1.2683469651942751, "learning_rate": 7.755242281023634e-06, "loss": 0.6423, "step": 10924 }, { "epoch": 0.33483511094765234, "grad_norm": 0.6553146947449155, "learning_rate": 7.754828103492407e-06, "loss": 0.4404, "step": 10925 }, { "epoch": 0.33486575947039354, "grad_norm": 1.3726018064071044, "learning_rate": 7.754413898817152e-06, "loss": 0.5031, "step": 10926 }, { "epoch": 0.33489640799313475, "grad_norm": 1.13974640842874, "learning_rate": 7.753999667001952e-06, "loss": 0.6823, "step": 10927 }, { "epoch": 0.33492705651587595, "grad_norm": 1.1973792771703349, "learning_rate": 7.753585408050892e-06, "loss": 0.7652, "step": 10928 }, { "epoch": 0.33495770503861716, "grad_norm": 1.1899875236271387, "learning_rate": 7.753171121968048e-06, "loss": 0.7214, "step": 10929 }, { "epoch": 0.33498835356135837, "grad_norm": 1.4045700129564123, "learning_rate": 7.752756808757505e-06, "loss": 0.7917, "step": 10930 }, { "epoch": 0.33501900208409957, "grad_norm": 1.1189635287110207, "learning_rate": 7.752342468423343e-06, "loss": 0.6443, "step": 10931 }, { "epoch": 0.3350496506068408, "grad_norm": 1.2986346062427534, "learning_rate": 7.75192810096965e-06, "loss": 0.6594, "step": 10932 }, { "epoch": 0.3350802991295819, "grad_norm": 1.1878269988409975, "learning_rate": 7.751513706400502e-06, "loss": 0.6193, "step": 10933 }, { "epoch": 0.33511094765232313, "grad_norm": 1.3191288255647, "learning_rate": 7.751099284719988e-06, "loss": 0.6666, "step": 10934 }, { "epoch": 0.33514159617506434, "grad_norm": 1.1684389350631879, "learning_rate": 7.750684835932185e-06, "loss": 0.6331, "step": 10935 }, { "epoch": 0.33517224469780554, "grad_norm": 1.271097759317321, "learning_rate": 7.750270360041185e-06, "loss": 0.6247, "step": 10936 }, { "epoch": 0.33520289322054675, "grad_norm": 1.1650520514797666, "learning_rate": 7.749855857051065e-06, "loss": 0.669, "step": 10937 }, { "epoch": 0.33523354174328795, "grad_norm": 1.2500358673489949, "learning_rate": 7.749441326965912e-06, "loss": 0.6701, "step": 10938 }, { "epoch": 0.33526419026602916, "grad_norm": 0.5246082283629275, "learning_rate": 7.749026769789811e-06, "loss": 0.4329, "step": 10939 }, { "epoch": 0.33529483878877037, "grad_norm": 1.1619513079429726, "learning_rate": 7.748612185526845e-06, "loss": 0.6159, "step": 10940 }, { "epoch": 0.33532548731151157, "grad_norm": 1.3234340582165582, "learning_rate": 7.7481975741811e-06, "loss": 0.6716, "step": 10941 }, { "epoch": 0.3353561358342528, "grad_norm": 1.3216405361874821, "learning_rate": 7.747782935756662e-06, "loss": 0.6278, "step": 10942 }, { "epoch": 0.335386784356994, "grad_norm": 1.3274536725108352, "learning_rate": 7.747368270257616e-06, "loss": 0.6653, "step": 10943 }, { "epoch": 0.3354174328797352, "grad_norm": 1.243104279817613, "learning_rate": 7.746953577688046e-06, "loss": 0.6779, "step": 10944 }, { "epoch": 0.3354480814024764, "grad_norm": 1.3714875219562963, "learning_rate": 7.74653885805204e-06, "loss": 0.7441, "step": 10945 }, { "epoch": 0.3354787299252176, "grad_norm": 1.3721325718180952, "learning_rate": 7.746124111353683e-06, "loss": 0.6705, "step": 10946 }, { "epoch": 0.3355093784479588, "grad_norm": 0.47127095375653216, "learning_rate": 7.745709337597062e-06, "loss": 0.418, "step": 10947 }, { "epoch": 0.3355400269707, "grad_norm": 1.156044795348294, "learning_rate": 7.745294536786268e-06, "loss": 0.6773, "step": 10948 }, { "epoch": 0.3355706754934412, "grad_norm": 1.225579592773415, "learning_rate": 7.74487970892538e-06, "loss": 0.6956, "step": 10949 }, { "epoch": 0.3356013240161824, "grad_norm": 1.1730696559376081, "learning_rate": 7.744464854018494e-06, "loss": 0.6547, "step": 10950 }, { "epoch": 0.33563197253892363, "grad_norm": 1.2398408307163855, "learning_rate": 7.74404997206969e-06, "loss": 0.6691, "step": 10951 }, { "epoch": 0.33566262106166483, "grad_norm": 1.109076721467245, "learning_rate": 7.743635063083062e-06, "loss": 0.5819, "step": 10952 }, { "epoch": 0.33569326958440604, "grad_norm": 1.2713593164807284, "learning_rate": 7.743220127062696e-06, "loss": 0.6757, "step": 10953 }, { "epoch": 0.33572391810714725, "grad_norm": 1.343106983113172, "learning_rate": 7.742805164012679e-06, "loss": 0.7595, "step": 10954 }, { "epoch": 0.33575456662988845, "grad_norm": 1.1625659845837173, "learning_rate": 7.742390173937103e-06, "loss": 0.6625, "step": 10955 }, { "epoch": 0.33578521515262966, "grad_norm": 1.271138028230735, "learning_rate": 7.74197515684005e-06, "loss": 0.621, "step": 10956 }, { "epoch": 0.33581586367537086, "grad_norm": 1.2213695849757116, "learning_rate": 7.741560112725619e-06, "loss": 0.6532, "step": 10957 }, { "epoch": 0.33584651219811207, "grad_norm": 1.2352420460147855, "learning_rate": 7.741145041597892e-06, "loss": 0.6931, "step": 10958 }, { "epoch": 0.3358771607208533, "grad_norm": 1.2277033185118325, "learning_rate": 7.740729943460965e-06, "loss": 0.6996, "step": 10959 }, { "epoch": 0.3359078092435945, "grad_norm": 1.4824039054452016, "learning_rate": 7.740314818318921e-06, "loss": 0.7554, "step": 10960 }, { "epoch": 0.3359384577663357, "grad_norm": 1.3353825745880226, "learning_rate": 7.739899666175856e-06, "loss": 0.6749, "step": 10961 }, { "epoch": 0.3359691062890769, "grad_norm": 1.196049870839765, "learning_rate": 7.739484487035858e-06, "loss": 0.6423, "step": 10962 }, { "epoch": 0.3359997548118181, "grad_norm": 0.5016413092955122, "learning_rate": 7.739069280903017e-06, "loss": 0.4414, "step": 10963 }, { "epoch": 0.33603040333455925, "grad_norm": 1.1593232180292188, "learning_rate": 7.738654047781427e-06, "loss": 0.7106, "step": 10964 }, { "epoch": 0.33606105185730045, "grad_norm": 1.1688005459635156, "learning_rate": 7.738238787675178e-06, "loss": 0.5605, "step": 10965 }, { "epoch": 0.33609170038004166, "grad_norm": 1.2791867073595942, "learning_rate": 7.737823500588361e-06, "loss": 0.6965, "step": 10966 }, { "epoch": 0.33612234890278286, "grad_norm": 1.4667262633002434, "learning_rate": 7.73740818652507e-06, "loss": 0.7159, "step": 10967 }, { "epoch": 0.33615299742552407, "grad_norm": 1.4383460743116765, "learning_rate": 7.736992845489394e-06, "loss": 0.7285, "step": 10968 }, { "epoch": 0.3361836459482653, "grad_norm": 0.44312638886746886, "learning_rate": 7.736577477485427e-06, "loss": 0.4222, "step": 10969 }, { "epoch": 0.3362142944710065, "grad_norm": 1.322576870445608, "learning_rate": 7.736162082517265e-06, "loss": 0.7912, "step": 10970 }, { "epoch": 0.3362449429937477, "grad_norm": 1.2988746725804914, "learning_rate": 7.735746660588993e-06, "loss": 0.6903, "step": 10971 }, { "epoch": 0.3362755915164889, "grad_norm": 1.1565643529799627, "learning_rate": 7.735331211704713e-06, "loss": 0.6097, "step": 10972 }, { "epoch": 0.3363062400392301, "grad_norm": 1.3927628006073438, "learning_rate": 7.734915735868513e-06, "loss": 0.7712, "step": 10973 }, { "epoch": 0.3363368885619713, "grad_norm": 1.1622891214940805, "learning_rate": 7.73450023308449e-06, "loss": 0.6268, "step": 10974 }, { "epoch": 0.3363675370847125, "grad_norm": 1.2417266983000788, "learning_rate": 7.734084703356736e-06, "loss": 0.6525, "step": 10975 }, { "epoch": 0.3363981856074537, "grad_norm": 1.326168310829567, "learning_rate": 7.733669146689344e-06, "loss": 0.7416, "step": 10976 }, { "epoch": 0.3364288341301949, "grad_norm": 1.201594711038567, "learning_rate": 7.733253563086413e-06, "loss": 0.6668, "step": 10977 }, { "epoch": 0.3364594826529361, "grad_norm": 1.1965192932012592, "learning_rate": 7.732837952552035e-06, "loss": 0.6644, "step": 10978 }, { "epoch": 0.33649013117567733, "grad_norm": 1.3377197479625376, "learning_rate": 7.732422315090304e-06, "loss": 0.6995, "step": 10979 }, { "epoch": 0.33652077969841854, "grad_norm": 1.2917700298250627, "learning_rate": 7.732006650705318e-06, "loss": 0.7179, "step": 10980 }, { "epoch": 0.33655142822115974, "grad_norm": 0.5056642376471376, "learning_rate": 7.73159095940117e-06, "loss": 0.4589, "step": 10981 }, { "epoch": 0.33658207674390095, "grad_norm": 1.122626485883214, "learning_rate": 7.731175241181959e-06, "loss": 0.6818, "step": 10982 }, { "epoch": 0.33661272526664215, "grad_norm": 1.2647523558111464, "learning_rate": 7.730759496051778e-06, "loss": 0.752, "step": 10983 }, { "epoch": 0.33664337378938336, "grad_norm": 1.3978087739310792, "learning_rate": 7.730343724014726e-06, "loss": 0.627, "step": 10984 }, { "epoch": 0.33667402231212457, "grad_norm": 1.449086229625434, "learning_rate": 7.729927925074898e-06, "loss": 0.7221, "step": 10985 }, { "epoch": 0.33670467083486577, "grad_norm": 1.3213813217205697, "learning_rate": 7.729512099236394e-06, "loss": 0.6798, "step": 10986 }, { "epoch": 0.336735319357607, "grad_norm": 1.2837213284711129, "learning_rate": 7.729096246503307e-06, "loss": 0.7451, "step": 10987 }, { "epoch": 0.3367659678803482, "grad_norm": 1.2418446004972155, "learning_rate": 7.728680366879736e-06, "loss": 0.7106, "step": 10988 }, { "epoch": 0.3367966164030894, "grad_norm": 1.2610481122992643, "learning_rate": 7.728264460369781e-06, "loss": 0.6459, "step": 10989 }, { "epoch": 0.3368272649258306, "grad_norm": 1.312528366428491, "learning_rate": 7.727848526977535e-06, "loss": 0.6235, "step": 10990 }, { "epoch": 0.3368579134485718, "grad_norm": 1.2854022095071385, "learning_rate": 7.727432566707103e-06, "loss": 0.7135, "step": 10991 }, { "epoch": 0.336888561971313, "grad_norm": 1.0119979370847085, "learning_rate": 7.727016579562578e-06, "loss": 0.5329, "step": 10992 }, { "epoch": 0.3369192104940542, "grad_norm": 1.2763770425355956, "learning_rate": 7.726600565548061e-06, "loss": 0.6895, "step": 10993 }, { "epoch": 0.3369498590167954, "grad_norm": 1.9951257572272831, "learning_rate": 7.726184524667653e-06, "loss": 0.7286, "step": 10994 }, { "epoch": 0.33698050753953657, "grad_norm": 1.3508954114378227, "learning_rate": 7.72576845692545e-06, "loss": 0.6698, "step": 10995 }, { "epoch": 0.3370111560622778, "grad_norm": 1.3515442060212515, "learning_rate": 7.72535236232555e-06, "loss": 0.6733, "step": 10996 }, { "epoch": 0.337041804585019, "grad_norm": 1.2694160026746737, "learning_rate": 7.72493624087206e-06, "loss": 0.717, "step": 10997 }, { "epoch": 0.3370724531077602, "grad_norm": 1.374906584585336, "learning_rate": 7.724520092569075e-06, "loss": 0.6946, "step": 10998 }, { "epoch": 0.3371031016305014, "grad_norm": 1.3883914515854439, "learning_rate": 7.724103917420695e-06, "loss": 0.7552, "step": 10999 }, { "epoch": 0.3371337501532426, "grad_norm": 0.5058812644507713, "learning_rate": 7.723687715431024e-06, "loss": 0.4128, "step": 11000 }, { "epoch": 0.3371643986759838, "grad_norm": 1.6424804620433662, "learning_rate": 7.723271486604162e-06, "loss": 0.7415, "step": 11001 }, { "epoch": 0.337195047198725, "grad_norm": 1.3562800287540964, "learning_rate": 7.722855230944206e-06, "loss": 0.7309, "step": 11002 }, { "epoch": 0.3372256957214662, "grad_norm": 1.4121742207969807, "learning_rate": 7.722438948455263e-06, "loss": 0.7197, "step": 11003 }, { "epoch": 0.3372563442442074, "grad_norm": 1.2033949624585556, "learning_rate": 7.722022639141431e-06, "loss": 0.6722, "step": 11004 }, { "epoch": 0.3372869927669486, "grad_norm": 1.2688767724567571, "learning_rate": 7.721606303006815e-06, "loss": 0.6308, "step": 11005 }, { "epoch": 0.33731764128968983, "grad_norm": 1.1693910006005754, "learning_rate": 7.721189940055513e-06, "loss": 0.6803, "step": 11006 }, { "epoch": 0.33734828981243103, "grad_norm": 1.3063101205167558, "learning_rate": 7.720773550291634e-06, "loss": 0.6417, "step": 11007 }, { "epoch": 0.33737893833517224, "grad_norm": 1.328550491342817, "learning_rate": 7.720357133719274e-06, "loss": 0.6784, "step": 11008 }, { "epoch": 0.33740958685791345, "grad_norm": 1.2617373520225197, "learning_rate": 7.719940690342543e-06, "loss": 0.6647, "step": 11009 }, { "epoch": 0.33744023538065465, "grad_norm": 1.3651233009759536, "learning_rate": 7.719524220165537e-06, "loss": 0.7376, "step": 11010 }, { "epoch": 0.33747088390339586, "grad_norm": 1.5032229374675212, "learning_rate": 7.719107723192363e-06, "loss": 0.7786, "step": 11011 }, { "epoch": 0.33750153242613706, "grad_norm": 1.2436721511356812, "learning_rate": 7.718691199427126e-06, "loss": 0.6707, "step": 11012 }, { "epoch": 0.33753218094887827, "grad_norm": 1.505207252531559, "learning_rate": 7.718274648873929e-06, "loss": 0.6273, "step": 11013 }, { "epoch": 0.3375628294716195, "grad_norm": 1.2750453063524398, "learning_rate": 7.717858071536877e-06, "loss": 0.7394, "step": 11014 }, { "epoch": 0.3375934779943607, "grad_norm": 1.1031830019372266, "learning_rate": 7.717441467420072e-06, "loss": 0.6732, "step": 11015 }, { "epoch": 0.3376241265171019, "grad_norm": 1.2492660030622298, "learning_rate": 7.717024836527623e-06, "loss": 0.7086, "step": 11016 }, { "epoch": 0.3376547750398431, "grad_norm": 1.0434045393641984, "learning_rate": 7.716608178863631e-06, "loss": 0.6202, "step": 11017 }, { "epoch": 0.3376854235625843, "grad_norm": 1.1549442657465838, "learning_rate": 7.716191494432206e-06, "loss": 0.5699, "step": 11018 }, { "epoch": 0.3377160720853255, "grad_norm": 1.4636452982938082, "learning_rate": 7.71577478323745e-06, "loss": 0.722, "step": 11019 }, { "epoch": 0.3377467206080667, "grad_norm": 1.1774025204817729, "learning_rate": 7.71535804528347e-06, "loss": 0.6712, "step": 11020 }, { "epoch": 0.3377773691308079, "grad_norm": 1.2883850669047743, "learning_rate": 7.714941280574373e-06, "loss": 0.7286, "step": 11021 }, { "epoch": 0.3378080176535491, "grad_norm": 0.5546125089074253, "learning_rate": 7.714524489114264e-06, "loss": 0.4509, "step": 11022 }, { "epoch": 0.3378386661762903, "grad_norm": 1.091057031679343, "learning_rate": 7.714107670907252e-06, "loss": 0.6782, "step": 11023 }, { "epoch": 0.33786931469903153, "grad_norm": 1.1984760425591732, "learning_rate": 7.713690825957442e-06, "loss": 0.6463, "step": 11024 }, { "epoch": 0.33789996322177274, "grad_norm": 1.2604798559819639, "learning_rate": 7.713273954268942e-06, "loss": 0.7571, "step": 11025 }, { "epoch": 0.3379306117445139, "grad_norm": 1.19980904876096, "learning_rate": 7.712857055845859e-06, "loss": 0.5988, "step": 11026 }, { "epoch": 0.3379612602672551, "grad_norm": 1.2008981968399897, "learning_rate": 7.712440130692302e-06, "loss": 0.6273, "step": 11027 }, { "epoch": 0.3379919087899963, "grad_norm": 1.2313995235879642, "learning_rate": 7.712023178812378e-06, "loss": 0.7496, "step": 11028 }, { "epoch": 0.3380225573127375, "grad_norm": 1.2453707823619045, "learning_rate": 7.711606200210195e-06, "loss": 0.7192, "step": 11029 }, { "epoch": 0.3380532058354787, "grad_norm": 1.2579740349053976, "learning_rate": 7.711189194889864e-06, "loss": 0.7646, "step": 11030 }, { "epoch": 0.3380838543582199, "grad_norm": 1.3208739090212247, "learning_rate": 7.710772162855492e-06, "loss": 0.7193, "step": 11031 }, { "epoch": 0.3381145028809611, "grad_norm": 0.523743690188427, "learning_rate": 7.710355104111186e-06, "loss": 0.4441, "step": 11032 }, { "epoch": 0.3381451514037023, "grad_norm": 1.1787849272337791, "learning_rate": 7.70993801866106e-06, "loss": 0.6194, "step": 11033 }, { "epoch": 0.33817579992644353, "grad_norm": 1.2348998995383083, "learning_rate": 7.70952090650922e-06, "loss": 0.6553, "step": 11034 }, { "epoch": 0.33820644844918474, "grad_norm": 0.47725927014022096, "learning_rate": 7.709103767659779e-06, "loss": 0.4379, "step": 11035 }, { "epoch": 0.33823709697192594, "grad_norm": 1.2793297557812529, "learning_rate": 7.708686602116843e-06, "loss": 0.5918, "step": 11036 }, { "epoch": 0.33826774549466715, "grad_norm": 1.1158844681292954, "learning_rate": 7.708269409884528e-06, "loss": 0.6829, "step": 11037 }, { "epoch": 0.33829839401740835, "grad_norm": 1.1016626682619948, "learning_rate": 7.707852190966937e-06, "loss": 0.5971, "step": 11038 }, { "epoch": 0.33832904254014956, "grad_norm": 1.1776978216745628, "learning_rate": 7.70743494536819e-06, "loss": 0.7495, "step": 11039 }, { "epoch": 0.33835969106289077, "grad_norm": 1.1852264820317546, "learning_rate": 7.707017673092391e-06, "loss": 0.663, "step": 11040 }, { "epoch": 0.33839033958563197, "grad_norm": 1.1494084074028776, "learning_rate": 7.706600374143655e-06, "loss": 0.6905, "step": 11041 }, { "epoch": 0.3384209881083732, "grad_norm": 1.1340989396082564, "learning_rate": 7.706183048526095e-06, "loss": 0.6594, "step": 11042 }, { "epoch": 0.3384516366311144, "grad_norm": 1.1098722028367796, "learning_rate": 7.70576569624382e-06, "loss": 0.6294, "step": 11043 }, { "epoch": 0.3384822851538556, "grad_norm": 1.2036233606909552, "learning_rate": 7.705348317300943e-06, "loss": 0.7079, "step": 11044 }, { "epoch": 0.3385129336765968, "grad_norm": 1.317494888013875, "learning_rate": 7.704930911701575e-06, "loss": 0.7061, "step": 11045 }, { "epoch": 0.338543582199338, "grad_norm": 1.120448493399669, "learning_rate": 7.704513479449831e-06, "loss": 0.648, "step": 11046 }, { "epoch": 0.3385742307220792, "grad_norm": 1.2609220158660561, "learning_rate": 7.704096020549824e-06, "loss": 0.6049, "step": 11047 }, { "epoch": 0.3386048792448204, "grad_norm": 1.210838865955077, "learning_rate": 7.70367853500567e-06, "loss": 0.7163, "step": 11048 }, { "epoch": 0.3386355277675616, "grad_norm": 1.1849950734524135, "learning_rate": 7.703261022821476e-06, "loss": 0.6752, "step": 11049 }, { "epoch": 0.3386661762903028, "grad_norm": 1.4173167384929668, "learning_rate": 7.702843484001361e-06, "loss": 0.7113, "step": 11050 }, { "epoch": 0.33869682481304403, "grad_norm": 1.3202688908905535, "learning_rate": 7.70242591854944e-06, "loss": 0.6691, "step": 11051 }, { "epoch": 0.33872747333578523, "grad_norm": 1.3006573667211339, "learning_rate": 7.70200832646982e-06, "loss": 0.6988, "step": 11052 }, { "epoch": 0.33875812185852644, "grad_norm": 1.1889512509887723, "learning_rate": 7.701590707766624e-06, "loss": 0.6636, "step": 11053 }, { "epoch": 0.33878877038126765, "grad_norm": 1.2986760470340037, "learning_rate": 7.701173062443963e-06, "loss": 0.7355, "step": 11054 }, { "epoch": 0.33881941890400885, "grad_norm": 1.3414890091954528, "learning_rate": 7.700755390505952e-06, "loss": 0.7467, "step": 11055 }, { "epoch": 0.33885006742675006, "grad_norm": 0.5708450586351967, "learning_rate": 7.700337691956708e-06, "loss": 0.4508, "step": 11056 }, { "epoch": 0.3388807159494912, "grad_norm": 1.2711494606529357, "learning_rate": 7.699919966800344e-06, "loss": 0.6369, "step": 11057 }, { "epoch": 0.3389113644722324, "grad_norm": 1.4075686080274872, "learning_rate": 7.69950221504098e-06, "loss": 0.7251, "step": 11058 }, { "epoch": 0.3389420129949736, "grad_norm": 1.155760488095677, "learning_rate": 7.699084436682728e-06, "loss": 0.5214, "step": 11059 }, { "epoch": 0.3389726615177148, "grad_norm": 1.414876039921882, "learning_rate": 7.698666631729708e-06, "loss": 0.6371, "step": 11060 }, { "epoch": 0.33900331004045603, "grad_norm": 1.1913628176623567, "learning_rate": 7.698248800186035e-06, "loss": 0.6616, "step": 11061 }, { "epoch": 0.33903395856319724, "grad_norm": 1.1882799840268343, "learning_rate": 7.697830942055825e-06, "loss": 0.6903, "step": 11062 }, { "epoch": 0.33906460708593844, "grad_norm": 0.47591648465884673, "learning_rate": 7.697413057343198e-06, "loss": 0.4228, "step": 11063 }, { "epoch": 0.33909525560867965, "grad_norm": 1.3801779470877737, "learning_rate": 7.696995146052269e-06, "loss": 0.7477, "step": 11064 }, { "epoch": 0.33912590413142085, "grad_norm": 0.4687400260991524, "learning_rate": 7.696577208187157e-06, "loss": 0.4404, "step": 11065 }, { "epoch": 0.33915655265416206, "grad_norm": 1.35163026043883, "learning_rate": 7.69615924375198e-06, "loss": 0.693, "step": 11066 }, { "epoch": 0.33918720117690326, "grad_norm": 1.1631317298075576, "learning_rate": 7.695741252750857e-06, "loss": 0.5971, "step": 11067 }, { "epoch": 0.33921784969964447, "grad_norm": 1.2037803910495586, "learning_rate": 7.695323235187904e-06, "loss": 0.6242, "step": 11068 }, { "epoch": 0.3392484982223857, "grad_norm": 1.2174887736269617, "learning_rate": 7.694905191067241e-06, "loss": 0.6973, "step": 11069 }, { "epoch": 0.3392791467451269, "grad_norm": 1.3648074413507099, "learning_rate": 7.69448712039299e-06, "loss": 0.671, "step": 11070 }, { "epoch": 0.3393097952678681, "grad_norm": 0.4893209030780892, "learning_rate": 7.694069023169267e-06, "loss": 0.4383, "step": 11071 }, { "epoch": 0.3393404437906093, "grad_norm": 1.1622448215478258, "learning_rate": 7.69365089940019e-06, "loss": 0.7703, "step": 11072 }, { "epoch": 0.3393710923133505, "grad_norm": 1.5733343661602024, "learning_rate": 7.693232749089886e-06, "loss": 0.6719, "step": 11073 }, { "epoch": 0.3394017408360917, "grad_norm": 1.3946260340727734, "learning_rate": 7.69281457224247e-06, "loss": 0.7518, "step": 11074 }, { "epoch": 0.3394323893588329, "grad_norm": 1.1499680039870306, "learning_rate": 7.69239636886206e-06, "loss": 0.676, "step": 11075 }, { "epoch": 0.3394630378815741, "grad_norm": 1.219917477661171, "learning_rate": 7.691978138952782e-06, "loss": 0.7205, "step": 11076 }, { "epoch": 0.3394936864043153, "grad_norm": 1.132304029626606, "learning_rate": 7.691559882518753e-06, "loss": 0.7017, "step": 11077 }, { "epoch": 0.3395243349270565, "grad_norm": 1.1298363804071707, "learning_rate": 7.691141599564098e-06, "loss": 0.669, "step": 11078 }, { "epoch": 0.33955498344979773, "grad_norm": 1.1720186984992786, "learning_rate": 7.690723290092933e-06, "loss": 0.6849, "step": 11079 }, { "epoch": 0.33958563197253894, "grad_norm": 1.1274472539056317, "learning_rate": 7.690304954109387e-06, "loss": 0.6579, "step": 11080 }, { "epoch": 0.33961628049528014, "grad_norm": 1.3846979375132056, "learning_rate": 7.689886591617574e-06, "loss": 0.721, "step": 11081 }, { "epoch": 0.33964692901802135, "grad_norm": 1.1405816751915563, "learning_rate": 7.689468202621623e-06, "loss": 0.6328, "step": 11082 }, { "epoch": 0.33967757754076255, "grad_norm": 1.0719964705334977, "learning_rate": 7.689049787125654e-06, "loss": 0.6248, "step": 11083 }, { "epoch": 0.33970822606350376, "grad_norm": 1.189291543157103, "learning_rate": 7.688631345133787e-06, "loss": 0.66, "step": 11084 }, { "epoch": 0.33973887458624497, "grad_norm": 1.313550660390885, "learning_rate": 7.688212876650149e-06, "loss": 0.6769, "step": 11085 }, { "epoch": 0.33976952310898617, "grad_norm": 1.265413380920595, "learning_rate": 7.687794381678861e-06, "loss": 0.5923, "step": 11086 }, { "epoch": 0.3398001716317274, "grad_norm": 1.284928955641122, "learning_rate": 7.687375860224047e-06, "loss": 0.6908, "step": 11087 }, { "epoch": 0.3398308201544685, "grad_norm": 1.212032603050945, "learning_rate": 7.686957312289833e-06, "loss": 0.7131, "step": 11088 }, { "epoch": 0.33986146867720973, "grad_norm": 1.3150503231643478, "learning_rate": 7.686538737880339e-06, "loss": 0.6549, "step": 11089 }, { "epoch": 0.33989211719995094, "grad_norm": 1.2023043591292435, "learning_rate": 7.686120136999692e-06, "loss": 0.6728, "step": 11090 }, { "epoch": 0.33992276572269214, "grad_norm": 1.1736337136361878, "learning_rate": 7.685701509652017e-06, "loss": 0.6633, "step": 11091 }, { "epoch": 0.33995341424543335, "grad_norm": 1.2336591339794352, "learning_rate": 7.685282855841438e-06, "loss": 0.7517, "step": 11092 }, { "epoch": 0.33998406276817456, "grad_norm": 1.1478692701697986, "learning_rate": 7.684864175572078e-06, "loss": 0.649, "step": 11093 }, { "epoch": 0.34001471129091576, "grad_norm": 1.244366868427622, "learning_rate": 7.684445468848064e-06, "loss": 0.679, "step": 11094 }, { "epoch": 0.34004535981365697, "grad_norm": 1.197555303562272, "learning_rate": 7.684026735673525e-06, "loss": 0.5869, "step": 11095 }, { "epoch": 0.3400760083363982, "grad_norm": 1.2089874910357499, "learning_rate": 7.68360797605258e-06, "loss": 0.693, "step": 11096 }, { "epoch": 0.3401066568591394, "grad_norm": 1.1913717244868307, "learning_rate": 7.683189189989364e-06, "loss": 0.6539, "step": 11097 }, { "epoch": 0.3401373053818806, "grad_norm": 1.3960713365633306, "learning_rate": 7.682770377487995e-06, "loss": 0.8165, "step": 11098 }, { "epoch": 0.3401679539046218, "grad_norm": 1.089187416497781, "learning_rate": 7.682351538552603e-06, "loss": 0.6303, "step": 11099 }, { "epoch": 0.340198602427363, "grad_norm": 1.3032120617273422, "learning_rate": 7.681932673187315e-06, "loss": 0.7044, "step": 11100 }, { "epoch": 0.3402292509501042, "grad_norm": 1.1346421129693045, "learning_rate": 7.68151378139626e-06, "loss": 0.6994, "step": 11101 }, { "epoch": 0.3402598994728454, "grad_norm": 1.3065045519528062, "learning_rate": 7.681094863183562e-06, "loss": 0.6669, "step": 11102 }, { "epoch": 0.3402905479955866, "grad_norm": 0.601943658336679, "learning_rate": 7.68067591855335e-06, "loss": 0.4633, "step": 11103 }, { "epoch": 0.3403211965183278, "grad_norm": 1.301917825071638, "learning_rate": 7.680256947509754e-06, "loss": 0.7154, "step": 11104 }, { "epoch": 0.340351845041069, "grad_norm": 1.329012754234123, "learning_rate": 7.679837950056899e-06, "loss": 0.8069, "step": 11105 }, { "epoch": 0.34038249356381023, "grad_norm": 1.1704076445226619, "learning_rate": 7.679418926198915e-06, "loss": 0.6479, "step": 11106 }, { "epoch": 0.34041314208655143, "grad_norm": 1.3530869918258488, "learning_rate": 7.678999875939931e-06, "loss": 0.7346, "step": 11107 }, { "epoch": 0.34044379060929264, "grad_norm": 1.3434566502886922, "learning_rate": 7.678580799284077e-06, "loss": 0.6385, "step": 11108 }, { "epoch": 0.34047443913203385, "grad_norm": 1.118970069301795, "learning_rate": 7.67816169623548e-06, "loss": 0.6093, "step": 11109 }, { "epoch": 0.34050508765477505, "grad_norm": 0.48899486775909184, "learning_rate": 7.67774256679827e-06, "loss": 0.4276, "step": 11110 }, { "epoch": 0.34053573617751626, "grad_norm": 1.1102356211895272, "learning_rate": 7.677323410976577e-06, "loss": 0.6409, "step": 11111 }, { "epoch": 0.34056638470025746, "grad_norm": 1.3504474557434802, "learning_rate": 7.676904228774533e-06, "loss": 0.6322, "step": 11112 }, { "epoch": 0.34059703322299867, "grad_norm": 1.215643212658937, "learning_rate": 7.676485020196266e-06, "loss": 0.7676, "step": 11113 }, { "epoch": 0.3406276817457399, "grad_norm": 1.1853360885223825, "learning_rate": 7.676065785245909e-06, "loss": 0.7481, "step": 11114 }, { "epoch": 0.3406583302684811, "grad_norm": 1.2926172019484585, "learning_rate": 7.675646523927588e-06, "loss": 0.7129, "step": 11115 }, { "epoch": 0.3406889787912223, "grad_norm": 1.1563693496004706, "learning_rate": 7.67522723624544e-06, "loss": 0.6441, "step": 11116 }, { "epoch": 0.3407196273139635, "grad_norm": 1.2340034696849829, "learning_rate": 7.674807922203593e-06, "loss": 0.7147, "step": 11117 }, { "epoch": 0.3407502758367047, "grad_norm": 1.3324645141471554, "learning_rate": 7.674388581806179e-06, "loss": 0.6074, "step": 11118 }, { "epoch": 0.34078092435944585, "grad_norm": 1.2257446896659074, "learning_rate": 7.673969215057328e-06, "loss": 0.5814, "step": 11119 }, { "epoch": 0.34081157288218705, "grad_norm": 1.143667575929759, "learning_rate": 7.673549821961176e-06, "loss": 0.643, "step": 11120 }, { "epoch": 0.34084222140492826, "grad_norm": 1.454542693202651, "learning_rate": 7.673130402521854e-06, "loss": 0.7185, "step": 11121 }, { "epoch": 0.34087286992766946, "grad_norm": 1.2689011895499838, "learning_rate": 7.672710956743491e-06, "loss": 0.6349, "step": 11122 }, { "epoch": 0.34090351845041067, "grad_norm": 0.5256111173442383, "learning_rate": 7.672291484630226e-06, "loss": 0.4472, "step": 11123 }, { "epoch": 0.3409341669731519, "grad_norm": 2.6252855855475303, "learning_rate": 7.67187198618619e-06, "loss": 0.6829, "step": 11124 }, { "epoch": 0.3409648154958931, "grad_norm": 1.5020142704654083, "learning_rate": 7.671452461415514e-06, "loss": 0.6965, "step": 11125 }, { "epoch": 0.3409954640186343, "grad_norm": 1.201012980438778, "learning_rate": 7.671032910322333e-06, "loss": 0.6677, "step": 11126 }, { "epoch": 0.3410261125413755, "grad_norm": 1.3678547466542612, "learning_rate": 7.670613332910784e-06, "loss": 0.9, "step": 11127 }, { "epoch": 0.3410567610641167, "grad_norm": 1.2633906521083857, "learning_rate": 7.670193729184997e-06, "loss": 0.7293, "step": 11128 }, { "epoch": 0.3410874095868579, "grad_norm": 1.3270922985065958, "learning_rate": 7.669774099149107e-06, "loss": 0.7089, "step": 11129 }, { "epoch": 0.3411180581095991, "grad_norm": 0.4498780729047122, "learning_rate": 7.66935444280725e-06, "loss": 0.4274, "step": 11130 }, { "epoch": 0.3411487066323403, "grad_norm": 1.1264258759929653, "learning_rate": 7.66893476016356e-06, "loss": 0.6077, "step": 11131 }, { "epoch": 0.3411793551550815, "grad_norm": 1.13940364899582, "learning_rate": 7.668515051222175e-06, "loss": 0.5909, "step": 11132 }, { "epoch": 0.3412100036778227, "grad_norm": 1.3584290441870308, "learning_rate": 7.668095315987227e-06, "loss": 0.782, "step": 11133 }, { "epoch": 0.34124065220056393, "grad_norm": 1.2632978454118915, "learning_rate": 7.667675554462854e-06, "loss": 0.662, "step": 11134 }, { "epoch": 0.34127130072330514, "grad_norm": 1.133661899766304, "learning_rate": 7.66725576665319e-06, "loss": 0.6858, "step": 11135 }, { "epoch": 0.34130194924604634, "grad_norm": 1.1085534112639979, "learning_rate": 7.666835952562375e-06, "loss": 0.5221, "step": 11136 }, { "epoch": 0.34133259776878755, "grad_norm": 1.2465522291736015, "learning_rate": 7.66641611219454e-06, "loss": 0.6844, "step": 11137 }, { "epoch": 0.34136324629152875, "grad_norm": 1.2518901732631826, "learning_rate": 7.665996245553826e-06, "loss": 0.7133, "step": 11138 }, { "epoch": 0.34139389481426996, "grad_norm": 0.47487047701588136, "learning_rate": 7.665576352644369e-06, "loss": 0.4321, "step": 11139 }, { "epoch": 0.34142454333701117, "grad_norm": 1.4169209117123838, "learning_rate": 7.665156433470307e-06, "loss": 0.6636, "step": 11140 }, { "epoch": 0.34145519185975237, "grad_norm": 1.2211758248512548, "learning_rate": 7.664736488035776e-06, "loss": 0.6041, "step": 11141 }, { "epoch": 0.3414858403824936, "grad_norm": 0.4359499773400828, "learning_rate": 7.664316516344914e-06, "loss": 0.4138, "step": 11142 }, { "epoch": 0.3415164889052348, "grad_norm": 1.2631848577568112, "learning_rate": 7.66389651840186e-06, "loss": 0.5979, "step": 11143 }, { "epoch": 0.341547137427976, "grad_norm": 1.271320721660571, "learning_rate": 7.663476494210753e-06, "loss": 0.6651, "step": 11144 }, { "epoch": 0.3415777859507172, "grad_norm": 1.2025281501502427, "learning_rate": 7.663056443775729e-06, "loss": 0.6051, "step": 11145 }, { "epoch": 0.3416084344734584, "grad_norm": 1.3103557618286639, "learning_rate": 7.66263636710093e-06, "loss": 0.6082, "step": 11146 }, { "epoch": 0.3416390829961996, "grad_norm": 1.1884476868296194, "learning_rate": 7.662216264190494e-06, "loss": 0.5937, "step": 11147 }, { "epoch": 0.3416697315189408, "grad_norm": 1.2752623741648035, "learning_rate": 7.661796135048559e-06, "loss": 0.6902, "step": 11148 }, { "epoch": 0.341700380041682, "grad_norm": 0.48174912977504575, "learning_rate": 7.661375979679265e-06, "loss": 0.4163, "step": 11149 }, { "epoch": 0.34173102856442317, "grad_norm": 1.5476372980229713, "learning_rate": 7.660955798086754e-06, "loss": 0.75, "step": 11150 }, { "epoch": 0.3417616770871644, "grad_norm": 1.2291583943719313, "learning_rate": 7.660535590275163e-06, "loss": 0.6495, "step": 11151 }, { "epoch": 0.3417923256099056, "grad_norm": 1.3646879186235783, "learning_rate": 7.660115356248635e-06, "loss": 0.658, "step": 11152 }, { "epoch": 0.3418229741326468, "grad_norm": 1.2715650660396849, "learning_rate": 7.659695096011308e-06, "loss": 0.5887, "step": 11153 }, { "epoch": 0.341853622655388, "grad_norm": 1.269404714299181, "learning_rate": 7.659274809567327e-06, "loss": 0.7239, "step": 11154 }, { "epoch": 0.3418842711781292, "grad_norm": 1.290555469142059, "learning_rate": 7.658854496920831e-06, "loss": 0.7336, "step": 11155 }, { "epoch": 0.3419149197008704, "grad_norm": 1.4378823888314742, "learning_rate": 7.658434158075961e-06, "loss": 0.6733, "step": 11156 }, { "epoch": 0.3419455682236116, "grad_norm": 1.3248709351869032, "learning_rate": 7.658013793036858e-06, "loss": 0.6726, "step": 11157 }, { "epoch": 0.3419762167463528, "grad_norm": 1.2697898399224332, "learning_rate": 7.657593401807667e-06, "loss": 0.677, "step": 11158 }, { "epoch": 0.342006865269094, "grad_norm": 1.3194064018161924, "learning_rate": 7.657172984392526e-06, "loss": 0.7371, "step": 11159 }, { "epoch": 0.3420375137918352, "grad_norm": 1.3092098812820288, "learning_rate": 7.65675254079558e-06, "loss": 0.6012, "step": 11160 }, { "epoch": 0.34206816231457643, "grad_norm": 1.1611461321590861, "learning_rate": 7.656332071020972e-06, "loss": 0.6157, "step": 11161 }, { "epoch": 0.34209881083731764, "grad_norm": 1.1610282400582583, "learning_rate": 7.655911575072846e-06, "loss": 0.598, "step": 11162 }, { "epoch": 0.34212945936005884, "grad_norm": 1.1182544260885667, "learning_rate": 7.655491052955341e-06, "loss": 0.6572, "step": 11163 }, { "epoch": 0.34216010788280005, "grad_norm": 1.3155999984804385, "learning_rate": 7.655070504672605e-06, "loss": 0.6766, "step": 11164 }, { "epoch": 0.34219075640554125, "grad_norm": 1.1971178084410363, "learning_rate": 7.65464993022878e-06, "loss": 0.7446, "step": 11165 }, { "epoch": 0.34222140492828246, "grad_norm": 1.4346678726977267, "learning_rate": 7.654229329628007e-06, "loss": 0.6683, "step": 11166 }, { "epoch": 0.34225205345102366, "grad_norm": 1.6322831723399953, "learning_rate": 7.653808702874436e-06, "loss": 0.7163, "step": 11167 }, { "epoch": 0.34228270197376487, "grad_norm": 1.2517601254717268, "learning_rate": 7.65338804997221e-06, "loss": 0.7589, "step": 11168 }, { "epoch": 0.3423133504965061, "grad_norm": 1.3043769951252049, "learning_rate": 7.652967370925471e-06, "loss": 0.6871, "step": 11169 }, { "epoch": 0.3423439990192473, "grad_norm": 1.3569990931816514, "learning_rate": 7.652546665738368e-06, "loss": 0.4289, "step": 11170 }, { "epoch": 0.3423746475419885, "grad_norm": 1.3721804054856865, "learning_rate": 7.65212593441504e-06, "loss": 0.6931, "step": 11171 }, { "epoch": 0.3424052960647297, "grad_norm": 0.5423257175707266, "learning_rate": 7.651705176959638e-06, "loss": 0.4348, "step": 11172 }, { "epoch": 0.3424359445874709, "grad_norm": 1.2279147824590557, "learning_rate": 7.651284393376307e-06, "loss": 0.6678, "step": 11173 }, { "epoch": 0.3424665931102121, "grad_norm": 1.1889122477225897, "learning_rate": 7.650863583669193e-06, "loss": 0.611, "step": 11174 }, { "epoch": 0.3424972416329533, "grad_norm": 1.3961066636087167, "learning_rate": 7.650442747842442e-06, "loss": 0.6557, "step": 11175 }, { "epoch": 0.3425278901556945, "grad_norm": 1.2714147091266346, "learning_rate": 7.6500218859002e-06, "loss": 0.6606, "step": 11176 }, { "epoch": 0.3425585386784357, "grad_norm": 1.29640851537591, "learning_rate": 7.649600997846613e-06, "loss": 0.7543, "step": 11177 }, { "epoch": 0.3425891872011769, "grad_norm": 1.1246144068345696, "learning_rate": 7.649180083685832e-06, "loss": 0.6426, "step": 11178 }, { "epoch": 0.34261983572391813, "grad_norm": 0.4327336932512467, "learning_rate": 7.648759143422e-06, "loss": 0.4365, "step": 11179 }, { "epoch": 0.34265048424665934, "grad_norm": 5.176406870104054, "learning_rate": 7.648338177059266e-06, "loss": 0.7035, "step": 11180 }, { "epoch": 0.3426811327694005, "grad_norm": 1.1830043147252833, "learning_rate": 7.64791718460178e-06, "loss": 0.752, "step": 11181 }, { "epoch": 0.3427117812921417, "grad_norm": 1.1517750027000506, "learning_rate": 7.647496166053687e-06, "loss": 0.6129, "step": 11182 }, { "epoch": 0.3427424298148829, "grad_norm": 1.2080363415763469, "learning_rate": 7.647075121419139e-06, "loss": 0.7349, "step": 11183 }, { "epoch": 0.3427730783376241, "grad_norm": 1.2833943261114515, "learning_rate": 7.646654050702281e-06, "loss": 0.7013, "step": 11184 }, { "epoch": 0.3428037268603653, "grad_norm": 1.2182145223360596, "learning_rate": 7.646232953907262e-06, "loss": 0.6905, "step": 11185 }, { "epoch": 0.3428343753831065, "grad_norm": 1.2655462325762818, "learning_rate": 7.645811831038235e-06, "loss": 0.6731, "step": 11186 }, { "epoch": 0.3428650239058477, "grad_norm": 1.180720813463141, "learning_rate": 7.645390682099346e-06, "loss": 0.6714, "step": 11187 }, { "epoch": 0.3428956724285889, "grad_norm": 1.2123859107158264, "learning_rate": 7.644969507094747e-06, "loss": 0.6925, "step": 11188 }, { "epoch": 0.34292632095133013, "grad_norm": 1.3202158875404786, "learning_rate": 7.644548306028585e-06, "loss": 0.6683, "step": 11189 }, { "epoch": 0.34295696947407134, "grad_norm": 0.4754389180237252, "learning_rate": 7.644127078905013e-06, "loss": 0.4069, "step": 11190 }, { "epoch": 0.34298761799681254, "grad_norm": 1.3675125367835008, "learning_rate": 7.643705825728178e-06, "loss": 0.7188, "step": 11191 }, { "epoch": 0.34301826651955375, "grad_norm": 1.140083018769128, "learning_rate": 7.643284546502237e-06, "loss": 0.6112, "step": 11192 }, { "epoch": 0.34304891504229496, "grad_norm": 1.206391883454162, "learning_rate": 7.642863241231332e-06, "loss": 0.637, "step": 11193 }, { "epoch": 0.34307956356503616, "grad_norm": 1.3402420982457883, "learning_rate": 7.642441909919625e-06, "loss": 0.6091, "step": 11194 }, { "epoch": 0.34311021208777737, "grad_norm": 1.2547946771789869, "learning_rate": 7.64202055257126e-06, "loss": 0.6673, "step": 11195 }, { "epoch": 0.34314086061051857, "grad_norm": 1.3499529503496626, "learning_rate": 7.641599169190388e-06, "loss": 0.7055, "step": 11196 }, { "epoch": 0.3431715091332598, "grad_norm": 0.4723063055944071, "learning_rate": 7.641177759781167e-06, "loss": 0.402, "step": 11197 }, { "epoch": 0.343202157656001, "grad_norm": 1.157255573778667, "learning_rate": 7.640756324347743e-06, "loss": 0.7005, "step": 11198 }, { "epoch": 0.3432328061787422, "grad_norm": 1.3415301273351392, "learning_rate": 7.640334862894273e-06, "loss": 0.7551, "step": 11199 }, { "epoch": 0.3432634547014834, "grad_norm": 1.5269824920443398, "learning_rate": 7.639913375424906e-06, "loss": 0.6966, "step": 11200 }, { "epoch": 0.3432941032242246, "grad_norm": 1.2029755914905063, "learning_rate": 7.639491861943799e-06, "loss": 0.6617, "step": 11201 }, { "epoch": 0.3433247517469658, "grad_norm": 1.2149907662816934, "learning_rate": 7.639070322455101e-06, "loss": 0.6863, "step": 11202 }, { "epoch": 0.343355400269707, "grad_norm": 1.3469261030477326, "learning_rate": 7.63864875696297e-06, "loss": 0.6622, "step": 11203 }, { "epoch": 0.3433860487924482, "grad_norm": 1.3094145552907988, "learning_rate": 7.638227165471559e-06, "loss": 0.7186, "step": 11204 }, { "epoch": 0.3434166973151894, "grad_norm": 0.5086495479027147, "learning_rate": 7.637805547985018e-06, "loss": 0.4394, "step": 11205 }, { "epoch": 0.34344734583793063, "grad_norm": 2.236863985887316, "learning_rate": 7.637383904507505e-06, "loss": 0.5802, "step": 11206 }, { "epoch": 0.34347799436067183, "grad_norm": 0.47240827873061747, "learning_rate": 7.636962235043173e-06, "loss": 0.438, "step": 11207 }, { "epoch": 0.34350864288341304, "grad_norm": 1.3226072709787027, "learning_rate": 7.636540539596178e-06, "loss": 0.6713, "step": 11208 }, { "epoch": 0.34353929140615425, "grad_norm": 1.1605723427080201, "learning_rate": 7.636118818170675e-06, "loss": 0.6787, "step": 11209 }, { "epoch": 0.34356993992889545, "grad_norm": 1.1914998853489756, "learning_rate": 7.635697070770818e-06, "loss": 0.6689, "step": 11210 }, { "epoch": 0.34360058845163666, "grad_norm": 1.1492033504932104, "learning_rate": 7.635275297400764e-06, "loss": 0.6523, "step": 11211 }, { "epoch": 0.3436312369743778, "grad_norm": 1.194573180920306, "learning_rate": 7.634853498064667e-06, "loss": 0.7624, "step": 11212 }, { "epoch": 0.343661885497119, "grad_norm": 1.1963205411352205, "learning_rate": 7.634431672766685e-06, "loss": 0.6768, "step": 11213 }, { "epoch": 0.3436925340198602, "grad_norm": 1.236683297082198, "learning_rate": 7.634009821510974e-06, "loss": 0.6742, "step": 11214 }, { "epoch": 0.3437231825426014, "grad_norm": 1.3054816769270234, "learning_rate": 7.63358794430169e-06, "loss": 0.6831, "step": 11215 }, { "epoch": 0.34375383106534263, "grad_norm": 1.3491132669397072, "learning_rate": 7.63316604114299e-06, "loss": 0.7016, "step": 11216 }, { "epoch": 0.34378447958808384, "grad_norm": 1.1289174816341976, "learning_rate": 7.632744112039033e-06, "loss": 0.6089, "step": 11217 }, { "epoch": 0.34381512811082504, "grad_norm": 1.3706413066576613, "learning_rate": 7.632322156993972e-06, "loss": 0.7066, "step": 11218 }, { "epoch": 0.34384577663356625, "grad_norm": 1.3120302458980175, "learning_rate": 7.631900176011968e-06, "loss": 0.6998, "step": 11219 }, { "epoch": 0.34387642515630745, "grad_norm": 1.4062504627350025, "learning_rate": 7.63147816909718e-06, "loss": 0.7391, "step": 11220 }, { "epoch": 0.34390707367904866, "grad_norm": 1.2653027845083207, "learning_rate": 7.631056136253762e-06, "loss": 0.7111, "step": 11221 }, { "epoch": 0.34393772220178986, "grad_norm": 1.2501681566886833, "learning_rate": 7.630634077485875e-06, "loss": 0.7082, "step": 11222 }, { "epoch": 0.34396837072453107, "grad_norm": 1.4009736294399515, "learning_rate": 7.630211992797679e-06, "loss": 0.7517, "step": 11223 }, { "epoch": 0.3439990192472723, "grad_norm": 1.243515059311429, "learning_rate": 7.62978988219333e-06, "loss": 0.7631, "step": 11224 }, { "epoch": 0.3440296677700135, "grad_norm": 1.4370677035211048, "learning_rate": 7.629367745676989e-06, "loss": 0.6905, "step": 11225 }, { "epoch": 0.3440603162927547, "grad_norm": 1.180378309320595, "learning_rate": 7.628945583252814e-06, "loss": 0.6124, "step": 11226 }, { "epoch": 0.3440909648154959, "grad_norm": 1.4481401353995804, "learning_rate": 7.628523394924966e-06, "loss": 0.7514, "step": 11227 }, { "epoch": 0.3441216133382371, "grad_norm": 1.2278222729915091, "learning_rate": 7.628101180697606e-06, "loss": 0.7711, "step": 11228 }, { "epoch": 0.3441522618609783, "grad_norm": 1.2877407322261245, "learning_rate": 7.62767894057489e-06, "loss": 0.6306, "step": 11229 }, { "epoch": 0.3441829103837195, "grad_norm": 1.126977261445393, "learning_rate": 7.627256674560983e-06, "loss": 0.6672, "step": 11230 }, { "epoch": 0.3442135589064607, "grad_norm": 1.2697812411615415, "learning_rate": 7.626834382660042e-06, "loss": 0.6328, "step": 11231 }, { "epoch": 0.3442442074292019, "grad_norm": 1.2920949132715036, "learning_rate": 7.626412064876231e-06, "loss": 0.605, "step": 11232 }, { "epoch": 0.3442748559519431, "grad_norm": 1.21014304195542, "learning_rate": 7.625989721213709e-06, "loss": 0.6073, "step": 11233 }, { "epoch": 0.34430550447468433, "grad_norm": 1.2980037751055704, "learning_rate": 7.62556735167664e-06, "loss": 0.7138, "step": 11234 }, { "epoch": 0.34433615299742554, "grad_norm": 1.3008665145513278, "learning_rate": 7.625144956269183e-06, "loss": 0.6668, "step": 11235 }, { "epoch": 0.34436680152016674, "grad_norm": 0.7103510602563391, "learning_rate": 7.6247225349955016e-06, "loss": 0.4496, "step": 11236 }, { "epoch": 0.34439745004290795, "grad_norm": 1.4861719228731336, "learning_rate": 7.624300087859757e-06, "loss": 0.7212, "step": 11237 }, { "epoch": 0.34442809856564915, "grad_norm": 1.3113463271557435, "learning_rate": 7.623877614866111e-06, "loss": 0.6868, "step": 11238 }, { "epoch": 0.34445874708839036, "grad_norm": 1.2674973382437473, "learning_rate": 7.62345511601873e-06, "loss": 0.7058, "step": 11239 }, { "epoch": 0.34448939561113157, "grad_norm": 1.1572887813507609, "learning_rate": 7.623032591321773e-06, "loss": 0.7114, "step": 11240 }, { "epoch": 0.34452004413387277, "grad_norm": 1.4201208759645643, "learning_rate": 7.6226100407794055e-06, "loss": 0.7284, "step": 11241 }, { "epoch": 0.344550692656614, "grad_norm": 1.232958311235271, "learning_rate": 7.62218746439579e-06, "loss": 0.7087, "step": 11242 }, { "epoch": 0.3445813411793552, "grad_norm": 1.3834902143429884, "learning_rate": 7.62176486217509e-06, "loss": 0.6587, "step": 11243 }, { "epoch": 0.34461198970209633, "grad_norm": 1.0701852108861352, "learning_rate": 7.6213422341214695e-06, "loss": 0.6237, "step": 11244 }, { "epoch": 0.34464263822483754, "grad_norm": 1.4210808387018483, "learning_rate": 7.620919580239094e-06, "loss": 0.6575, "step": 11245 }, { "epoch": 0.34467328674757874, "grad_norm": 1.3860489557344788, "learning_rate": 7.6204969005321284e-06, "loss": 0.704, "step": 11246 }, { "epoch": 0.34470393527031995, "grad_norm": 1.253087454181457, "learning_rate": 7.620074195004734e-06, "loss": 0.7084, "step": 11247 }, { "epoch": 0.34473458379306116, "grad_norm": 1.546422103326402, "learning_rate": 7.619651463661081e-06, "loss": 0.7244, "step": 11248 }, { "epoch": 0.34476523231580236, "grad_norm": 1.2727463004943, "learning_rate": 7.619228706505329e-06, "loss": 0.6247, "step": 11249 }, { "epoch": 0.34479588083854357, "grad_norm": 0.7935400580692409, "learning_rate": 7.618805923541648e-06, "loss": 0.4447, "step": 11250 }, { "epoch": 0.3448265293612848, "grad_norm": 1.3017724136924962, "learning_rate": 7.618383114774203e-06, "loss": 0.676, "step": 11251 }, { "epoch": 0.344857177884026, "grad_norm": 1.4882678278467787, "learning_rate": 7.617960280207158e-06, "loss": 0.7366, "step": 11252 }, { "epoch": 0.3448878264067672, "grad_norm": 1.2524920277758058, "learning_rate": 7.6175374198446805e-06, "loss": 0.7142, "step": 11253 }, { "epoch": 0.3449184749295084, "grad_norm": 1.30514817221804, "learning_rate": 7.617114533690937e-06, "loss": 0.6565, "step": 11254 }, { "epoch": 0.3449491234522496, "grad_norm": 0.45933491192673565, "learning_rate": 7.6166916217500945e-06, "loss": 0.4381, "step": 11255 }, { "epoch": 0.3449797719749908, "grad_norm": 1.159589855086836, "learning_rate": 7.6162686840263204e-06, "loss": 0.6451, "step": 11256 }, { "epoch": 0.345010420497732, "grad_norm": 1.2985177857742671, "learning_rate": 7.6158457205237804e-06, "loss": 0.7435, "step": 11257 }, { "epoch": 0.3450410690204732, "grad_norm": 1.440241970751857, "learning_rate": 7.6154227312466445e-06, "loss": 0.8047, "step": 11258 }, { "epoch": 0.3450717175432144, "grad_norm": 1.1296157742249533, "learning_rate": 7.61499971619908e-06, "loss": 0.5709, "step": 11259 }, { "epoch": 0.3451023660659556, "grad_norm": 1.2438179125615616, "learning_rate": 7.614576675385253e-06, "loss": 0.664, "step": 11260 }, { "epoch": 0.34513301458869683, "grad_norm": 1.3187377737222556, "learning_rate": 7.614153608809332e-06, "loss": 0.7094, "step": 11261 }, { "epoch": 0.34516366311143803, "grad_norm": 1.325585329783343, "learning_rate": 7.613730516475487e-06, "loss": 0.594, "step": 11262 }, { "epoch": 0.34519431163417924, "grad_norm": 1.1130970718088884, "learning_rate": 7.613307398387888e-06, "loss": 0.6295, "step": 11263 }, { "epoch": 0.34522496015692045, "grad_norm": 1.0524850710185163, "learning_rate": 7.612884254550701e-06, "loss": 0.6312, "step": 11264 }, { "epoch": 0.34525560867966165, "grad_norm": 1.3123876084284152, "learning_rate": 7.612461084968099e-06, "loss": 0.7388, "step": 11265 }, { "epoch": 0.34528625720240286, "grad_norm": 1.217747606470548, "learning_rate": 7.612037889644247e-06, "loss": 0.6066, "step": 11266 }, { "epoch": 0.34531690572514406, "grad_norm": 1.4448968797852138, "learning_rate": 7.611614668583321e-06, "loss": 0.7424, "step": 11267 }, { "epoch": 0.34534755424788527, "grad_norm": 0.705587068998308, "learning_rate": 7.611191421789483e-06, "loss": 0.4152, "step": 11268 }, { "epoch": 0.3453782027706265, "grad_norm": 1.2076301596453425, "learning_rate": 7.61076814926691e-06, "loss": 0.6124, "step": 11269 }, { "epoch": 0.3454088512933677, "grad_norm": 1.2335971229949463, "learning_rate": 7.61034485101977e-06, "loss": 0.6285, "step": 11270 }, { "epoch": 0.3454394998161089, "grad_norm": 1.1248627387786667, "learning_rate": 7.6099215270522344e-06, "loss": 0.612, "step": 11271 }, { "epoch": 0.3454701483388501, "grad_norm": 1.3800050085737376, "learning_rate": 7.609498177368475e-06, "loss": 0.7202, "step": 11272 }, { "epoch": 0.3455007968615913, "grad_norm": 1.2911684069533609, "learning_rate": 7.6090748019726624e-06, "loss": 0.6986, "step": 11273 }, { "epoch": 0.3455314453843325, "grad_norm": 1.2688042652773495, "learning_rate": 7.608651400868967e-06, "loss": 0.767, "step": 11274 }, { "epoch": 0.34556209390707365, "grad_norm": 1.1655205569051872, "learning_rate": 7.608227974061562e-06, "loss": 0.6723, "step": 11275 }, { "epoch": 0.34559274242981486, "grad_norm": 0.5215935744188473, "learning_rate": 7.60780452155462e-06, "loss": 0.453, "step": 11276 }, { "epoch": 0.34562339095255606, "grad_norm": 1.3220690808992306, "learning_rate": 7.6073810433523125e-06, "loss": 0.6786, "step": 11277 }, { "epoch": 0.34565403947529727, "grad_norm": 1.2060449533995585, "learning_rate": 7.606957539458813e-06, "loss": 0.5188, "step": 11278 }, { "epoch": 0.3456846879980385, "grad_norm": 1.1371632934046538, "learning_rate": 7.606534009878293e-06, "loss": 0.5764, "step": 11279 }, { "epoch": 0.3457153365207797, "grad_norm": 0.4686652329710627, "learning_rate": 7.606110454614928e-06, "loss": 0.4066, "step": 11280 }, { "epoch": 0.3457459850435209, "grad_norm": 1.2734072307013395, "learning_rate": 7.605686873672887e-06, "loss": 0.6976, "step": 11281 }, { "epoch": 0.3457766335662621, "grad_norm": 0.4726965953794728, "learning_rate": 7.605263267056349e-06, "loss": 0.4261, "step": 11282 }, { "epoch": 0.3458072820890033, "grad_norm": 0.4462039604684606, "learning_rate": 7.604839634769485e-06, "loss": 0.419, "step": 11283 }, { "epoch": 0.3458379306117445, "grad_norm": 1.1219448735348163, "learning_rate": 7.60441597681647e-06, "loss": 0.664, "step": 11284 }, { "epoch": 0.3458685791344857, "grad_norm": 1.2480322549043816, "learning_rate": 7.603992293201476e-06, "loss": 0.6588, "step": 11285 }, { "epoch": 0.3458992276572269, "grad_norm": 1.1553394570716036, "learning_rate": 7.603568583928682e-06, "loss": 0.575, "step": 11286 }, { "epoch": 0.3459298761799681, "grad_norm": 1.3378712236127697, "learning_rate": 7.6031448490022595e-06, "loss": 0.7432, "step": 11287 }, { "epoch": 0.3459605247027093, "grad_norm": 1.1963219639891995, "learning_rate": 7.602721088426385e-06, "loss": 0.6941, "step": 11288 }, { "epoch": 0.34599117322545053, "grad_norm": 1.1215059071223477, "learning_rate": 7.602297302205234e-06, "loss": 0.5421, "step": 11289 }, { "epoch": 0.34602182174819174, "grad_norm": 1.2730696519190945, "learning_rate": 7.601873490342982e-06, "loss": 0.656, "step": 11290 }, { "epoch": 0.34605247027093294, "grad_norm": 0.56200705739869, "learning_rate": 7.601449652843804e-06, "loss": 0.4161, "step": 11291 }, { "epoch": 0.34608311879367415, "grad_norm": 1.2501341808804962, "learning_rate": 7.601025789711877e-06, "loss": 0.6956, "step": 11292 }, { "epoch": 0.34611376731641535, "grad_norm": 1.3818799697718922, "learning_rate": 7.6006019009513775e-06, "loss": 0.7599, "step": 11293 }, { "epoch": 0.34614441583915656, "grad_norm": 1.197700070360127, "learning_rate": 7.600177986566483e-06, "loss": 0.6045, "step": 11294 }, { "epoch": 0.34617506436189777, "grad_norm": 1.3116646784276962, "learning_rate": 7.5997540465613686e-06, "loss": 0.6583, "step": 11295 }, { "epoch": 0.34620571288463897, "grad_norm": 1.3217362973375406, "learning_rate": 7.599330080940212e-06, "loss": 0.6611, "step": 11296 }, { "epoch": 0.3462363614073802, "grad_norm": 1.3390818090619094, "learning_rate": 7.598906089707192e-06, "loss": 0.7388, "step": 11297 }, { "epoch": 0.3462670099301214, "grad_norm": 1.1395365939550188, "learning_rate": 7.598482072866485e-06, "loss": 0.658, "step": 11298 }, { "epoch": 0.3462976584528626, "grad_norm": 1.2580384975875154, "learning_rate": 7.598058030422269e-06, "loss": 0.6635, "step": 11299 }, { "epoch": 0.3463283069756038, "grad_norm": 1.2065647213135986, "learning_rate": 7.597633962378722e-06, "loss": 0.6075, "step": 11300 }, { "epoch": 0.346358955498345, "grad_norm": 1.1529190108826937, "learning_rate": 7.597209868740024e-06, "loss": 0.6208, "step": 11301 }, { "epoch": 0.3463896040210862, "grad_norm": 1.136910590483231, "learning_rate": 7.596785749510352e-06, "loss": 0.6809, "step": 11302 }, { "epoch": 0.3464202525438274, "grad_norm": 1.1469981159570577, "learning_rate": 7.596361604693886e-06, "loss": 0.694, "step": 11303 }, { "epoch": 0.3464509010665686, "grad_norm": 1.2376510432378116, "learning_rate": 7.595937434294804e-06, "loss": 0.7272, "step": 11304 }, { "epoch": 0.3464815495893098, "grad_norm": 1.2665827922809307, "learning_rate": 7.595513238317288e-06, "loss": 0.6783, "step": 11305 }, { "epoch": 0.346512198112051, "grad_norm": 1.1782548850326988, "learning_rate": 7.595089016765516e-06, "loss": 0.5631, "step": 11306 }, { "epoch": 0.3465428466347922, "grad_norm": 2.924865045959349, "learning_rate": 7.5946647696436645e-06, "loss": 0.7046, "step": 11307 }, { "epoch": 0.3465734951575334, "grad_norm": 1.2726983594196575, "learning_rate": 7.59424049695592e-06, "loss": 0.6716, "step": 11308 }, { "epoch": 0.3466041436802746, "grad_norm": 1.1667622325800866, "learning_rate": 7.593816198706461e-06, "loss": 0.6423, "step": 11309 }, { "epoch": 0.3466347922030158, "grad_norm": 1.2681454033612087, "learning_rate": 7.593391874899465e-06, "loss": 0.6742, "step": 11310 }, { "epoch": 0.346665440725757, "grad_norm": 0.567203990920906, "learning_rate": 7.592967525539117e-06, "loss": 0.4416, "step": 11311 }, { "epoch": 0.3466960892484982, "grad_norm": 1.2417352754558253, "learning_rate": 7.592543150629595e-06, "loss": 0.6943, "step": 11312 }, { "epoch": 0.3467267377712394, "grad_norm": 1.1949856953106228, "learning_rate": 7.5921187501750836e-06, "loss": 0.7202, "step": 11313 }, { "epoch": 0.3467573862939806, "grad_norm": 0.48883610989973714, "learning_rate": 7.591694324179761e-06, "loss": 0.4294, "step": 11314 }, { "epoch": 0.3467880348167218, "grad_norm": 1.2263864757523357, "learning_rate": 7.591269872647813e-06, "loss": 0.678, "step": 11315 }, { "epoch": 0.34681868333946303, "grad_norm": 1.339754689299679, "learning_rate": 7.590845395583418e-06, "loss": 0.7332, "step": 11316 }, { "epoch": 0.34684933186220424, "grad_norm": 1.2893964401053755, "learning_rate": 7.590420892990763e-06, "loss": 0.7238, "step": 11317 }, { "epoch": 0.34687998038494544, "grad_norm": 1.3242056355685703, "learning_rate": 7.589996364874026e-06, "loss": 0.6531, "step": 11318 }, { "epoch": 0.34691062890768665, "grad_norm": 1.338795742087412, "learning_rate": 7.5895718112373925e-06, "loss": 0.7882, "step": 11319 }, { "epoch": 0.34694127743042785, "grad_norm": 1.6170040855521468, "learning_rate": 7.589147232085046e-06, "loss": 0.7374, "step": 11320 }, { "epoch": 0.34697192595316906, "grad_norm": 1.150739241383086, "learning_rate": 7.58872262742117e-06, "loss": 0.6794, "step": 11321 }, { "epoch": 0.34700257447591026, "grad_norm": 0.5617507899365919, "learning_rate": 7.588297997249946e-06, "loss": 0.4377, "step": 11322 }, { "epoch": 0.34703322299865147, "grad_norm": 0.625340266191565, "learning_rate": 7.58787334157556e-06, "loss": 0.42, "step": 11323 }, { "epoch": 0.3470638715213927, "grad_norm": 1.7021863122808487, "learning_rate": 7.587448660402197e-06, "loss": 0.701, "step": 11324 }, { "epoch": 0.3470945200441339, "grad_norm": 0.44036065572385985, "learning_rate": 7.587023953734041e-06, "loss": 0.4247, "step": 11325 }, { "epoch": 0.3471251685668751, "grad_norm": 1.442018314594026, "learning_rate": 7.5865992215752725e-06, "loss": 0.5662, "step": 11326 }, { "epoch": 0.3471558170896163, "grad_norm": 1.1853615387887013, "learning_rate": 7.5861744639300825e-06, "loss": 0.6079, "step": 11327 }, { "epoch": 0.3471864656123575, "grad_norm": 1.1374060221168603, "learning_rate": 7.585749680802654e-06, "loss": 0.6445, "step": 11328 }, { "epoch": 0.3472171141350987, "grad_norm": 1.269340130133271, "learning_rate": 7.58532487219717e-06, "loss": 0.6693, "step": 11329 }, { "epoch": 0.3472477626578399, "grad_norm": 1.243962236577875, "learning_rate": 7.584900038117822e-06, "loss": 0.6865, "step": 11330 }, { "epoch": 0.3472784111805811, "grad_norm": 0.6161119895135322, "learning_rate": 7.584475178568789e-06, "loss": 0.453, "step": 11331 }, { "epoch": 0.3473090597033223, "grad_norm": 1.2181187630729724, "learning_rate": 7.584050293554264e-06, "loss": 0.7034, "step": 11332 }, { "epoch": 0.3473397082260635, "grad_norm": 1.2107734218841966, "learning_rate": 7.583625383078428e-06, "loss": 0.7147, "step": 11333 }, { "epoch": 0.34737035674880473, "grad_norm": 1.2299416937577532, "learning_rate": 7.583200447145471e-06, "loss": 0.6268, "step": 11334 }, { "epoch": 0.34740100527154594, "grad_norm": 1.2663867803359206, "learning_rate": 7.582775485759579e-06, "loss": 0.6371, "step": 11335 }, { "epoch": 0.34743165379428714, "grad_norm": 1.1960635678166531, "learning_rate": 7.58235049892494e-06, "loss": 0.5786, "step": 11336 }, { "epoch": 0.3474623023170283, "grad_norm": 1.2780267144928454, "learning_rate": 7.58192548664574e-06, "loss": 0.7358, "step": 11337 }, { "epoch": 0.3474929508397695, "grad_norm": 1.2496920665767555, "learning_rate": 7.581500448926168e-06, "loss": 0.7968, "step": 11338 }, { "epoch": 0.3475235993625107, "grad_norm": 1.359947601718557, "learning_rate": 7.58107538577041e-06, "loss": 0.619, "step": 11339 }, { "epoch": 0.3475542478852519, "grad_norm": 1.2820064772144084, "learning_rate": 7.580650297182658e-06, "loss": 0.6955, "step": 11340 }, { "epoch": 0.3475848964079931, "grad_norm": 1.118562196143488, "learning_rate": 7.580225183167098e-06, "loss": 0.6275, "step": 11341 }, { "epoch": 0.3476155449307343, "grad_norm": 1.405959536889638, "learning_rate": 7.579800043727917e-06, "loss": 0.6346, "step": 11342 }, { "epoch": 0.3476461934534755, "grad_norm": 1.354192056998777, "learning_rate": 7.579374878869308e-06, "loss": 0.6892, "step": 11343 }, { "epoch": 0.34767684197621673, "grad_norm": 1.3153240629661036, "learning_rate": 7.5789496885954585e-06, "loss": 0.7916, "step": 11344 }, { "epoch": 0.34770749049895794, "grad_norm": 0.6415356193930283, "learning_rate": 7.578524472910558e-06, "loss": 0.4256, "step": 11345 }, { "epoch": 0.34773813902169914, "grad_norm": 1.2797031929325424, "learning_rate": 7.578099231818794e-06, "loss": 0.729, "step": 11346 }, { "epoch": 0.34776878754444035, "grad_norm": 0.5030224124638473, "learning_rate": 7.57767396532436e-06, "loss": 0.4404, "step": 11347 }, { "epoch": 0.34779943606718156, "grad_norm": 1.145420097436751, "learning_rate": 7.5772486734314455e-06, "loss": 0.7041, "step": 11348 }, { "epoch": 0.34783008458992276, "grad_norm": 1.1762118900583844, "learning_rate": 7.576823356144241e-06, "loss": 0.7559, "step": 11349 }, { "epoch": 0.34786073311266397, "grad_norm": 0.4543234262999969, "learning_rate": 7.576398013466935e-06, "loss": 0.4205, "step": 11350 }, { "epoch": 0.3478913816354052, "grad_norm": 1.4083001040683514, "learning_rate": 7.575972645403722e-06, "loss": 0.7986, "step": 11351 }, { "epoch": 0.3479220301581464, "grad_norm": 1.2394647099639593, "learning_rate": 7.575547251958788e-06, "loss": 0.6714, "step": 11352 }, { "epoch": 0.3479526786808876, "grad_norm": 0.4930861156824386, "learning_rate": 7.57512183313633e-06, "loss": 0.4281, "step": 11353 }, { "epoch": 0.3479833272036288, "grad_norm": 1.1333065197215455, "learning_rate": 7.5746963889405374e-06, "loss": 0.6865, "step": 11354 }, { "epoch": 0.34801397572637, "grad_norm": 1.1470746259618998, "learning_rate": 7.574270919375604e-06, "loss": 0.6263, "step": 11355 }, { "epoch": 0.3480446242491112, "grad_norm": 1.3147892392831808, "learning_rate": 7.57384542444572e-06, "loss": 0.6009, "step": 11356 }, { "epoch": 0.3480752727718524, "grad_norm": 1.1943467714357885, "learning_rate": 7.573419904155077e-06, "loss": 0.7465, "step": 11357 }, { "epoch": 0.3481059212945936, "grad_norm": 1.282382077870483, "learning_rate": 7.57299435850787e-06, "loss": 0.7277, "step": 11358 }, { "epoch": 0.3481365698173348, "grad_norm": 1.3729546985673564, "learning_rate": 7.572568787508292e-06, "loss": 0.6713, "step": 11359 }, { "epoch": 0.348167218340076, "grad_norm": 0.5120207809733651, "learning_rate": 7.572143191160535e-06, "loss": 0.4278, "step": 11360 }, { "epoch": 0.34819786686281723, "grad_norm": 1.3266412123210523, "learning_rate": 7.5717175694687925e-06, "loss": 0.6855, "step": 11361 }, { "epoch": 0.34822851538555843, "grad_norm": 1.3400256907824193, "learning_rate": 7.57129192243726e-06, "loss": 0.7339, "step": 11362 }, { "epoch": 0.34825916390829964, "grad_norm": 1.2262672084243473, "learning_rate": 7.57086625007013e-06, "loss": 0.6712, "step": 11363 }, { "epoch": 0.34828981243104085, "grad_norm": 1.1852734241332372, "learning_rate": 7.570440552371596e-06, "loss": 0.6757, "step": 11364 }, { "epoch": 0.34832046095378205, "grad_norm": 1.422868767159406, "learning_rate": 7.570014829345854e-06, "loss": 0.6787, "step": 11365 }, { "epoch": 0.34835110947652326, "grad_norm": 1.365786140096565, "learning_rate": 7.5695890809971e-06, "loss": 0.7335, "step": 11366 }, { "epoch": 0.34838175799926446, "grad_norm": 1.4690962590272758, "learning_rate": 7.569163307329526e-06, "loss": 0.8259, "step": 11367 }, { "epoch": 0.3484124065220056, "grad_norm": 1.223131589616119, "learning_rate": 7.568737508347327e-06, "loss": 0.642, "step": 11368 }, { "epoch": 0.3484430550447468, "grad_norm": 0.4550759311823341, "learning_rate": 7.568311684054701e-06, "loss": 0.4358, "step": 11369 }, { "epoch": 0.348473703567488, "grad_norm": 1.2395148792294466, "learning_rate": 7.5678858344558436e-06, "loss": 0.6978, "step": 11370 }, { "epoch": 0.34850435209022923, "grad_norm": 1.223743286824891, "learning_rate": 7.567459959554951e-06, "loss": 0.6425, "step": 11371 }, { "epoch": 0.34853500061297044, "grad_norm": 0.4798548286937914, "learning_rate": 7.567034059356216e-06, "loss": 0.4328, "step": 11372 }, { "epoch": 0.34856564913571164, "grad_norm": 1.1675393910351062, "learning_rate": 7.56660813386384e-06, "loss": 0.6562, "step": 11373 }, { "epoch": 0.34859629765845285, "grad_norm": 0.47232093630219857, "learning_rate": 7.566182183082016e-06, "loss": 0.4291, "step": 11374 }, { "epoch": 0.34862694618119405, "grad_norm": 1.3758864657995988, "learning_rate": 7.565756207014942e-06, "loss": 0.7164, "step": 11375 }, { "epoch": 0.34865759470393526, "grad_norm": 1.247304863261131, "learning_rate": 7.565330205666817e-06, "loss": 0.7135, "step": 11376 }, { "epoch": 0.34868824322667646, "grad_norm": 1.2807232084750217, "learning_rate": 7.564904179041837e-06, "loss": 0.6214, "step": 11377 }, { "epoch": 0.34871889174941767, "grad_norm": 1.335267476809676, "learning_rate": 7.564478127144199e-06, "loss": 0.7569, "step": 11378 }, { "epoch": 0.3487495402721589, "grad_norm": 1.313214026589788, "learning_rate": 7.564052049978102e-06, "loss": 0.6813, "step": 11379 }, { "epoch": 0.3487801887949001, "grad_norm": 1.4244230749606943, "learning_rate": 7.563625947547743e-06, "loss": 0.6899, "step": 11380 }, { "epoch": 0.3488108373176413, "grad_norm": 1.2365471126541412, "learning_rate": 7.5631998198573245e-06, "loss": 0.7126, "step": 11381 }, { "epoch": 0.3488414858403825, "grad_norm": 2.0157388501347837, "learning_rate": 7.5627736669110405e-06, "loss": 0.6327, "step": 11382 }, { "epoch": 0.3488721343631237, "grad_norm": 0.5210372505130022, "learning_rate": 7.562347488713091e-06, "loss": 0.459, "step": 11383 }, { "epoch": 0.3489027828858649, "grad_norm": 1.3187677287735573, "learning_rate": 7.561921285267677e-06, "loss": 0.6824, "step": 11384 }, { "epoch": 0.3489334314086061, "grad_norm": 1.1607117586208138, "learning_rate": 7.561495056578998e-06, "loss": 0.6736, "step": 11385 }, { "epoch": 0.3489640799313473, "grad_norm": 0.45622862862586483, "learning_rate": 7.561068802651253e-06, "loss": 0.428, "step": 11386 }, { "epoch": 0.3489947284540885, "grad_norm": 1.2727818471952674, "learning_rate": 7.560642523488639e-06, "loss": 0.7216, "step": 11387 }, { "epoch": 0.3490253769768297, "grad_norm": 1.2242160248519633, "learning_rate": 7.560216219095362e-06, "loss": 0.7222, "step": 11388 }, { "epoch": 0.34905602549957093, "grad_norm": 1.2669687002328545, "learning_rate": 7.5597898894756176e-06, "loss": 0.6018, "step": 11389 }, { "epoch": 0.34908667402231214, "grad_norm": 1.194382082561661, "learning_rate": 7.55936353463361e-06, "loss": 0.634, "step": 11390 }, { "epoch": 0.34911732254505334, "grad_norm": 1.368597001091596, "learning_rate": 7.558937154573538e-06, "loss": 0.67, "step": 11391 }, { "epoch": 0.34914797106779455, "grad_norm": 1.1749525202314843, "learning_rate": 7.558510749299604e-06, "loss": 0.6613, "step": 11392 }, { "epoch": 0.34917861959053575, "grad_norm": 1.2056952883138192, "learning_rate": 7.5580843188160084e-06, "loss": 0.6803, "step": 11393 }, { "epoch": 0.34920926811327696, "grad_norm": 1.3600679194021326, "learning_rate": 7.557657863126954e-06, "loss": 0.6981, "step": 11394 }, { "epoch": 0.34923991663601817, "grad_norm": 1.1598193768062808, "learning_rate": 7.557231382236641e-06, "loss": 0.6464, "step": 11395 }, { "epoch": 0.34927056515875937, "grad_norm": 1.1823773191547182, "learning_rate": 7.556804876149273e-06, "loss": 0.6863, "step": 11396 }, { "epoch": 0.3493012136815006, "grad_norm": 1.2178744234318233, "learning_rate": 7.556378344869053e-06, "loss": 0.7536, "step": 11397 }, { "epoch": 0.3493318622042418, "grad_norm": 1.2657681566959402, "learning_rate": 7.5559517884001834e-06, "loss": 0.7545, "step": 11398 }, { "epoch": 0.34936251072698293, "grad_norm": 1.2770642533534755, "learning_rate": 7.555525206746868e-06, "loss": 0.6905, "step": 11399 }, { "epoch": 0.34939315924972414, "grad_norm": 1.2605058852506152, "learning_rate": 7.555098599913306e-06, "loss": 0.6423, "step": 11400 }, { "epoch": 0.34942380777246534, "grad_norm": 1.6902294646635718, "learning_rate": 7.5546719679037066e-06, "loss": 0.7034, "step": 11401 }, { "epoch": 0.34945445629520655, "grad_norm": 1.2881171205763247, "learning_rate": 7.554245310722268e-06, "loss": 0.6955, "step": 11402 }, { "epoch": 0.34948510481794776, "grad_norm": 1.2177252377083816, "learning_rate": 7.553818628373198e-06, "loss": 0.6915, "step": 11403 }, { "epoch": 0.34951575334068896, "grad_norm": 1.4189129346430702, "learning_rate": 7.5533919208607e-06, "loss": 0.4683, "step": 11404 }, { "epoch": 0.34954640186343017, "grad_norm": 1.1896585585521033, "learning_rate": 7.552965188188979e-06, "loss": 0.7101, "step": 11405 }, { "epoch": 0.3495770503861714, "grad_norm": 1.3338767116050017, "learning_rate": 7.552538430362236e-06, "loss": 0.7232, "step": 11406 }, { "epoch": 0.3496076989089126, "grad_norm": 0.5715724136551872, "learning_rate": 7.552111647384681e-06, "loss": 0.4237, "step": 11407 }, { "epoch": 0.3496383474316538, "grad_norm": 1.3536327924392946, "learning_rate": 7.551684839260516e-06, "loss": 0.6821, "step": 11408 }, { "epoch": 0.349668995954395, "grad_norm": 1.1901650087862208, "learning_rate": 7.5512580059939475e-06, "loss": 0.6134, "step": 11409 }, { "epoch": 0.3496996444771362, "grad_norm": 1.1481104082088391, "learning_rate": 7.550831147589182e-06, "loss": 0.6158, "step": 11410 }, { "epoch": 0.3497302929998774, "grad_norm": 1.287966678478144, "learning_rate": 7.550404264050423e-06, "loss": 0.6645, "step": 11411 }, { "epoch": 0.3497609415226186, "grad_norm": 0.4514743834572055, "learning_rate": 7.549977355381879e-06, "loss": 0.4243, "step": 11412 }, { "epoch": 0.3497915900453598, "grad_norm": 1.2288648082786324, "learning_rate": 7.549550421587757e-06, "loss": 0.6649, "step": 11413 }, { "epoch": 0.349822238568101, "grad_norm": 1.3658792054328346, "learning_rate": 7.549123462672261e-06, "loss": 0.6823, "step": 11414 }, { "epoch": 0.3498528870908422, "grad_norm": 1.1811840289614934, "learning_rate": 7.548696478639599e-06, "loss": 0.6237, "step": 11415 }, { "epoch": 0.34988353561358343, "grad_norm": 1.1776998041162878, "learning_rate": 7.548269469493978e-06, "loss": 0.6654, "step": 11416 }, { "epoch": 0.34991418413632464, "grad_norm": 1.1868775427937908, "learning_rate": 7.547842435239608e-06, "loss": 0.6513, "step": 11417 }, { "epoch": 0.34994483265906584, "grad_norm": 1.265440640122502, "learning_rate": 7.5474153758806935e-06, "loss": 0.7116, "step": 11418 }, { "epoch": 0.34997548118180705, "grad_norm": 1.2650293064165377, "learning_rate": 7.546988291421442e-06, "loss": 0.6313, "step": 11419 }, { "epoch": 0.35000612970454825, "grad_norm": 1.2306922775265234, "learning_rate": 7.546561181866065e-06, "loss": 0.7546, "step": 11420 }, { "epoch": 0.35003677822728946, "grad_norm": 1.3662304573024955, "learning_rate": 7.546134047218769e-06, "loss": 0.5837, "step": 11421 }, { "epoch": 0.35006742675003066, "grad_norm": 0.6024969751462834, "learning_rate": 7.545706887483764e-06, "loss": 0.4444, "step": 11422 }, { "epoch": 0.35009807527277187, "grad_norm": 1.1114739472536028, "learning_rate": 7.5452797026652544e-06, "loss": 0.7029, "step": 11423 }, { "epoch": 0.3501287237955131, "grad_norm": 1.2530279689206616, "learning_rate": 7.544852492767455e-06, "loss": 0.7449, "step": 11424 }, { "epoch": 0.3501593723182543, "grad_norm": 0.46824148527852055, "learning_rate": 7.544425257794572e-06, "loss": 0.4354, "step": 11425 }, { "epoch": 0.3501900208409955, "grad_norm": 1.2435443015841872, "learning_rate": 7.543997997750816e-06, "loss": 0.6912, "step": 11426 }, { "epoch": 0.3502206693637367, "grad_norm": 1.3289508532288952, "learning_rate": 7.543570712640398e-06, "loss": 0.744, "step": 11427 }, { "epoch": 0.3502513178864779, "grad_norm": 1.4270238589721456, "learning_rate": 7.543143402467525e-06, "loss": 0.8181, "step": 11428 }, { "epoch": 0.3502819664092191, "grad_norm": 1.0990789959252252, "learning_rate": 7.542716067236411e-06, "loss": 0.6756, "step": 11429 }, { "epoch": 0.35031261493196025, "grad_norm": 1.3326587706597626, "learning_rate": 7.542288706951264e-06, "loss": 0.7414, "step": 11430 }, { "epoch": 0.35034326345470146, "grad_norm": 1.2604230534400445, "learning_rate": 7.541861321616297e-06, "loss": 0.6409, "step": 11431 }, { "epoch": 0.35037391197744266, "grad_norm": 1.1053653147022104, "learning_rate": 7.541433911235719e-06, "loss": 0.6376, "step": 11432 }, { "epoch": 0.35040456050018387, "grad_norm": 1.2702842885557144, "learning_rate": 7.541006475813744e-06, "loss": 0.7559, "step": 11433 }, { "epoch": 0.3504352090229251, "grad_norm": 1.311755384482577, "learning_rate": 7.540579015354579e-06, "loss": 0.686, "step": 11434 }, { "epoch": 0.3504658575456663, "grad_norm": 1.2158716703266212, "learning_rate": 7.540151529862441e-06, "loss": 0.7111, "step": 11435 }, { "epoch": 0.3504965060684075, "grad_norm": 1.2328352942097094, "learning_rate": 7.5397240193415415e-06, "loss": 0.6957, "step": 11436 }, { "epoch": 0.3505271545911487, "grad_norm": 1.179680312952539, "learning_rate": 7.539296483796088e-06, "loss": 0.6679, "step": 11437 }, { "epoch": 0.3505578031138899, "grad_norm": 1.370941630200097, "learning_rate": 7.538868923230298e-06, "loss": 0.6077, "step": 11438 }, { "epoch": 0.3505884516366311, "grad_norm": 1.457670102780447, "learning_rate": 7.538441337648383e-06, "loss": 0.7543, "step": 11439 }, { "epoch": 0.3506191001593723, "grad_norm": 0.5515845293963604, "learning_rate": 7.538013727054557e-06, "loss": 0.4156, "step": 11440 }, { "epoch": 0.3506497486821135, "grad_norm": 1.2482332595479066, "learning_rate": 7.537586091453031e-06, "loss": 0.6346, "step": 11441 }, { "epoch": 0.3506803972048547, "grad_norm": 1.3256155662568767, "learning_rate": 7.537158430848022e-06, "loss": 0.6129, "step": 11442 }, { "epoch": 0.3507110457275959, "grad_norm": 1.1668345219932053, "learning_rate": 7.536730745243739e-06, "loss": 0.6542, "step": 11443 }, { "epoch": 0.35074169425033713, "grad_norm": 1.2371363608874901, "learning_rate": 7.5363030346444014e-06, "loss": 0.6501, "step": 11444 }, { "epoch": 0.35077234277307834, "grad_norm": 1.208824165288298, "learning_rate": 7.535875299054218e-06, "loss": 0.7089, "step": 11445 }, { "epoch": 0.35080299129581954, "grad_norm": 1.20497948118013, "learning_rate": 7.535447538477409e-06, "loss": 0.5937, "step": 11446 }, { "epoch": 0.35083363981856075, "grad_norm": 1.2184298054930693, "learning_rate": 7.535019752918186e-06, "loss": 0.6156, "step": 11447 }, { "epoch": 0.35086428834130196, "grad_norm": 1.316443898281678, "learning_rate": 7.5345919423807655e-06, "loss": 0.7871, "step": 11448 }, { "epoch": 0.35089493686404316, "grad_norm": 1.3137004191312247, "learning_rate": 7.534164106869361e-06, "loss": 0.6997, "step": 11449 }, { "epoch": 0.35092558538678437, "grad_norm": 1.1856416625255903, "learning_rate": 7.53373624638819e-06, "loss": 0.7206, "step": 11450 }, { "epoch": 0.3509562339095256, "grad_norm": 1.1411404122023368, "learning_rate": 7.533308360941466e-06, "loss": 0.6035, "step": 11451 }, { "epoch": 0.3509868824322668, "grad_norm": 0.5337015674451749, "learning_rate": 7.532880450533408e-06, "loss": 0.4204, "step": 11452 }, { "epoch": 0.351017530955008, "grad_norm": 1.3444674662382525, "learning_rate": 7.53245251516823e-06, "loss": 0.6834, "step": 11453 }, { "epoch": 0.3510481794777492, "grad_norm": 1.2291159925035697, "learning_rate": 7.53202455485015e-06, "loss": 0.6783, "step": 11454 }, { "epoch": 0.3510788280004904, "grad_norm": 1.1833006643288357, "learning_rate": 7.531596569583385e-06, "loss": 0.6519, "step": 11455 }, { "epoch": 0.3511094765232316, "grad_norm": 1.1206891191951907, "learning_rate": 7.53116855937215e-06, "loss": 0.6944, "step": 11456 }, { "epoch": 0.3511401250459728, "grad_norm": 1.4003685503795076, "learning_rate": 7.530740524220663e-06, "loss": 0.7233, "step": 11457 }, { "epoch": 0.351170773568714, "grad_norm": 1.278593325587105, "learning_rate": 7.530312464133143e-06, "loss": 0.6852, "step": 11458 }, { "epoch": 0.3512014220914552, "grad_norm": 1.5103083733283968, "learning_rate": 7.529884379113808e-06, "loss": 0.7428, "step": 11459 }, { "epoch": 0.3512320706141964, "grad_norm": 0.48188308258398865, "learning_rate": 7.529456269166872e-06, "loss": 0.4293, "step": 11460 }, { "epoch": 0.3512627191369376, "grad_norm": 1.2044755403476133, "learning_rate": 7.529028134296559e-06, "loss": 0.7045, "step": 11461 }, { "epoch": 0.3512933676596788, "grad_norm": 1.3453040926420583, "learning_rate": 7.5285999745070825e-06, "loss": 0.5941, "step": 11462 }, { "epoch": 0.35132401618242, "grad_norm": 1.356141457297119, "learning_rate": 7.528171789802665e-06, "loss": 0.6646, "step": 11463 }, { "epoch": 0.3513546647051612, "grad_norm": 1.3029996414875509, "learning_rate": 7.527743580187525e-06, "loss": 0.639, "step": 11464 }, { "epoch": 0.3513853132279024, "grad_norm": 1.3973608600341616, "learning_rate": 7.527315345665879e-06, "loss": 0.6841, "step": 11465 }, { "epoch": 0.3514159617506436, "grad_norm": 1.206315340441087, "learning_rate": 7.526887086241947e-06, "loss": 0.6194, "step": 11466 }, { "epoch": 0.3514466102733848, "grad_norm": 0.45161087081945084, "learning_rate": 7.526458801919954e-06, "loss": 0.4306, "step": 11467 }, { "epoch": 0.351477258796126, "grad_norm": 1.3664910935619952, "learning_rate": 7.526030492704113e-06, "loss": 0.7076, "step": 11468 }, { "epoch": 0.3515079073188672, "grad_norm": 1.1893586712232094, "learning_rate": 7.525602158598647e-06, "loss": 0.6081, "step": 11469 }, { "epoch": 0.3515385558416084, "grad_norm": 1.4415023561206846, "learning_rate": 7.525173799607778e-06, "loss": 0.7145, "step": 11470 }, { "epoch": 0.35156920436434963, "grad_norm": 0.4487529108593054, "learning_rate": 7.5247454157357254e-06, "loss": 0.4388, "step": 11471 }, { "epoch": 0.35159985288709084, "grad_norm": 1.3662905195947401, "learning_rate": 7.52431700698671e-06, "loss": 0.6903, "step": 11472 }, { "epoch": 0.35163050140983204, "grad_norm": 1.470891167050762, "learning_rate": 7.523888573364953e-06, "loss": 0.8383, "step": 11473 }, { "epoch": 0.35166114993257325, "grad_norm": 1.1980962539871718, "learning_rate": 7.523460114874677e-06, "loss": 0.6863, "step": 11474 }, { "epoch": 0.35169179845531445, "grad_norm": 1.54717776875019, "learning_rate": 7.523031631520101e-06, "loss": 0.6515, "step": 11475 }, { "epoch": 0.35172244697805566, "grad_norm": 1.178674802548555, "learning_rate": 7.52260312330545e-06, "loss": 0.6305, "step": 11476 }, { "epoch": 0.35175309550079686, "grad_norm": 1.3253255244722337, "learning_rate": 7.522174590234945e-06, "loss": 0.643, "step": 11477 }, { "epoch": 0.35178374402353807, "grad_norm": 1.285259320377778, "learning_rate": 7.521746032312809e-06, "loss": 0.7647, "step": 11478 }, { "epoch": 0.3518143925462793, "grad_norm": 1.2233618985608805, "learning_rate": 7.521317449543263e-06, "loss": 0.6819, "step": 11479 }, { "epoch": 0.3518450410690205, "grad_norm": 1.3529657102898955, "learning_rate": 7.520888841930532e-06, "loss": 0.7043, "step": 11480 }, { "epoch": 0.3518756895917617, "grad_norm": 1.0744344174247205, "learning_rate": 7.520460209478837e-06, "loss": 0.6352, "step": 11481 }, { "epoch": 0.3519063381145029, "grad_norm": 1.1810004234108626, "learning_rate": 7.520031552192404e-06, "loss": 0.6716, "step": 11482 }, { "epoch": 0.3519369866372441, "grad_norm": 1.3490149968661604, "learning_rate": 7.519602870075455e-06, "loss": 0.6492, "step": 11483 }, { "epoch": 0.3519676351599853, "grad_norm": 2.051050118830089, "learning_rate": 7.519174163132212e-06, "loss": 0.7155, "step": 11484 }, { "epoch": 0.3519982836827265, "grad_norm": 1.2485689071685697, "learning_rate": 7.518745431366905e-06, "loss": 0.5879, "step": 11485 }, { "epoch": 0.3520289322054677, "grad_norm": 1.298207540970653, "learning_rate": 7.5183166747837534e-06, "loss": 0.5861, "step": 11486 }, { "epoch": 0.3520595807282089, "grad_norm": 1.259004175585053, "learning_rate": 7.517887893386984e-06, "loss": 0.6058, "step": 11487 }, { "epoch": 0.3520902292509501, "grad_norm": 1.1635814319262534, "learning_rate": 7.517459087180819e-06, "loss": 0.5832, "step": 11488 }, { "epoch": 0.35212087777369133, "grad_norm": 1.1399230205989233, "learning_rate": 7.517030256169486e-06, "loss": 0.5942, "step": 11489 }, { "epoch": 0.35215152629643254, "grad_norm": 1.2342165928124693, "learning_rate": 7.516601400357211e-06, "loss": 0.696, "step": 11490 }, { "epoch": 0.35218217481917374, "grad_norm": 1.18982535537652, "learning_rate": 7.516172519748218e-06, "loss": 0.6029, "step": 11491 }, { "epoch": 0.3522128233419149, "grad_norm": 1.2364999611999123, "learning_rate": 7.515743614346734e-06, "loss": 0.698, "step": 11492 }, { "epoch": 0.3522434718646561, "grad_norm": 1.4906606108662919, "learning_rate": 7.5153146841569825e-06, "loss": 0.659, "step": 11493 }, { "epoch": 0.3522741203873973, "grad_norm": 0.47432207290723094, "learning_rate": 7.514885729183195e-06, "loss": 0.437, "step": 11494 }, { "epoch": 0.3523047689101385, "grad_norm": 1.290007778193982, "learning_rate": 7.514456749429592e-06, "loss": 0.7491, "step": 11495 }, { "epoch": 0.3523354174328797, "grad_norm": 1.2234606271219555, "learning_rate": 7.514027744900404e-06, "loss": 0.6154, "step": 11496 }, { "epoch": 0.3523660659556209, "grad_norm": 1.1983827797646815, "learning_rate": 7.513598715599857e-06, "loss": 0.6199, "step": 11497 }, { "epoch": 0.3523967144783621, "grad_norm": 1.2630455897290345, "learning_rate": 7.51316966153218e-06, "loss": 0.5868, "step": 11498 }, { "epoch": 0.35242736300110333, "grad_norm": 1.270086662938986, "learning_rate": 7.512740582701598e-06, "loss": 0.7022, "step": 11499 }, { "epoch": 0.35245801152384454, "grad_norm": 1.3090892088602797, "learning_rate": 7.512311479112342e-06, "loss": 0.7005, "step": 11500 }, { "epoch": 0.35248866004658574, "grad_norm": 0.4984675362302929, "learning_rate": 7.511882350768636e-06, "loss": 0.4492, "step": 11501 }, { "epoch": 0.35251930856932695, "grad_norm": 1.168167954469697, "learning_rate": 7.511453197674714e-06, "loss": 0.6565, "step": 11502 }, { "epoch": 0.35254995709206816, "grad_norm": 0.4838884052649955, "learning_rate": 7.511024019834798e-06, "loss": 0.4188, "step": 11503 }, { "epoch": 0.35258060561480936, "grad_norm": 0.43200897580268854, "learning_rate": 7.51059481725312e-06, "loss": 0.4053, "step": 11504 }, { "epoch": 0.35261125413755057, "grad_norm": 1.5754405544835628, "learning_rate": 7.510165589933911e-06, "loss": 0.6744, "step": 11505 }, { "epoch": 0.3526419026602918, "grad_norm": 1.1993727736768764, "learning_rate": 7.509736337881395e-06, "loss": 0.6533, "step": 11506 }, { "epoch": 0.352672551183033, "grad_norm": 0.4490366593408077, "learning_rate": 7.509307061099806e-06, "loss": 0.4321, "step": 11507 }, { "epoch": 0.3527031997057742, "grad_norm": 0.4301025446011543, "learning_rate": 7.508877759593373e-06, "loss": 0.4044, "step": 11508 }, { "epoch": 0.3527338482285154, "grad_norm": 1.2547815582212754, "learning_rate": 7.508448433366326e-06, "loss": 0.5488, "step": 11509 }, { "epoch": 0.3527644967512566, "grad_norm": 1.263712584624072, "learning_rate": 7.508019082422893e-06, "loss": 0.6691, "step": 11510 }, { "epoch": 0.3527951452739978, "grad_norm": 1.2130863750844014, "learning_rate": 7.507589706767309e-06, "loss": 0.6971, "step": 11511 }, { "epoch": 0.352825793796739, "grad_norm": 1.117259302060333, "learning_rate": 7.5071603064038e-06, "loss": 0.7368, "step": 11512 }, { "epoch": 0.3528564423194802, "grad_norm": 1.1894738023269962, "learning_rate": 7.5067308813366e-06, "loss": 0.7363, "step": 11513 }, { "epoch": 0.3528870908422214, "grad_norm": 1.530705329161863, "learning_rate": 7.506301431569938e-06, "loss": 0.7305, "step": 11514 }, { "epoch": 0.3529177393649626, "grad_norm": 1.4142494050609256, "learning_rate": 7.505871957108049e-06, "loss": 0.7003, "step": 11515 }, { "epoch": 0.35294838788770383, "grad_norm": 1.219724313911098, "learning_rate": 7.505442457955159e-06, "loss": 0.6523, "step": 11516 }, { "epoch": 0.35297903641044504, "grad_norm": 0.5029896107515651, "learning_rate": 7.505012934115506e-06, "loss": 0.4136, "step": 11517 }, { "epoch": 0.35300968493318624, "grad_norm": 1.3405894314295772, "learning_rate": 7.504583385593318e-06, "loss": 0.655, "step": 11518 }, { "epoch": 0.35304033345592745, "grad_norm": 1.0795539962690015, "learning_rate": 7.504153812392832e-06, "loss": 0.6375, "step": 11519 }, { "epoch": 0.35307098197866865, "grad_norm": 1.3533339211304132, "learning_rate": 7.503724214518275e-06, "loss": 0.6729, "step": 11520 }, { "epoch": 0.35310163050140986, "grad_norm": 1.3012948570370255, "learning_rate": 7.503294591973883e-06, "loss": 0.5884, "step": 11521 }, { "epoch": 0.35313227902415106, "grad_norm": 0.48194071746877926, "learning_rate": 7.50286494476389e-06, "loss": 0.4535, "step": 11522 }, { "epoch": 0.3531629275468922, "grad_norm": 1.1518174123517606, "learning_rate": 7.502435272892527e-06, "loss": 0.6533, "step": 11523 }, { "epoch": 0.3531935760696334, "grad_norm": 1.4093181671650246, "learning_rate": 7.50200557636403e-06, "loss": 0.7384, "step": 11524 }, { "epoch": 0.3532242245923746, "grad_norm": 1.4931214291206982, "learning_rate": 7.501575855182631e-06, "loss": 0.6606, "step": 11525 }, { "epoch": 0.35325487311511583, "grad_norm": 1.153152467873263, "learning_rate": 7.501146109352566e-06, "loss": 0.5934, "step": 11526 }, { "epoch": 0.35328552163785704, "grad_norm": 0.4968900252179282, "learning_rate": 7.500716338878068e-06, "loss": 0.4344, "step": 11527 }, { "epoch": 0.35331617016059824, "grad_norm": 1.2593150766934291, "learning_rate": 7.500286543763372e-06, "loss": 0.6786, "step": 11528 }, { "epoch": 0.35334681868333945, "grad_norm": 1.1590564542620214, "learning_rate": 7.499856724012713e-06, "loss": 0.6629, "step": 11529 }, { "epoch": 0.35337746720608065, "grad_norm": 1.3771038916599194, "learning_rate": 7.4994268796303275e-06, "loss": 0.7096, "step": 11530 }, { "epoch": 0.35340811572882186, "grad_norm": 1.2684111153074782, "learning_rate": 7.498997010620447e-06, "loss": 0.6628, "step": 11531 }, { "epoch": 0.35343876425156306, "grad_norm": 1.3404917490479353, "learning_rate": 7.498567116987311e-06, "loss": 0.6729, "step": 11532 }, { "epoch": 0.35346941277430427, "grad_norm": 1.4991888245172187, "learning_rate": 7.498137198735153e-06, "loss": 0.7744, "step": 11533 }, { "epoch": 0.3535000612970455, "grad_norm": 1.2155262869384902, "learning_rate": 7.4977072558682104e-06, "loss": 0.6028, "step": 11534 }, { "epoch": 0.3535307098197867, "grad_norm": 1.3222494658732087, "learning_rate": 7.497277288390719e-06, "loss": 0.7379, "step": 11535 }, { "epoch": 0.3535613583425279, "grad_norm": 1.2445036017983355, "learning_rate": 7.496847296306917e-06, "loss": 0.6627, "step": 11536 }, { "epoch": 0.3535920068652691, "grad_norm": 1.2343251796882937, "learning_rate": 7.496417279621039e-06, "loss": 0.7346, "step": 11537 }, { "epoch": 0.3536226553880103, "grad_norm": 1.2661925976231314, "learning_rate": 7.495987238337321e-06, "loss": 0.7364, "step": 11538 }, { "epoch": 0.3536533039107515, "grad_norm": 1.2773410497677669, "learning_rate": 7.495557172460004e-06, "loss": 0.6484, "step": 11539 }, { "epoch": 0.3536839524334927, "grad_norm": 1.3362290873271419, "learning_rate": 7.495127081993323e-06, "loss": 0.6216, "step": 11540 }, { "epoch": 0.3537146009562339, "grad_norm": 1.256434104570403, "learning_rate": 7.494696966941517e-06, "loss": 0.673, "step": 11541 }, { "epoch": 0.3537452494789751, "grad_norm": 1.1710058388343976, "learning_rate": 7.494266827308823e-06, "loss": 0.6671, "step": 11542 }, { "epoch": 0.3537758980017163, "grad_norm": 1.302913021012463, "learning_rate": 7.4938366630994805e-06, "loss": 0.6912, "step": 11543 }, { "epoch": 0.35380654652445753, "grad_norm": 1.4707815377436226, "learning_rate": 7.493406474317726e-06, "loss": 0.6671, "step": 11544 }, { "epoch": 0.35383719504719874, "grad_norm": 1.2920082765367507, "learning_rate": 7.492976260967801e-06, "loss": 0.6642, "step": 11545 }, { "epoch": 0.35386784356993994, "grad_norm": 1.3030466228631465, "learning_rate": 7.492546023053941e-06, "loss": 0.6308, "step": 11546 }, { "epoch": 0.35389849209268115, "grad_norm": 1.196021571420545, "learning_rate": 7.49211576058039e-06, "loss": 0.5999, "step": 11547 }, { "epoch": 0.35392914061542236, "grad_norm": 1.2598105078234079, "learning_rate": 7.491685473551384e-06, "loss": 0.6876, "step": 11548 }, { "epoch": 0.35395978913816356, "grad_norm": 1.1394186700939122, "learning_rate": 7.4912551619711614e-06, "loss": 0.613, "step": 11549 }, { "epoch": 0.35399043766090477, "grad_norm": 1.3479199488244247, "learning_rate": 7.490824825843965e-06, "loss": 0.6669, "step": 11550 }, { "epoch": 0.35402108618364597, "grad_norm": 1.314355505036008, "learning_rate": 7.490394465174036e-06, "loss": 0.7144, "step": 11551 }, { "epoch": 0.3540517347063872, "grad_norm": 1.1887293600398008, "learning_rate": 7.489964079965613e-06, "loss": 0.6541, "step": 11552 }, { "epoch": 0.3540823832291284, "grad_norm": 0.5324847402293554, "learning_rate": 7.489533670222935e-06, "loss": 0.441, "step": 11553 }, { "epoch": 0.35411303175186953, "grad_norm": 1.4474994279548676, "learning_rate": 7.489103235950246e-06, "loss": 0.6968, "step": 11554 }, { "epoch": 0.35414368027461074, "grad_norm": 1.6396425565818957, "learning_rate": 7.488672777151786e-06, "loss": 0.5707, "step": 11555 }, { "epoch": 0.35417432879735194, "grad_norm": 0.4746670833883039, "learning_rate": 7.488242293831795e-06, "loss": 0.4333, "step": 11556 }, { "epoch": 0.35420497732009315, "grad_norm": 1.2768029704697563, "learning_rate": 7.487811785994518e-06, "loss": 0.6152, "step": 11557 }, { "epoch": 0.35423562584283436, "grad_norm": 0.4507142065879564, "learning_rate": 7.487381253644193e-06, "loss": 0.4227, "step": 11558 }, { "epoch": 0.35426627436557556, "grad_norm": 1.3593717453977612, "learning_rate": 7.486950696785066e-06, "loss": 0.6709, "step": 11559 }, { "epoch": 0.35429692288831677, "grad_norm": 1.4765841606438113, "learning_rate": 7.4865201154213765e-06, "loss": 0.6088, "step": 11560 }, { "epoch": 0.354327571411058, "grad_norm": 1.6037305854927077, "learning_rate": 7.486089509557368e-06, "loss": 0.7806, "step": 11561 }, { "epoch": 0.3543582199337992, "grad_norm": 0.46179802679946635, "learning_rate": 7.485658879197284e-06, "loss": 0.422, "step": 11562 }, { "epoch": 0.3543888684565404, "grad_norm": 0.5024449026864846, "learning_rate": 7.4852282243453665e-06, "loss": 0.4487, "step": 11563 }, { "epoch": 0.3544195169792816, "grad_norm": 1.349163925388261, "learning_rate": 7.4847975450058596e-06, "loss": 0.6818, "step": 11564 }, { "epoch": 0.3544501655020228, "grad_norm": 1.3659287537624547, "learning_rate": 7.4843668411830076e-06, "loss": 0.7195, "step": 11565 }, { "epoch": 0.354480814024764, "grad_norm": 1.5967240629996389, "learning_rate": 7.483936112881052e-06, "loss": 0.7061, "step": 11566 }, { "epoch": 0.3545114625475052, "grad_norm": 1.3264795829905445, "learning_rate": 7.48350536010424e-06, "loss": 0.5779, "step": 11567 }, { "epoch": 0.3545421110702464, "grad_norm": 1.3936579392492348, "learning_rate": 7.483074582856812e-06, "loss": 0.751, "step": 11568 }, { "epoch": 0.3545727595929876, "grad_norm": 1.3291235248266349, "learning_rate": 7.482643781143016e-06, "loss": 0.6364, "step": 11569 }, { "epoch": 0.3546034081157288, "grad_norm": 1.3252155064386546, "learning_rate": 7.482212954967095e-06, "loss": 0.6206, "step": 11570 }, { "epoch": 0.35463405663847003, "grad_norm": 1.238186987697913, "learning_rate": 7.481782104333297e-06, "loss": 0.5771, "step": 11571 }, { "epoch": 0.35466470516121124, "grad_norm": 1.1913109939720645, "learning_rate": 7.481351229245862e-06, "loss": 0.7111, "step": 11572 }, { "epoch": 0.35469535368395244, "grad_norm": 1.1027234906366727, "learning_rate": 7.4809203297090405e-06, "loss": 0.6626, "step": 11573 }, { "epoch": 0.35472600220669365, "grad_norm": 1.1937755283827216, "learning_rate": 7.480489405727075e-06, "loss": 0.6368, "step": 11574 }, { "epoch": 0.35475665072943485, "grad_norm": 1.2773605535020225, "learning_rate": 7.480058457304214e-06, "loss": 0.5972, "step": 11575 }, { "epoch": 0.35478729925217606, "grad_norm": 1.3724951371622107, "learning_rate": 7.479627484444701e-06, "loss": 0.7364, "step": 11576 }, { "epoch": 0.35481794777491726, "grad_norm": 1.1685793899596986, "learning_rate": 7.479196487152784e-06, "loss": 0.6616, "step": 11577 }, { "epoch": 0.35484859629765847, "grad_norm": 0.5732829624106681, "learning_rate": 7.478765465432712e-06, "loss": 0.4536, "step": 11578 }, { "epoch": 0.3548792448203997, "grad_norm": 1.2417846125732983, "learning_rate": 7.478334419288726e-06, "loss": 0.616, "step": 11579 }, { "epoch": 0.3549098933431409, "grad_norm": 1.1678291824511535, "learning_rate": 7.4779033487250796e-06, "loss": 0.5506, "step": 11580 }, { "epoch": 0.3549405418658821, "grad_norm": 1.176871429116174, "learning_rate": 7.477472253746017e-06, "loss": 0.5939, "step": 11581 }, { "epoch": 0.3549711903886233, "grad_norm": 1.2340896414477367, "learning_rate": 7.477041134355787e-06, "loss": 0.634, "step": 11582 }, { "epoch": 0.3550018389113645, "grad_norm": 1.1852800510425254, "learning_rate": 7.476609990558636e-06, "loss": 0.7564, "step": 11583 }, { "epoch": 0.3550324874341057, "grad_norm": 0.44907199338267006, "learning_rate": 7.476178822358813e-06, "loss": 0.4238, "step": 11584 }, { "epoch": 0.35506313595684685, "grad_norm": 1.3233145581382453, "learning_rate": 7.4757476297605654e-06, "loss": 0.7498, "step": 11585 }, { "epoch": 0.35509378447958806, "grad_norm": 1.2004003793845472, "learning_rate": 7.475316412768145e-06, "loss": 0.7022, "step": 11586 }, { "epoch": 0.35512443300232927, "grad_norm": 1.450131543904679, "learning_rate": 7.474885171385797e-06, "loss": 0.7152, "step": 11587 }, { "epoch": 0.35515508152507047, "grad_norm": 0.4933397070038029, "learning_rate": 7.4744539056177735e-06, "loss": 0.4491, "step": 11588 }, { "epoch": 0.3551857300478117, "grad_norm": 1.3200185484318523, "learning_rate": 7.4740226154683215e-06, "loss": 0.6857, "step": 11589 }, { "epoch": 0.3552163785705529, "grad_norm": 1.2735551444620061, "learning_rate": 7.473591300941692e-06, "loss": 0.7556, "step": 11590 }, { "epoch": 0.3552470270932941, "grad_norm": 1.088151445081538, "learning_rate": 7.473159962042136e-06, "loss": 0.5148, "step": 11591 }, { "epoch": 0.3552776756160353, "grad_norm": 1.2491652163799285, "learning_rate": 7.4727285987738995e-06, "loss": 0.6551, "step": 11592 }, { "epoch": 0.3553083241387765, "grad_norm": 1.2420903355973048, "learning_rate": 7.472297211141237e-06, "loss": 0.6369, "step": 11593 }, { "epoch": 0.3553389726615177, "grad_norm": 1.311345703539706, "learning_rate": 7.471865799148398e-06, "loss": 0.621, "step": 11594 }, { "epoch": 0.3553696211842589, "grad_norm": 1.1937157244547563, "learning_rate": 7.471434362799632e-06, "loss": 0.6227, "step": 11595 }, { "epoch": 0.3554002697070001, "grad_norm": 1.0263958189447158, "learning_rate": 7.47100290209919e-06, "loss": 0.6738, "step": 11596 }, { "epoch": 0.3554309182297413, "grad_norm": 0.5100981521462822, "learning_rate": 7.470571417051327e-06, "loss": 0.4452, "step": 11597 }, { "epoch": 0.3554615667524825, "grad_norm": 1.2563868152098536, "learning_rate": 7.470139907660288e-06, "loss": 0.6203, "step": 11598 }, { "epoch": 0.35549221527522373, "grad_norm": 1.2407981669549084, "learning_rate": 7.4697083739303315e-06, "loss": 0.6829, "step": 11599 }, { "epoch": 0.35552286379796494, "grad_norm": 1.431833434397503, "learning_rate": 7.469276815865705e-06, "loss": 0.7108, "step": 11600 }, { "epoch": 0.35555351232070614, "grad_norm": 1.2727601346045456, "learning_rate": 7.468845233470663e-06, "loss": 0.6738, "step": 11601 }, { "epoch": 0.35558416084344735, "grad_norm": 1.2448694310055681, "learning_rate": 7.468413626749457e-06, "loss": 0.5932, "step": 11602 }, { "epoch": 0.35561480936618856, "grad_norm": 1.5586063915851385, "learning_rate": 7.467981995706339e-06, "loss": 0.6412, "step": 11603 }, { "epoch": 0.35564545788892976, "grad_norm": 1.2620313597023596, "learning_rate": 7.467550340345564e-06, "loss": 0.6302, "step": 11604 }, { "epoch": 0.35567610641167097, "grad_norm": 1.313196731532388, "learning_rate": 7.467118660671383e-06, "loss": 0.6622, "step": 11605 }, { "epoch": 0.3557067549344122, "grad_norm": 0.4685145551667825, "learning_rate": 7.466686956688053e-06, "loss": 0.4252, "step": 11606 }, { "epoch": 0.3557374034571534, "grad_norm": 0.4942325245697444, "learning_rate": 7.466255228399824e-06, "loss": 0.4305, "step": 11607 }, { "epoch": 0.3557680519798946, "grad_norm": 1.2677234129730577, "learning_rate": 7.4658234758109505e-06, "loss": 0.6541, "step": 11608 }, { "epoch": 0.3557987005026358, "grad_norm": 1.125019864520637, "learning_rate": 7.465391698925688e-06, "loss": 0.6526, "step": 11609 }, { "epoch": 0.355829349025377, "grad_norm": 1.0515542807481106, "learning_rate": 7.4649598977482894e-06, "loss": 0.6881, "step": 11610 }, { "epoch": 0.3558599975481182, "grad_norm": 1.283435697879439, "learning_rate": 7.46452807228301e-06, "loss": 0.6165, "step": 11611 }, { "epoch": 0.3558906460708594, "grad_norm": 0.4700706418810161, "learning_rate": 7.464096222534107e-06, "loss": 0.4305, "step": 11612 }, { "epoch": 0.3559212945936006, "grad_norm": 1.241603016990057, "learning_rate": 7.463664348505832e-06, "loss": 0.6717, "step": 11613 }, { "epoch": 0.3559519431163418, "grad_norm": 1.1949314980375436, "learning_rate": 7.463232450202443e-06, "loss": 0.6592, "step": 11614 }, { "epoch": 0.355982591639083, "grad_norm": 0.48479196711871086, "learning_rate": 7.462800527628193e-06, "loss": 0.4429, "step": 11615 }, { "epoch": 0.3560132401618242, "grad_norm": 1.2976710819188553, "learning_rate": 7.462368580787341e-06, "loss": 0.6836, "step": 11616 }, { "epoch": 0.3560438886845654, "grad_norm": 0.47347804271450994, "learning_rate": 7.46193660968414e-06, "loss": 0.4241, "step": 11617 }, { "epoch": 0.3560745372073066, "grad_norm": 1.2845568559226284, "learning_rate": 7.461504614322848e-06, "loss": 0.6673, "step": 11618 }, { "epoch": 0.3561051857300478, "grad_norm": 1.2967837113497516, "learning_rate": 7.46107259470772e-06, "loss": 0.7342, "step": 11619 }, { "epoch": 0.356135834252789, "grad_norm": 1.127669818825221, "learning_rate": 7.460640550843015e-06, "loss": 0.7023, "step": 11620 }, { "epoch": 0.3561664827755302, "grad_norm": 1.115614735097931, "learning_rate": 7.46020848273299e-06, "loss": 0.6477, "step": 11621 }, { "epoch": 0.3561971312982714, "grad_norm": 1.2348872447400563, "learning_rate": 7.459776390381901e-06, "loss": 0.6454, "step": 11622 }, { "epoch": 0.3562277798210126, "grad_norm": 1.2754641804784976, "learning_rate": 7.459344273794004e-06, "loss": 0.7289, "step": 11623 }, { "epoch": 0.3562584283437538, "grad_norm": 1.3874028933759759, "learning_rate": 7.4589121329735604e-06, "loss": 0.6848, "step": 11624 }, { "epoch": 0.356289076866495, "grad_norm": 1.2545627276885294, "learning_rate": 7.4584799679248275e-06, "loss": 0.6748, "step": 11625 }, { "epoch": 0.35631972538923623, "grad_norm": 1.091417351378809, "learning_rate": 7.45804777865206e-06, "loss": 0.7513, "step": 11626 }, { "epoch": 0.35635037391197744, "grad_norm": 1.7265956082700329, "learning_rate": 7.457615565159521e-06, "loss": 0.7474, "step": 11627 }, { "epoch": 0.35638102243471864, "grad_norm": 1.1967556484631714, "learning_rate": 7.457183327451465e-06, "loss": 0.6111, "step": 11628 }, { "epoch": 0.35641167095745985, "grad_norm": 1.501233885711961, "learning_rate": 7.456751065532153e-06, "loss": 0.7101, "step": 11629 }, { "epoch": 0.35644231948020105, "grad_norm": 0.6619354061762719, "learning_rate": 7.456318779405845e-06, "loss": 0.4308, "step": 11630 }, { "epoch": 0.35647296800294226, "grad_norm": 3.133848578328559, "learning_rate": 7.4558864690768e-06, "loss": 0.6312, "step": 11631 }, { "epoch": 0.35650361652568346, "grad_norm": 1.281076177579442, "learning_rate": 7.455454134549278e-06, "loss": 0.618, "step": 11632 }, { "epoch": 0.35653426504842467, "grad_norm": 0.5211320056644011, "learning_rate": 7.455021775827536e-06, "loss": 0.4285, "step": 11633 }, { "epoch": 0.3565649135711659, "grad_norm": 1.2287129790422955, "learning_rate": 7.454589392915838e-06, "loss": 0.684, "step": 11634 }, { "epoch": 0.3565955620939071, "grad_norm": 1.1860518249749392, "learning_rate": 7.454156985818441e-06, "loss": 0.6873, "step": 11635 }, { "epoch": 0.3566262106166483, "grad_norm": 1.2725889835373747, "learning_rate": 7.453724554539609e-06, "loss": 0.7339, "step": 11636 }, { "epoch": 0.3566568591393895, "grad_norm": 1.0867897139682035, "learning_rate": 7.4532920990836e-06, "loss": 0.547, "step": 11637 }, { "epoch": 0.3566875076621307, "grad_norm": 1.267993421144414, "learning_rate": 7.4528596194546775e-06, "loss": 0.7315, "step": 11638 }, { "epoch": 0.3567181561848719, "grad_norm": 1.259538534923368, "learning_rate": 7.4524271156570996e-06, "loss": 0.6582, "step": 11639 }, { "epoch": 0.3567488047076131, "grad_norm": 1.2786186251616352, "learning_rate": 7.451994587695133e-06, "loss": 0.684, "step": 11640 }, { "epoch": 0.3567794532303543, "grad_norm": 1.1680674486184863, "learning_rate": 7.4515620355730334e-06, "loss": 0.6307, "step": 11641 }, { "epoch": 0.3568101017530955, "grad_norm": 1.2007793170737386, "learning_rate": 7.451129459295066e-06, "loss": 0.6264, "step": 11642 }, { "epoch": 0.3568407502758367, "grad_norm": 1.3880771406569763, "learning_rate": 7.450696858865494e-06, "loss": 0.6886, "step": 11643 }, { "epoch": 0.35687139879857793, "grad_norm": 1.1550475275486254, "learning_rate": 7.4502642342885775e-06, "loss": 0.5996, "step": 11644 }, { "epoch": 0.35690204732131914, "grad_norm": 1.2643964565630024, "learning_rate": 7.4498315855685835e-06, "loss": 0.6297, "step": 11645 }, { "epoch": 0.35693269584406034, "grad_norm": 0.7658086925876003, "learning_rate": 7.449398912709768e-06, "loss": 0.4118, "step": 11646 }, { "epoch": 0.3569633443668015, "grad_norm": 1.1332433760072345, "learning_rate": 7.448966215716401e-06, "loss": 0.5687, "step": 11647 }, { "epoch": 0.3569939928895427, "grad_norm": 1.3258542891491802, "learning_rate": 7.448533494592743e-06, "loss": 0.687, "step": 11648 }, { "epoch": 0.3570246414122839, "grad_norm": 0.5543080113833709, "learning_rate": 7.448100749343059e-06, "loss": 0.4425, "step": 11649 }, { "epoch": 0.3570552899350251, "grad_norm": 1.157026322011865, "learning_rate": 7.447667979971609e-06, "loss": 0.679, "step": 11650 }, { "epoch": 0.3570859384577663, "grad_norm": 1.1824780702219648, "learning_rate": 7.447235186482662e-06, "loss": 0.714, "step": 11651 }, { "epoch": 0.3571165869805075, "grad_norm": 1.4597450766157454, "learning_rate": 7.446802368880481e-06, "loss": 0.698, "step": 11652 }, { "epoch": 0.35714723550324873, "grad_norm": 1.302335273640106, "learning_rate": 7.44636952716933e-06, "loss": 0.6656, "step": 11653 }, { "epoch": 0.35717788402598993, "grad_norm": 1.1763735445295769, "learning_rate": 7.445936661353472e-06, "loss": 0.6056, "step": 11654 }, { "epoch": 0.35720853254873114, "grad_norm": 1.2511598953515892, "learning_rate": 7.445503771437177e-06, "loss": 0.7554, "step": 11655 }, { "epoch": 0.35723918107147234, "grad_norm": 1.1483838683737542, "learning_rate": 7.445070857424706e-06, "loss": 0.5856, "step": 11656 }, { "epoch": 0.35726982959421355, "grad_norm": 1.4488425641478668, "learning_rate": 7.4446379193203265e-06, "loss": 0.6434, "step": 11657 }, { "epoch": 0.35730047811695476, "grad_norm": 1.0663339071618732, "learning_rate": 7.444204957128305e-06, "loss": 0.6521, "step": 11658 }, { "epoch": 0.35733112663969596, "grad_norm": 1.1561397108069107, "learning_rate": 7.443771970852907e-06, "loss": 0.631, "step": 11659 }, { "epoch": 0.35736177516243717, "grad_norm": 1.2428639409695073, "learning_rate": 7.443338960498398e-06, "loss": 0.6613, "step": 11660 }, { "epoch": 0.3573924236851784, "grad_norm": 1.2699112170172717, "learning_rate": 7.4429059260690425e-06, "loss": 0.7028, "step": 11661 }, { "epoch": 0.3574230722079196, "grad_norm": 1.3588263464510628, "learning_rate": 7.442472867569112e-06, "loss": 0.7045, "step": 11662 }, { "epoch": 0.3574537207306608, "grad_norm": 1.412616270187153, "learning_rate": 7.442039785002872e-06, "loss": 0.4447, "step": 11663 }, { "epoch": 0.357484369253402, "grad_norm": 1.3726742372573193, "learning_rate": 7.441606678374589e-06, "loss": 0.7044, "step": 11664 }, { "epoch": 0.3575150177761432, "grad_norm": 0.5926287711658459, "learning_rate": 7.441173547688529e-06, "loss": 0.4336, "step": 11665 }, { "epoch": 0.3575456662988844, "grad_norm": 1.3135000465005302, "learning_rate": 7.440740392948964e-06, "loss": 0.6658, "step": 11666 }, { "epoch": 0.3575763148216256, "grad_norm": 1.3516898325565152, "learning_rate": 7.4403072141601585e-06, "loss": 0.6753, "step": 11667 }, { "epoch": 0.3576069633443668, "grad_norm": 1.2434922582742376, "learning_rate": 7.439874011326381e-06, "loss": 0.6936, "step": 11668 }, { "epoch": 0.357637611867108, "grad_norm": 1.3846341944717317, "learning_rate": 7.439440784451901e-06, "loss": 0.8022, "step": 11669 }, { "epoch": 0.3576682603898492, "grad_norm": 1.2336847206857953, "learning_rate": 7.439007533540986e-06, "loss": 0.6785, "step": 11670 }, { "epoch": 0.35769890891259043, "grad_norm": 1.3979094427040653, "learning_rate": 7.438574258597907e-06, "loss": 0.6593, "step": 11671 }, { "epoch": 0.35772955743533164, "grad_norm": 1.3030568282599262, "learning_rate": 7.438140959626931e-06, "loss": 0.6342, "step": 11672 }, { "epoch": 0.35776020595807284, "grad_norm": 1.410839540834605, "learning_rate": 7.437707636632329e-06, "loss": 0.7093, "step": 11673 }, { "epoch": 0.35779085448081405, "grad_norm": 1.1642161143824337, "learning_rate": 7.437274289618368e-06, "loss": 0.6147, "step": 11674 }, { "epoch": 0.35782150300355525, "grad_norm": 1.2225421686937041, "learning_rate": 7.436840918589323e-06, "loss": 0.624, "step": 11675 }, { "epoch": 0.35785215152629646, "grad_norm": 1.2757762414581355, "learning_rate": 7.436407523549458e-06, "loss": 0.6897, "step": 11676 }, { "epoch": 0.35788280004903766, "grad_norm": 1.2584001176033381, "learning_rate": 7.435974104503048e-06, "loss": 0.6088, "step": 11677 }, { "epoch": 0.3579134485717788, "grad_norm": 1.539129880349124, "learning_rate": 7.435540661454361e-06, "loss": 0.4883, "step": 11678 }, { "epoch": 0.35794409709452, "grad_norm": 1.2531480689551633, "learning_rate": 7.43510719440767e-06, "loss": 0.6152, "step": 11679 }, { "epoch": 0.3579747456172612, "grad_norm": 1.2795244551476572, "learning_rate": 7.434673703367243e-06, "loss": 0.6623, "step": 11680 }, { "epoch": 0.35800539414000243, "grad_norm": 1.2417795724842748, "learning_rate": 7.434240188337355e-06, "loss": 0.6461, "step": 11681 }, { "epoch": 0.35803604266274364, "grad_norm": 0.885383209216037, "learning_rate": 7.433806649322274e-06, "loss": 0.4573, "step": 11682 }, { "epoch": 0.35806669118548484, "grad_norm": 1.4463133163460768, "learning_rate": 7.433373086326274e-06, "loss": 0.6958, "step": 11683 }, { "epoch": 0.35809733970822605, "grad_norm": 1.4619124662900143, "learning_rate": 7.432939499353627e-06, "loss": 0.706, "step": 11684 }, { "epoch": 0.35812798823096725, "grad_norm": 1.3610384504524067, "learning_rate": 7.432505888408603e-06, "loss": 0.7107, "step": 11685 }, { "epoch": 0.35815863675370846, "grad_norm": 1.177920364520005, "learning_rate": 7.432072253495478e-06, "loss": 0.6022, "step": 11686 }, { "epoch": 0.35818928527644966, "grad_norm": 1.2567947289865224, "learning_rate": 7.431638594618521e-06, "loss": 0.6606, "step": 11687 }, { "epoch": 0.35821993379919087, "grad_norm": 0.6922299312730157, "learning_rate": 7.431204911782009e-06, "loss": 0.4367, "step": 11688 }, { "epoch": 0.3582505823219321, "grad_norm": 1.5227594975764804, "learning_rate": 7.4307712049902105e-06, "loss": 0.747, "step": 11689 }, { "epoch": 0.3582812308446733, "grad_norm": 1.3346457047193723, "learning_rate": 7.430337474247403e-06, "loss": 0.7574, "step": 11690 }, { "epoch": 0.3583118793674145, "grad_norm": 1.3512956639465685, "learning_rate": 7.429903719557857e-06, "loss": 0.7036, "step": 11691 }, { "epoch": 0.3583425278901557, "grad_norm": 1.5377908634280675, "learning_rate": 7.42946994092585e-06, "loss": 0.6753, "step": 11692 }, { "epoch": 0.3583731764128969, "grad_norm": 1.1827091459974601, "learning_rate": 7.429036138355652e-06, "loss": 0.6979, "step": 11693 }, { "epoch": 0.3584038249356381, "grad_norm": 1.2270045836250565, "learning_rate": 7.428602311851542e-06, "loss": 0.6631, "step": 11694 }, { "epoch": 0.3584344734583793, "grad_norm": 0.6495619820898629, "learning_rate": 7.42816846141779e-06, "loss": 0.4086, "step": 11695 }, { "epoch": 0.3584651219811205, "grad_norm": 1.5545827313095708, "learning_rate": 7.4277345870586725e-06, "loss": 0.7563, "step": 11696 }, { "epoch": 0.3584957705038617, "grad_norm": 1.283049781105048, "learning_rate": 7.427300688778465e-06, "loss": 0.671, "step": 11697 }, { "epoch": 0.3585264190266029, "grad_norm": 1.1355296428719286, "learning_rate": 7.426866766581444e-06, "loss": 0.6861, "step": 11698 }, { "epoch": 0.35855706754934413, "grad_norm": 1.2393025061773941, "learning_rate": 7.4264328204718835e-06, "loss": 0.667, "step": 11699 }, { "epoch": 0.35858771607208534, "grad_norm": 1.4552464437857098, "learning_rate": 7.425998850454059e-06, "loss": 0.7138, "step": 11700 }, { "epoch": 0.35861836459482654, "grad_norm": 1.3883028163833424, "learning_rate": 7.425564856532248e-06, "loss": 0.6672, "step": 11701 }, { "epoch": 0.35864901311756775, "grad_norm": 1.096446109268094, "learning_rate": 7.4251308387107256e-06, "loss": 0.611, "step": 11702 }, { "epoch": 0.35867966164030896, "grad_norm": 1.1267026992620794, "learning_rate": 7.424696796993769e-06, "loss": 0.6476, "step": 11703 }, { "epoch": 0.35871031016305016, "grad_norm": 1.2816732208157804, "learning_rate": 7.424262731385653e-06, "loss": 0.7647, "step": 11704 }, { "epoch": 0.35874095868579137, "grad_norm": 0.5432146699659041, "learning_rate": 7.423828641890657e-06, "loss": 0.4352, "step": 11705 }, { "epoch": 0.3587716072085326, "grad_norm": 1.2595261358127827, "learning_rate": 7.423394528513057e-06, "loss": 0.7416, "step": 11706 }, { "epoch": 0.3588022557312738, "grad_norm": 1.2976885007080028, "learning_rate": 7.422960391257131e-06, "loss": 0.593, "step": 11707 }, { "epoch": 0.358832904254015, "grad_norm": 1.1314231575917342, "learning_rate": 7.422526230127156e-06, "loss": 0.7293, "step": 11708 }, { "epoch": 0.35886355277675613, "grad_norm": 1.2480694577481048, "learning_rate": 7.42209204512741e-06, "loss": 0.739, "step": 11709 }, { "epoch": 0.35889420129949734, "grad_norm": 1.2380258636946326, "learning_rate": 7.421657836262172e-06, "loss": 0.7321, "step": 11710 }, { "epoch": 0.35892484982223855, "grad_norm": 1.2629084140260833, "learning_rate": 7.42122360353572e-06, "loss": 0.7139, "step": 11711 }, { "epoch": 0.35895549834497975, "grad_norm": 1.408674118197038, "learning_rate": 7.4207893469523304e-06, "loss": 0.726, "step": 11712 }, { "epoch": 0.35898614686772096, "grad_norm": 1.1452873822790766, "learning_rate": 7.420355066516286e-06, "loss": 0.6008, "step": 11713 }, { "epoch": 0.35901679539046216, "grad_norm": 1.2245784799347423, "learning_rate": 7.419920762231864e-06, "loss": 0.618, "step": 11714 }, { "epoch": 0.35904744391320337, "grad_norm": 1.3394338488622888, "learning_rate": 7.419486434103341e-06, "loss": 0.6344, "step": 11715 }, { "epoch": 0.3590780924359446, "grad_norm": 1.1329906571917325, "learning_rate": 7.419052082135001e-06, "loss": 0.6912, "step": 11716 }, { "epoch": 0.3591087409586858, "grad_norm": 1.2657561627876832, "learning_rate": 7.418617706331123e-06, "loss": 0.7753, "step": 11717 }, { "epoch": 0.359139389481427, "grad_norm": 1.0934451527391247, "learning_rate": 7.418183306695984e-06, "loss": 0.758, "step": 11718 }, { "epoch": 0.3591700380041682, "grad_norm": 1.3514690440001667, "learning_rate": 7.417748883233866e-06, "loss": 0.6411, "step": 11719 }, { "epoch": 0.3592006865269094, "grad_norm": 1.296040948092249, "learning_rate": 7.417314435949051e-06, "loss": 0.7275, "step": 11720 }, { "epoch": 0.3592313350496506, "grad_norm": 0.4892588118007942, "learning_rate": 7.416879964845818e-06, "loss": 0.4327, "step": 11721 }, { "epoch": 0.3592619835723918, "grad_norm": 0.4825761617343421, "learning_rate": 7.416445469928448e-06, "loss": 0.4318, "step": 11722 }, { "epoch": 0.359292632095133, "grad_norm": 1.2783112119983555, "learning_rate": 7.416010951201222e-06, "loss": 0.7703, "step": 11723 }, { "epoch": 0.3593232806178742, "grad_norm": 1.2834414849890874, "learning_rate": 7.415576408668423e-06, "loss": 0.5812, "step": 11724 }, { "epoch": 0.3593539291406154, "grad_norm": 1.0589324664483375, "learning_rate": 7.415141842334333e-06, "loss": 0.6054, "step": 11725 }, { "epoch": 0.35938457766335663, "grad_norm": 1.27202048710496, "learning_rate": 7.4147072522032294e-06, "loss": 0.7246, "step": 11726 }, { "epoch": 0.35941522618609784, "grad_norm": 1.293539480586865, "learning_rate": 7.4142726382793984e-06, "loss": 0.7048, "step": 11727 }, { "epoch": 0.35944587470883904, "grad_norm": 1.235412528697394, "learning_rate": 7.4138380005671214e-06, "loss": 0.6621, "step": 11728 }, { "epoch": 0.35947652323158025, "grad_norm": 1.33168029891161, "learning_rate": 7.413403339070682e-06, "loss": 0.6857, "step": 11729 }, { "epoch": 0.35950717175432145, "grad_norm": 1.2214352085771392, "learning_rate": 7.412968653794362e-06, "loss": 0.6121, "step": 11730 }, { "epoch": 0.35953782027706266, "grad_norm": 1.27410095030509, "learning_rate": 7.412533944742443e-06, "loss": 0.715, "step": 11731 }, { "epoch": 0.35956846879980386, "grad_norm": 1.2863280924429987, "learning_rate": 7.412099211919209e-06, "loss": 0.6463, "step": 11732 }, { "epoch": 0.35959911732254507, "grad_norm": 1.199010036896894, "learning_rate": 7.411664455328948e-06, "loss": 0.5676, "step": 11733 }, { "epoch": 0.3596297658452863, "grad_norm": 1.33063457288512, "learning_rate": 7.411229674975937e-06, "loss": 0.776, "step": 11734 }, { "epoch": 0.3596604143680275, "grad_norm": 1.1914958586210036, "learning_rate": 7.410794870864464e-06, "loss": 0.6667, "step": 11735 }, { "epoch": 0.3596910628907687, "grad_norm": 1.156471184818555, "learning_rate": 7.410360042998812e-06, "loss": 0.7184, "step": 11736 }, { "epoch": 0.3597217114135099, "grad_norm": 1.198190276000836, "learning_rate": 7.409925191383267e-06, "loss": 0.6797, "step": 11737 }, { "epoch": 0.3597523599362511, "grad_norm": 1.3386848191315097, "learning_rate": 7.40949031602211e-06, "loss": 0.6992, "step": 11738 }, { "epoch": 0.3597830084589923, "grad_norm": 1.3432638675577877, "learning_rate": 7.4090554169196304e-06, "loss": 0.6946, "step": 11739 }, { "epoch": 0.35981365698173345, "grad_norm": 1.1847794087304075, "learning_rate": 7.408620494080111e-06, "loss": 0.6043, "step": 11740 }, { "epoch": 0.35984430550447466, "grad_norm": 1.2894310759549699, "learning_rate": 7.408185547507837e-06, "loss": 0.6754, "step": 11741 }, { "epoch": 0.35987495402721587, "grad_norm": 1.2162430666814095, "learning_rate": 7.4077505772070955e-06, "loss": 0.6689, "step": 11742 }, { "epoch": 0.35990560254995707, "grad_norm": 1.1307739500346776, "learning_rate": 7.407315583182171e-06, "loss": 0.6054, "step": 11743 }, { "epoch": 0.3599362510726983, "grad_norm": 0.613521582210771, "learning_rate": 7.406880565437351e-06, "loss": 0.4214, "step": 11744 }, { "epoch": 0.3599668995954395, "grad_norm": 1.1787958865965655, "learning_rate": 7.4064455239769195e-06, "loss": 0.5771, "step": 11745 }, { "epoch": 0.3599975481181807, "grad_norm": 1.2065359338091362, "learning_rate": 7.406010458805165e-06, "loss": 0.558, "step": 11746 }, { "epoch": 0.3600281966409219, "grad_norm": 1.3197027948806046, "learning_rate": 7.405575369926374e-06, "loss": 0.6328, "step": 11747 }, { "epoch": 0.3600588451636631, "grad_norm": 1.1769324647513273, "learning_rate": 7.405140257344835e-06, "loss": 0.6627, "step": 11748 }, { "epoch": 0.3600894936864043, "grad_norm": 1.1424302264774833, "learning_rate": 7.404705121064831e-06, "loss": 0.6557, "step": 11749 }, { "epoch": 0.3601201422091455, "grad_norm": 1.2263238238396619, "learning_rate": 7.404269961090653e-06, "loss": 0.6661, "step": 11750 }, { "epoch": 0.3601507907318867, "grad_norm": 1.3043581783009228, "learning_rate": 7.403834777426588e-06, "loss": 0.6604, "step": 11751 }, { "epoch": 0.3601814392546279, "grad_norm": 1.2646820210202905, "learning_rate": 7.403399570076924e-06, "loss": 0.7359, "step": 11752 }, { "epoch": 0.3602120877773691, "grad_norm": 1.1232380532608712, "learning_rate": 7.40296433904595e-06, "loss": 0.7075, "step": 11753 }, { "epoch": 0.36024273630011033, "grad_norm": 1.1833164121450712, "learning_rate": 7.4025290843379525e-06, "loss": 0.6246, "step": 11754 }, { "epoch": 0.36027338482285154, "grad_norm": 1.1484142030296998, "learning_rate": 7.402093805957221e-06, "loss": 0.6179, "step": 11755 }, { "epoch": 0.36030403334559274, "grad_norm": 1.203909411505505, "learning_rate": 7.401658503908046e-06, "loss": 0.6681, "step": 11756 }, { "epoch": 0.36033468186833395, "grad_norm": 1.3779966168390674, "learning_rate": 7.401223178194714e-06, "loss": 0.7156, "step": 11757 }, { "epoch": 0.36036533039107516, "grad_norm": 1.142706731681466, "learning_rate": 7.400787828821516e-06, "loss": 0.6295, "step": 11758 }, { "epoch": 0.36039597891381636, "grad_norm": 1.2676479791458197, "learning_rate": 7.400352455792741e-06, "loss": 0.6247, "step": 11759 }, { "epoch": 0.36042662743655757, "grad_norm": 0.4883497103898846, "learning_rate": 7.39991705911268e-06, "loss": 0.4302, "step": 11760 }, { "epoch": 0.3604572759592988, "grad_norm": 1.3680789989546354, "learning_rate": 7.399481638785622e-06, "loss": 0.7004, "step": 11761 }, { "epoch": 0.36048792448204, "grad_norm": 0.46989447412897895, "learning_rate": 7.3990461948158565e-06, "loss": 0.4057, "step": 11762 }, { "epoch": 0.3605185730047812, "grad_norm": 1.5553352904657816, "learning_rate": 7.398610727207677e-06, "loss": 0.6267, "step": 11763 }, { "epoch": 0.3605492215275224, "grad_norm": 1.2907232722981656, "learning_rate": 7.39817523596537e-06, "loss": 0.7256, "step": 11764 }, { "epoch": 0.3605798700502636, "grad_norm": 1.1126458655816638, "learning_rate": 7.39773972109323e-06, "loss": 0.6442, "step": 11765 }, { "epoch": 0.3606105185730048, "grad_norm": 0.4788889704563282, "learning_rate": 7.397304182595546e-06, "loss": 0.4315, "step": 11766 }, { "epoch": 0.360641167095746, "grad_norm": 1.1159137820206353, "learning_rate": 7.396868620476611e-06, "loss": 0.6532, "step": 11767 }, { "epoch": 0.3606718156184872, "grad_norm": 1.3656154040899806, "learning_rate": 7.396433034740718e-06, "loss": 0.6547, "step": 11768 }, { "epoch": 0.3607024641412284, "grad_norm": 1.2509049369887282, "learning_rate": 7.395997425392154e-06, "loss": 0.6881, "step": 11769 }, { "epoch": 0.3607331126639696, "grad_norm": 1.21151830126842, "learning_rate": 7.395561792435216e-06, "loss": 0.6394, "step": 11770 }, { "epoch": 0.3607637611867108, "grad_norm": 1.4112674207658564, "learning_rate": 7.395126135874196e-06, "loss": 0.5704, "step": 11771 }, { "epoch": 0.360794409709452, "grad_norm": 1.396772160678517, "learning_rate": 7.394690455713383e-06, "loss": 0.6748, "step": 11772 }, { "epoch": 0.3608250582321932, "grad_norm": 1.2964889559180726, "learning_rate": 7.394254751957073e-06, "loss": 0.7548, "step": 11773 }, { "epoch": 0.3608557067549344, "grad_norm": 0.4729634124069369, "learning_rate": 7.393819024609559e-06, "loss": 0.4324, "step": 11774 }, { "epoch": 0.3608863552776756, "grad_norm": 0.45243654204863754, "learning_rate": 7.3933832736751335e-06, "loss": 0.426, "step": 11775 }, { "epoch": 0.3609170038004168, "grad_norm": 1.3036170103494205, "learning_rate": 7.39294749915809e-06, "loss": 0.7145, "step": 11776 }, { "epoch": 0.360947652323158, "grad_norm": 1.3558625534151076, "learning_rate": 7.392511701062721e-06, "loss": 0.7593, "step": 11777 }, { "epoch": 0.3609783008458992, "grad_norm": 1.3020222285465965, "learning_rate": 7.392075879393324e-06, "loss": 0.6378, "step": 11778 }, { "epoch": 0.3610089493686404, "grad_norm": 1.2497549619206096, "learning_rate": 7.391640034154192e-06, "loss": 0.6988, "step": 11779 }, { "epoch": 0.3610395978913816, "grad_norm": 1.1304601079574226, "learning_rate": 7.391204165349618e-06, "loss": 0.6735, "step": 11780 }, { "epoch": 0.36107024641412283, "grad_norm": 1.263699544235421, "learning_rate": 7.390768272983896e-06, "loss": 0.6339, "step": 11781 }, { "epoch": 0.36110089493686404, "grad_norm": 1.0475882527641343, "learning_rate": 7.390332357061324e-06, "loss": 0.6791, "step": 11782 }, { "epoch": 0.36113154345960524, "grad_norm": 1.2083881246995094, "learning_rate": 7.389896417586195e-06, "loss": 0.6848, "step": 11783 }, { "epoch": 0.36116219198234645, "grad_norm": 1.289328539357466, "learning_rate": 7.389460454562806e-06, "loss": 0.6864, "step": 11784 }, { "epoch": 0.36119284050508765, "grad_norm": 1.377028648048059, "learning_rate": 7.3890244679954516e-06, "loss": 0.6951, "step": 11785 }, { "epoch": 0.36122348902782886, "grad_norm": 1.1824461880857162, "learning_rate": 7.3885884578884256e-06, "loss": 0.744, "step": 11786 }, { "epoch": 0.36125413755057006, "grad_norm": 1.15752425008565, "learning_rate": 7.388152424246031e-06, "loss": 0.6804, "step": 11787 }, { "epoch": 0.36128478607331127, "grad_norm": 2.3824050740522362, "learning_rate": 7.387716367072556e-06, "loss": 0.5953, "step": 11788 }, { "epoch": 0.3613154345960525, "grad_norm": 1.2447013215888967, "learning_rate": 7.387280286372302e-06, "loss": 0.7571, "step": 11789 }, { "epoch": 0.3613460831187937, "grad_norm": 1.1571456283940793, "learning_rate": 7.386844182149564e-06, "loss": 0.6546, "step": 11790 }, { "epoch": 0.3613767316415349, "grad_norm": 1.2903539606521215, "learning_rate": 7.38640805440864e-06, "loss": 0.6654, "step": 11791 }, { "epoch": 0.3614073801642761, "grad_norm": 1.229551360136186, "learning_rate": 7.385971903153826e-06, "loss": 0.6891, "step": 11792 }, { "epoch": 0.3614380286870173, "grad_norm": 0.5442704967467896, "learning_rate": 7.3855357283894216e-06, "loss": 0.4378, "step": 11793 }, { "epoch": 0.3614686772097585, "grad_norm": 0.48853591067449614, "learning_rate": 7.385099530119723e-06, "loss": 0.4358, "step": 11794 }, { "epoch": 0.3614993257324997, "grad_norm": 1.2882719566330116, "learning_rate": 7.384663308349027e-06, "loss": 0.6072, "step": 11795 }, { "epoch": 0.3615299742552409, "grad_norm": 1.1072762772668014, "learning_rate": 7.3842270630816345e-06, "loss": 0.6609, "step": 11796 }, { "epoch": 0.3615606227779821, "grad_norm": 1.1929541802598371, "learning_rate": 7.383790794321841e-06, "loss": 0.6808, "step": 11797 }, { "epoch": 0.3615912713007233, "grad_norm": 1.3075549942738725, "learning_rate": 7.383354502073951e-06, "loss": 0.6679, "step": 11798 }, { "epoch": 0.36162191982346453, "grad_norm": 1.4406400602804175, "learning_rate": 7.382918186342256e-06, "loss": 0.7294, "step": 11799 }, { "epoch": 0.36165256834620574, "grad_norm": 1.1513939282815584, "learning_rate": 7.382481847131059e-06, "loss": 0.7351, "step": 11800 }, { "epoch": 0.36168321686894694, "grad_norm": 1.2195078704514204, "learning_rate": 7.382045484444658e-06, "loss": 0.6885, "step": 11801 }, { "epoch": 0.3617138653916881, "grad_norm": 1.2764248116112888, "learning_rate": 7.3816090982873554e-06, "loss": 0.6983, "step": 11802 }, { "epoch": 0.3617445139144293, "grad_norm": 1.1385609621032815, "learning_rate": 7.381172688663447e-06, "loss": 0.7258, "step": 11803 }, { "epoch": 0.3617751624371705, "grad_norm": 1.1907365349016283, "learning_rate": 7.380736255577236e-06, "loss": 0.6262, "step": 11804 }, { "epoch": 0.3618058109599117, "grad_norm": 1.245038469413192, "learning_rate": 7.380299799033022e-06, "loss": 0.7114, "step": 11805 }, { "epoch": 0.3618364594826529, "grad_norm": 1.1637330970477275, "learning_rate": 7.379863319035104e-06, "loss": 0.6297, "step": 11806 }, { "epoch": 0.3618671080053941, "grad_norm": 1.4546463323661163, "learning_rate": 7.3794268155877845e-06, "loss": 0.6982, "step": 11807 }, { "epoch": 0.36189775652813533, "grad_norm": 1.30629112136837, "learning_rate": 7.3789902886953636e-06, "loss": 0.7665, "step": 11808 }, { "epoch": 0.36192840505087653, "grad_norm": 1.1836971518687458, "learning_rate": 7.378553738362142e-06, "loss": 0.6062, "step": 11809 }, { "epoch": 0.36195905357361774, "grad_norm": 1.227456094321914, "learning_rate": 7.378117164592422e-06, "loss": 0.6048, "step": 11810 }, { "epoch": 0.36198970209635895, "grad_norm": 1.18359155920602, "learning_rate": 7.377680567390506e-06, "loss": 0.6221, "step": 11811 }, { "epoch": 0.36202035061910015, "grad_norm": 1.1434845792125394, "learning_rate": 7.3772439467606934e-06, "loss": 0.6405, "step": 11812 }, { "epoch": 0.36205099914184136, "grad_norm": 1.3954429355154712, "learning_rate": 7.376807302707291e-06, "loss": 0.722, "step": 11813 }, { "epoch": 0.36208164766458256, "grad_norm": 0.6071227051749606, "learning_rate": 7.376370635234596e-06, "loss": 0.4673, "step": 11814 }, { "epoch": 0.36211229618732377, "grad_norm": 1.0869757002471503, "learning_rate": 7.375933944346913e-06, "loss": 0.6277, "step": 11815 }, { "epoch": 0.362142944710065, "grad_norm": 1.2757825201442188, "learning_rate": 7.375497230048544e-06, "loss": 0.6197, "step": 11816 }, { "epoch": 0.3621735932328062, "grad_norm": 1.3998171808929565, "learning_rate": 7.375060492343796e-06, "loss": 0.7725, "step": 11817 }, { "epoch": 0.3622042417555474, "grad_norm": 1.385135544022528, "learning_rate": 7.374623731236966e-06, "loss": 0.6665, "step": 11818 }, { "epoch": 0.3622348902782886, "grad_norm": 1.1922010079032475, "learning_rate": 7.374186946732362e-06, "loss": 0.6707, "step": 11819 }, { "epoch": 0.3622655388010298, "grad_norm": 1.2639138380733188, "learning_rate": 7.373750138834287e-06, "loss": 0.7355, "step": 11820 }, { "epoch": 0.362296187323771, "grad_norm": 1.148182053406925, "learning_rate": 7.373313307547044e-06, "loss": 0.583, "step": 11821 }, { "epoch": 0.3623268358465122, "grad_norm": 1.2228634615890195, "learning_rate": 7.372876452874938e-06, "loss": 0.6792, "step": 11822 }, { "epoch": 0.3623574843692534, "grad_norm": 1.1938649778798527, "learning_rate": 7.372439574822273e-06, "loss": 0.6483, "step": 11823 }, { "epoch": 0.3623881328919946, "grad_norm": 1.16250401850594, "learning_rate": 7.372002673393355e-06, "loss": 0.6438, "step": 11824 }, { "epoch": 0.3624187814147358, "grad_norm": 1.260490852543711, "learning_rate": 7.371565748592487e-06, "loss": 0.7107, "step": 11825 }, { "epoch": 0.36244942993747703, "grad_norm": 0.4680561179220725, "learning_rate": 7.371128800423975e-06, "loss": 0.4302, "step": 11826 }, { "epoch": 0.36248007846021824, "grad_norm": 1.3135998017589017, "learning_rate": 7.370691828892124e-06, "loss": 0.6919, "step": 11827 }, { "epoch": 0.36251072698295944, "grad_norm": 0.4390362999126555, "learning_rate": 7.370254834001241e-06, "loss": 0.4308, "step": 11828 }, { "epoch": 0.36254137550570065, "grad_norm": 1.1841054386572638, "learning_rate": 7.369817815755629e-06, "loss": 0.7188, "step": 11829 }, { "epoch": 0.36257202402844185, "grad_norm": 1.3567729110666524, "learning_rate": 7.3693807741595955e-06, "loss": 0.6584, "step": 11830 }, { "epoch": 0.36260267255118306, "grad_norm": 1.3885714595687493, "learning_rate": 7.368943709217448e-06, "loss": 0.7106, "step": 11831 }, { "epoch": 0.36263332107392426, "grad_norm": 1.372688535729671, "learning_rate": 7.368506620933491e-06, "loss": 0.8113, "step": 11832 }, { "epoch": 0.3626639695966654, "grad_norm": 1.3395401988916482, "learning_rate": 7.3680695093120334e-06, "loss": 0.6764, "step": 11833 }, { "epoch": 0.3626946181194066, "grad_norm": 0.49074283848981803, "learning_rate": 7.3676323743573805e-06, "loss": 0.4447, "step": 11834 }, { "epoch": 0.3627252666421478, "grad_norm": 1.4608362989600254, "learning_rate": 7.36719521607384e-06, "loss": 0.6771, "step": 11835 }, { "epoch": 0.36275591516488903, "grad_norm": 1.3002295885297082, "learning_rate": 7.366758034465719e-06, "loss": 0.6414, "step": 11836 }, { "epoch": 0.36278656368763024, "grad_norm": 0.4531479203631776, "learning_rate": 7.366320829537328e-06, "loss": 0.4668, "step": 11837 }, { "epoch": 0.36281721221037144, "grad_norm": 1.2775161327373616, "learning_rate": 7.365883601292969e-06, "loss": 0.7112, "step": 11838 }, { "epoch": 0.36284786073311265, "grad_norm": 0.4520490299917324, "learning_rate": 7.365446349736955e-06, "loss": 0.4326, "step": 11839 }, { "epoch": 0.36287850925585385, "grad_norm": 1.2807806023208392, "learning_rate": 7.365009074873594e-06, "loss": 0.5923, "step": 11840 }, { "epoch": 0.36290915777859506, "grad_norm": 1.2820802670914113, "learning_rate": 7.364571776707192e-06, "loss": 0.6906, "step": 11841 }, { "epoch": 0.36293980630133627, "grad_norm": 0.4450868761795076, "learning_rate": 7.3641344552420605e-06, "loss": 0.444, "step": 11842 }, { "epoch": 0.36297045482407747, "grad_norm": 1.216849903286598, "learning_rate": 7.363697110482506e-06, "loss": 0.7423, "step": 11843 }, { "epoch": 0.3630011033468187, "grad_norm": 1.180584363583941, "learning_rate": 7.36325974243284e-06, "loss": 0.6479, "step": 11844 }, { "epoch": 0.3630317518695599, "grad_norm": 1.0765878804804394, "learning_rate": 7.362822351097371e-06, "loss": 0.6393, "step": 11845 }, { "epoch": 0.3630624003923011, "grad_norm": 1.3358783155730283, "learning_rate": 7.362384936480407e-06, "loss": 0.6368, "step": 11846 }, { "epoch": 0.3630930489150423, "grad_norm": 1.2587265965153958, "learning_rate": 7.361947498586262e-06, "loss": 0.7086, "step": 11847 }, { "epoch": 0.3631236974377835, "grad_norm": 1.129608770764529, "learning_rate": 7.361510037419244e-06, "loss": 0.5672, "step": 11848 }, { "epoch": 0.3631543459605247, "grad_norm": 1.335503370665705, "learning_rate": 7.361072552983661e-06, "loss": 0.6525, "step": 11849 }, { "epoch": 0.3631849944832659, "grad_norm": 1.2581676930200418, "learning_rate": 7.360635045283828e-06, "loss": 0.7191, "step": 11850 }, { "epoch": 0.3632156430060071, "grad_norm": 1.3138966936328245, "learning_rate": 7.360197514324055e-06, "loss": 0.5556, "step": 11851 }, { "epoch": 0.3632462915287483, "grad_norm": 1.3272888986428542, "learning_rate": 7.35975996010865e-06, "loss": 0.5916, "step": 11852 }, { "epoch": 0.3632769400514895, "grad_norm": 1.3081041060796572, "learning_rate": 7.359322382641926e-06, "loss": 0.746, "step": 11853 }, { "epoch": 0.36330758857423073, "grad_norm": 1.4012234793416845, "learning_rate": 7.358884781928196e-06, "loss": 0.7444, "step": 11854 }, { "epoch": 0.36333823709697194, "grad_norm": 1.2005480231539147, "learning_rate": 7.35844715797177e-06, "loss": 0.6764, "step": 11855 }, { "epoch": 0.36336888561971314, "grad_norm": 1.2771136135235786, "learning_rate": 7.358009510776963e-06, "loss": 0.6803, "step": 11856 }, { "epoch": 0.36339953414245435, "grad_norm": 1.2739189417568064, "learning_rate": 7.357571840348082e-06, "loss": 0.6718, "step": 11857 }, { "epoch": 0.36343018266519556, "grad_norm": 1.3495270845150333, "learning_rate": 7.357134146689444e-06, "loss": 0.6742, "step": 11858 }, { "epoch": 0.36346083118793676, "grad_norm": 1.1844666194629865, "learning_rate": 7.356696429805359e-06, "loss": 0.6603, "step": 11859 }, { "epoch": 0.36349147971067797, "grad_norm": 1.1651756363257928, "learning_rate": 7.356258689700143e-06, "loss": 0.699, "step": 11860 }, { "epoch": 0.3635221282334192, "grad_norm": 1.1818011030098583, "learning_rate": 7.355820926378105e-06, "loss": 0.6746, "step": 11861 }, { "epoch": 0.3635527767561604, "grad_norm": 0.5236066342205518, "learning_rate": 7.3553831398435625e-06, "loss": 0.4184, "step": 11862 }, { "epoch": 0.3635834252789016, "grad_norm": 0.48676509342326857, "learning_rate": 7.354945330100827e-06, "loss": 0.409, "step": 11863 }, { "epoch": 0.36361407380164273, "grad_norm": 1.200558369046751, "learning_rate": 7.354507497154212e-06, "loss": 0.7355, "step": 11864 }, { "epoch": 0.36364472232438394, "grad_norm": 1.1666723289482854, "learning_rate": 7.354069641008032e-06, "loss": 0.6813, "step": 11865 }, { "epoch": 0.36367537084712515, "grad_norm": 1.2726946200068279, "learning_rate": 7.353631761666602e-06, "loss": 0.6844, "step": 11866 }, { "epoch": 0.36370601936986635, "grad_norm": 1.177586957727297, "learning_rate": 7.353193859134237e-06, "loss": 0.5989, "step": 11867 }, { "epoch": 0.36373666789260756, "grad_norm": 1.2441933529804972, "learning_rate": 7.352755933415249e-06, "loss": 0.7854, "step": 11868 }, { "epoch": 0.36376731641534876, "grad_norm": 1.3631988822993906, "learning_rate": 7.352317984513956e-06, "loss": 0.7868, "step": 11869 }, { "epoch": 0.36379796493808997, "grad_norm": 1.3587306386139912, "learning_rate": 7.351880012434671e-06, "loss": 0.6441, "step": 11870 }, { "epoch": 0.3638286134608312, "grad_norm": 1.3222493724355864, "learning_rate": 7.351442017181711e-06, "loss": 0.7319, "step": 11871 }, { "epoch": 0.3638592619835724, "grad_norm": 1.159153595588828, "learning_rate": 7.351003998759392e-06, "loss": 0.631, "step": 11872 }, { "epoch": 0.3638899105063136, "grad_norm": 1.2141029758192863, "learning_rate": 7.350565957172028e-06, "loss": 0.6211, "step": 11873 }, { "epoch": 0.3639205590290548, "grad_norm": 1.2088810926260656, "learning_rate": 7.350127892423936e-06, "loss": 0.6847, "step": 11874 }, { "epoch": 0.363951207551796, "grad_norm": 1.387922101747585, "learning_rate": 7.349689804519433e-06, "loss": 0.6479, "step": 11875 }, { "epoch": 0.3639818560745372, "grad_norm": 1.0580319536393503, "learning_rate": 7.349251693462836e-06, "loss": 0.6023, "step": 11876 }, { "epoch": 0.3640125045972784, "grad_norm": 1.3385917132000833, "learning_rate": 7.348813559258461e-06, "loss": 0.6453, "step": 11877 }, { "epoch": 0.3640431531200196, "grad_norm": 1.3497995777404586, "learning_rate": 7.348375401910624e-06, "loss": 0.6732, "step": 11878 }, { "epoch": 0.3640738016427608, "grad_norm": 1.1786892170131371, "learning_rate": 7.3479372214236425e-06, "loss": 0.6983, "step": 11879 }, { "epoch": 0.364104450165502, "grad_norm": 1.1676328157919396, "learning_rate": 7.347499017801837e-06, "loss": 0.677, "step": 11880 }, { "epoch": 0.36413509868824323, "grad_norm": 1.1525825715159363, "learning_rate": 7.347060791049521e-06, "loss": 0.6996, "step": 11881 }, { "epoch": 0.36416574721098444, "grad_norm": 1.495949852078385, "learning_rate": 7.3466225411710156e-06, "loss": 0.6597, "step": 11882 }, { "epoch": 0.36419639573372564, "grad_norm": 1.192783022204994, "learning_rate": 7.3461842681706375e-06, "loss": 0.6734, "step": 11883 }, { "epoch": 0.36422704425646685, "grad_norm": 1.1860637534465643, "learning_rate": 7.345745972052707e-06, "loss": 0.6191, "step": 11884 }, { "epoch": 0.36425769277920805, "grad_norm": 0.5767042137398758, "learning_rate": 7.345307652821538e-06, "loss": 0.4421, "step": 11885 }, { "epoch": 0.36428834130194926, "grad_norm": 1.1581056748249217, "learning_rate": 7.344869310481455e-06, "loss": 0.6856, "step": 11886 }, { "epoch": 0.36431898982469046, "grad_norm": 1.3398525249477344, "learning_rate": 7.344430945036776e-06, "loss": 0.6557, "step": 11887 }, { "epoch": 0.36434963834743167, "grad_norm": 1.2465482333256157, "learning_rate": 7.343992556491817e-06, "loss": 0.7621, "step": 11888 }, { "epoch": 0.3643802868701729, "grad_norm": 1.2485376165293336, "learning_rate": 7.3435541448509e-06, "loss": 0.7226, "step": 11889 }, { "epoch": 0.3644109353929141, "grad_norm": 1.1378538013547597, "learning_rate": 7.343115710118344e-06, "loss": 0.6807, "step": 11890 }, { "epoch": 0.3644415839156553, "grad_norm": 1.298092082350547, "learning_rate": 7.34267725229847e-06, "loss": 0.6866, "step": 11891 }, { "epoch": 0.3644722324383965, "grad_norm": 1.15216859632646, "learning_rate": 7.3422387713955965e-06, "loss": 0.5708, "step": 11892 }, { "epoch": 0.3645028809611377, "grad_norm": 1.2540474958553645, "learning_rate": 7.341800267414047e-06, "loss": 0.666, "step": 11893 }, { "epoch": 0.3645335294838789, "grad_norm": 1.268443091441063, "learning_rate": 7.341361740358139e-06, "loss": 0.6349, "step": 11894 }, { "epoch": 0.36456417800662005, "grad_norm": 1.289906954145654, "learning_rate": 7.340923190232195e-06, "loss": 0.6728, "step": 11895 }, { "epoch": 0.36459482652936126, "grad_norm": 1.295853500362877, "learning_rate": 7.3404846170405355e-06, "loss": 0.7146, "step": 11896 }, { "epoch": 0.36462547505210247, "grad_norm": 1.088959245646588, "learning_rate": 7.340046020787484e-06, "loss": 0.67, "step": 11897 }, { "epoch": 0.36465612357484367, "grad_norm": 1.2558496061946471, "learning_rate": 7.339607401477357e-06, "loss": 0.6341, "step": 11898 }, { "epoch": 0.3646867720975849, "grad_norm": 0.4660153856657566, "learning_rate": 7.339168759114483e-06, "loss": 0.4246, "step": 11899 }, { "epoch": 0.3647174206203261, "grad_norm": 1.2552732490451903, "learning_rate": 7.338730093703179e-06, "loss": 0.7771, "step": 11900 }, { "epoch": 0.3647480691430673, "grad_norm": 1.09002837336371, "learning_rate": 7.33829140524777e-06, "loss": 0.6802, "step": 11901 }, { "epoch": 0.3647787176658085, "grad_norm": 1.1155848618808073, "learning_rate": 7.337852693752576e-06, "loss": 0.6818, "step": 11902 }, { "epoch": 0.3648093661885497, "grad_norm": 1.365694277206042, "learning_rate": 7.337413959221923e-06, "loss": 0.6978, "step": 11903 }, { "epoch": 0.3648400147112909, "grad_norm": 1.197155027014524, "learning_rate": 7.33697520166013e-06, "loss": 0.7346, "step": 11904 }, { "epoch": 0.3648706632340321, "grad_norm": 1.335081389573835, "learning_rate": 7.336536421071524e-06, "loss": 0.7233, "step": 11905 }, { "epoch": 0.3649013117567733, "grad_norm": 1.4585501040686644, "learning_rate": 7.336097617460427e-06, "loss": 0.7116, "step": 11906 }, { "epoch": 0.3649319602795145, "grad_norm": 1.2009057917818102, "learning_rate": 7.335658790831162e-06, "loss": 0.7057, "step": 11907 }, { "epoch": 0.36496260880225573, "grad_norm": 1.2546513257308851, "learning_rate": 7.335219941188052e-06, "loss": 0.649, "step": 11908 }, { "epoch": 0.36499325732499693, "grad_norm": 1.1569147488600515, "learning_rate": 7.334781068535424e-06, "loss": 0.7013, "step": 11909 }, { "epoch": 0.36502390584773814, "grad_norm": 1.3465559072948434, "learning_rate": 7.334342172877601e-06, "loss": 0.622, "step": 11910 }, { "epoch": 0.36505455437047934, "grad_norm": 1.2618037949248588, "learning_rate": 7.333903254218906e-06, "loss": 0.6431, "step": 11911 }, { "epoch": 0.36508520289322055, "grad_norm": 1.368298395386397, "learning_rate": 7.333464312563666e-06, "loss": 0.6493, "step": 11912 }, { "epoch": 0.36511585141596176, "grad_norm": 1.2141135711282505, "learning_rate": 7.333025347916205e-06, "loss": 0.7046, "step": 11913 }, { "epoch": 0.36514649993870296, "grad_norm": 1.3589474282550815, "learning_rate": 7.3325863602808486e-06, "loss": 0.6825, "step": 11914 }, { "epoch": 0.36517714846144417, "grad_norm": 1.2265911435583892, "learning_rate": 7.332147349661921e-06, "loss": 0.7217, "step": 11915 }, { "epoch": 0.3652077969841854, "grad_norm": 1.284876589399865, "learning_rate": 7.33170831606375e-06, "loss": 0.7349, "step": 11916 }, { "epoch": 0.3652384455069266, "grad_norm": 1.2528020444817276, "learning_rate": 7.33126925949066e-06, "loss": 0.6054, "step": 11917 }, { "epoch": 0.3652690940296678, "grad_norm": 1.3113600358307818, "learning_rate": 7.330830179946977e-06, "loss": 0.6427, "step": 11918 }, { "epoch": 0.365299742552409, "grad_norm": 0.44349675259390897, "learning_rate": 7.330391077437028e-06, "loss": 0.4367, "step": 11919 }, { "epoch": 0.3653303910751502, "grad_norm": 0.44024856617620245, "learning_rate": 7.32995195196514e-06, "loss": 0.433, "step": 11920 }, { "epoch": 0.3653610395978914, "grad_norm": 1.1878019440454228, "learning_rate": 7.329512803535639e-06, "loss": 0.7119, "step": 11921 }, { "epoch": 0.3653916881206326, "grad_norm": 1.3647648653412296, "learning_rate": 7.329073632152852e-06, "loss": 0.8041, "step": 11922 }, { "epoch": 0.3654223366433738, "grad_norm": 1.41654268338693, "learning_rate": 7.328634437821107e-06, "loss": 0.7419, "step": 11923 }, { "epoch": 0.365452985166115, "grad_norm": 1.268455224523202, "learning_rate": 7.328195220544731e-06, "loss": 0.6531, "step": 11924 }, { "epoch": 0.3654836336888562, "grad_norm": 1.398251607645646, "learning_rate": 7.327755980328053e-06, "loss": 0.6879, "step": 11925 }, { "epoch": 0.3655142822115974, "grad_norm": 1.3535622400591887, "learning_rate": 7.3273167171753965e-06, "loss": 0.6696, "step": 11926 }, { "epoch": 0.3655449307343386, "grad_norm": 1.2951292623722357, "learning_rate": 7.326877431091095e-06, "loss": 0.6513, "step": 11927 }, { "epoch": 0.3655755792570798, "grad_norm": 1.2073907077346813, "learning_rate": 7.326438122079474e-06, "loss": 0.7006, "step": 11928 }, { "epoch": 0.365606227779821, "grad_norm": 0.46761727672753123, "learning_rate": 7.325998790144866e-06, "loss": 0.4166, "step": 11929 }, { "epoch": 0.3656368763025622, "grad_norm": 1.2684221621712728, "learning_rate": 7.325559435291594e-06, "loss": 0.6772, "step": 11930 }, { "epoch": 0.3656675248253034, "grad_norm": 1.3993081151722255, "learning_rate": 7.3251200575239904e-06, "loss": 0.6272, "step": 11931 }, { "epoch": 0.3656981733480446, "grad_norm": 1.2869468152728052, "learning_rate": 7.324680656846382e-06, "loss": 0.6088, "step": 11932 }, { "epoch": 0.3657288218707858, "grad_norm": 1.2159037210193555, "learning_rate": 7.324241233263103e-06, "loss": 0.6345, "step": 11933 }, { "epoch": 0.365759470393527, "grad_norm": 1.255523751110728, "learning_rate": 7.32380178677848e-06, "loss": 0.6223, "step": 11934 }, { "epoch": 0.3657901189162682, "grad_norm": 1.1682138378862332, "learning_rate": 7.323362317396843e-06, "loss": 0.7405, "step": 11935 }, { "epoch": 0.36582076743900943, "grad_norm": 1.2314839566943867, "learning_rate": 7.322922825122522e-06, "loss": 0.6653, "step": 11936 }, { "epoch": 0.36585141596175064, "grad_norm": 1.1698567379349576, "learning_rate": 7.322483309959849e-06, "loss": 0.6269, "step": 11937 }, { "epoch": 0.36588206448449184, "grad_norm": 0.4915421593989688, "learning_rate": 7.322043771913154e-06, "loss": 0.4454, "step": 11938 }, { "epoch": 0.36591271300723305, "grad_norm": 1.2387616447987873, "learning_rate": 7.321604210986766e-06, "loss": 0.6862, "step": 11939 }, { "epoch": 0.36594336152997425, "grad_norm": 1.2145159640883947, "learning_rate": 7.321164627185019e-06, "loss": 0.6939, "step": 11940 }, { "epoch": 0.36597401005271546, "grad_norm": 1.1349966432395946, "learning_rate": 7.320725020512243e-06, "loss": 0.6205, "step": 11941 }, { "epoch": 0.36600465857545667, "grad_norm": 1.3706319830281628, "learning_rate": 7.32028539097277e-06, "loss": 0.6606, "step": 11942 }, { "epoch": 0.36603530709819787, "grad_norm": 1.2732881762043655, "learning_rate": 7.31984573857093e-06, "loss": 0.7009, "step": 11943 }, { "epoch": 0.3660659556209391, "grad_norm": 1.1745029671194387, "learning_rate": 7.319406063311056e-06, "loss": 0.5866, "step": 11944 }, { "epoch": 0.3660966041436803, "grad_norm": 0.4452027245820716, "learning_rate": 7.318966365197483e-06, "loss": 0.4338, "step": 11945 }, { "epoch": 0.3661272526664215, "grad_norm": 1.1949941674081053, "learning_rate": 7.318526644234538e-06, "loss": 0.6495, "step": 11946 }, { "epoch": 0.3661579011891627, "grad_norm": 0.46494247379940806, "learning_rate": 7.31808690042656e-06, "loss": 0.4377, "step": 11947 }, { "epoch": 0.3661885497119039, "grad_norm": 0.44473354791258096, "learning_rate": 7.317647133777877e-06, "loss": 0.4423, "step": 11948 }, { "epoch": 0.3662191982346451, "grad_norm": 1.1384559027820833, "learning_rate": 7.317207344292825e-06, "loss": 0.6407, "step": 11949 }, { "epoch": 0.3662498467573863, "grad_norm": 1.3382201360007986, "learning_rate": 7.316767531975734e-06, "loss": 0.6328, "step": 11950 }, { "epoch": 0.3662804952801275, "grad_norm": 0.44431309034936756, "learning_rate": 7.31632769683094e-06, "loss": 0.4453, "step": 11951 }, { "epoch": 0.3663111438028687, "grad_norm": 0.44756529522284577, "learning_rate": 7.315887838862778e-06, "loss": 0.4318, "step": 11952 }, { "epoch": 0.3663417923256099, "grad_norm": 1.3357779339032905, "learning_rate": 7.315447958075581e-06, "loss": 0.6873, "step": 11953 }, { "epoch": 0.36637244084835113, "grad_norm": 1.0833300531793952, "learning_rate": 7.315008054473681e-06, "loss": 0.5785, "step": 11954 }, { "epoch": 0.36640308937109234, "grad_norm": 1.2539058141651989, "learning_rate": 7.314568128061416e-06, "loss": 0.6247, "step": 11955 }, { "epoch": 0.36643373789383354, "grad_norm": 1.4101359218403244, "learning_rate": 7.314128178843118e-06, "loss": 0.7162, "step": 11956 }, { "epoch": 0.3664643864165747, "grad_norm": 1.2165356252027588, "learning_rate": 7.313688206823123e-06, "loss": 0.7138, "step": 11957 }, { "epoch": 0.3664950349393159, "grad_norm": 1.3540548158498409, "learning_rate": 7.3132482120057654e-06, "loss": 0.6872, "step": 11958 }, { "epoch": 0.3665256834620571, "grad_norm": 1.2388947239480617, "learning_rate": 7.312808194395382e-06, "loss": 0.6833, "step": 11959 }, { "epoch": 0.3665563319847983, "grad_norm": 1.1920967883985925, "learning_rate": 7.312368153996309e-06, "loss": 0.6878, "step": 11960 }, { "epoch": 0.3665869805075395, "grad_norm": 1.1343413579584996, "learning_rate": 7.311928090812878e-06, "loss": 0.6527, "step": 11961 }, { "epoch": 0.3666176290302807, "grad_norm": 1.1034426638139374, "learning_rate": 7.31148800484943e-06, "loss": 0.6279, "step": 11962 }, { "epoch": 0.36664827755302193, "grad_norm": 1.2274999294935574, "learning_rate": 7.311047896110299e-06, "loss": 0.6007, "step": 11963 }, { "epoch": 0.36667892607576313, "grad_norm": 1.260901130273907, "learning_rate": 7.310607764599823e-06, "loss": 0.6168, "step": 11964 }, { "epoch": 0.36670957459850434, "grad_norm": 0.5421548555963754, "learning_rate": 7.310167610322337e-06, "loss": 0.4179, "step": 11965 }, { "epoch": 0.36674022312124555, "grad_norm": 1.3303832599886372, "learning_rate": 7.309727433282177e-06, "loss": 0.7895, "step": 11966 }, { "epoch": 0.36677087164398675, "grad_norm": 0.48476427431513786, "learning_rate": 7.3092872334836814e-06, "loss": 0.4303, "step": 11967 }, { "epoch": 0.36680152016672796, "grad_norm": 1.5428994415393613, "learning_rate": 7.30884701093119e-06, "loss": 0.6664, "step": 11968 }, { "epoch": 0.36683216868946916, "grad_norm": 1.0635405153685533, "learning_rate": 7.308406765629037e-06, "loss": 0.6548, "step": 11969 }, { "epoch": 0.36686281721221037, "grad_norm": 1.3002971942903512, "learning_rate": 7.307966497581562e-06, "loss": 0.7421, "step": 11970 }, { "epoch": 0.3668934657349516, "grad_norm": 1.1985797485990088, "learning_rate": 7.307526206793102e-06, "loss": 0.6292, "step": 11971 }, { "epoch": 0.3669241142576928, "grad_norm": 1.1661139784715207, "learning_rate": 7.3070858932679956e-06, "loss": 0.6606, "step": 11972 }, { "epoch": 0.366954762780434, "grad_norm": 1.398477325593321, "learning_rate": 7.3066455570105824e-06, "loss": 0.8415, "step": 11973 }, { "epoch": 0.3669854113031752, "grad_norm": 0.49171831182443665, "learning_rate": 7.3062051980252e-06, "loss": 0.4343, "step": 11974 }, { "epoch": 0.3670160598259164, "grad_norm": 1.0897421902645479, "learning_rate": 7.305764816316188e-06, "loss": 0.6657, "step": 11975 }, { "epoch": 0.3670467083486576, "grad_norm": 1.3239507378113382, "learning_rate": 7.305324411887884e-06, "loss": 0.7331, "step": 11976 }, { "epoch": 0.3670773568713988, "grad_norm": 1.316034487842601, "learning_rate": 7.3048839847446305e-06, "loss": 0.6732, "step": 11977 }, { "epoch": 0.36710800539414, "grad_norm": 0.4826088317607586, "learning_rate": 7.304443534890764e-06, "loss": 0.4487, "step": 11978 }, { "epoch": 0.3671386539168812, "grad_norm": 1.3287479587995863, "learning_rate": 7.304003062330627e-06, "loss": 0.6893, "step": 11979 }, { "epoch": 0.3671693024396224, "grad_norm": 0.47157871411852, "learning_rate": 7.303562567068557e-06, "loss": 0.4557, "step": 11980 }, { "epoch": 0.36719995096236363, "grad_norm": 0.44807380440763367, "learning_rate": 7.303122049108897e-06, "loss": 0.427, "step": 11981 }, { "epoch": 0.36723059948510484, "grad_norm": 1.1566434146503988, "learning_rate": 7.302681508455985e-06, "loss": 0.6862, "step": 11982 }, { "epoch": 0.36726124800784604, "grad_norm": 1.4111417014369512, "learning_rate": 7.302240945114164e-06, "loss": 0.6317, "step": 11983 }, { "epoch": 0.36729189653058725, "grad_norm": 1.299854169733784, "learning_rate": 7.301800359087773e-06, "loss": 0.8325, "step": 11984 }, { "epoch": 0.36732254505332845, "grad_norm": 1.0742303763665841, "learning_rate": 7.301359750381154e-06, "loss": 0.5945, "step": 11985 }, { "epoch": 0.36735319357606966, "grad_norm": 1.2429573496468014, "learning_rate": 7.300919118998648e-06, "loss": 0.7147, "step": 11986 }, { "epoch": 0.36738384209881086, "grad_norm": 1.2587225567233475, "learning_rate": 7.300478464944599e-06, "loss": 0.6528, "step": 11987 }, { "epoch": 0.367414490621552, "grad_norm": 1.3227892400957892, "learning_rate": 7.300037788223346e-06, "loss": 0.7342, "step": 11988 }, { "epoch": 0.3674451391442932, "grad_norm": 1.1669643893977566, "learning_rate": 7.299597088839232e-06, "loss": 0.6336, "step": 11989 }, { "epoch": 0.3674757876670344, "grad_norm": 1.3702092650991402, "learning_rate": 7.2991563667966004e-06, "loss": 0.7175, "step": 11990 }, { "epoch": 0.36750643618977563, "grad_norm": 0.576923068562378, "learning_rate": 7.298715622099793e-06, "loss": 0.4401, "step": 11991 }, { "epoch": 0.36753708471251684, "grad_norm": 1.2545633520977735, "learning_rate": 7.298274854753153e-06, "loss": 0.609, "step": 11992 }, { "epoch": 0.36756773323525804, "grad_norm": 1.2305212755631931, "learning_rate": 7.29783406476102e-06, "loss": 0.6986, "step": 11993 }, { "epoch": 0.36759838175799925, "grad_norm": 1.2758980643625428, "learning_rate": 7.297393252127744e-06, "loss": 0.7174, "step": 11994 }, { "epoch": 0.36762903028074045, "grad_norm": 1.2884937656067883, "learning_rate": 7.2969524168576615e-06, "loss": 0.6844, "step": 11995 }, { "epoch": 0.36765967880348166, "grad_norm": 2.239728482998254, "learning_rate": 7.29651155895512e-06, "loss": 0.6424, "step": 11996 }, { "epoch": 0.36769032732622287, "grad_norm": 0.4629446613897844, "learning_rate": 7.296070678424461e-06, "loss": 0.4304, "step": 11997 }, { "epoch": 0.36772097584896407, "grad_norm": 1.2224644619850689, "learning_rate": 7.295629775270033e-06, "loss": 0.6881, "step": 11998 }, { "epoch": 0.3677516243717053, "grad_norm": 1.250074335441536, "learning_rate": 7.2951888494961755e-06, "loss": 0.7207, "step": 11999 }, { "epoch": 0.3677822728944465, "grad_norm": 1.3011916020852736, "learning_rate": 7.294747901107235e-06, "loss": 0.7134, "step": 12000 }, { "epoch": 0.3678129214171877, "grad_norm": 1.1131011594567188, "learning_rate": 7.294306930107556e-06, "loss": 0.587, "step": 12001 }, { "epoch": 0.3678435699399289, "grad_norm": 1.1823265219238355, "learning_rate": 7.293865936501485e-06, "loss": 0.6428, "step": 12002 }, { "epoch": 0.3678742184626701, "grad_norm": 1.2276097427463135, "learning_rate": 7.293424920293366e-06, "loss": 0.7578, "step": 12003 }, { "epoch": 0.3679048669854113, "grad_norm": 1.2330346476565408, "learning_rate": 7.2929838814875434e-06, "loss": 0.6491, "step": 12004 }, { "epoch": 0.3679355155081525, "grad_norm": 0.5098717003644873, "learning_rate": 7.292542820088364e-06, "loss": 0.427, "step": 12005 }, { "epoch": 0.3679661640308937, "grad_norm": 1.2167157998018945, "learning_rate": 7.292101736100175e-06, "loss": 0.7183, "step": 12006 }, { "epoch": 0.3679968125536349, "grad_norm": 1.3135035568143665, "learning_rate": 7.291660629527321e-06, "loss": 0.6462, "step": 12007 }, { "epoch": 0.36802746107637613, "grad_norm": 1.2685087972337497, "learning_rate": 7.291219500374147e-06, "loss": 0.6577, "step": 12008 }, { "epoch": 0.36805810959911733, "grad_norm": 1.3363075312655037, "learning_rate": 7.2907783486450016e-06, "loss": 0.5889, "step": 12009 }, { "epoch": 0.36808875812185854, "grad_norm": 1.4232264096209561, "learning_rate": 7.290337174344231e-06, "loss": 0.7335, "step": 12010 }, { "epoch": 0.36811940664459974, "grad_norm": 0.45490867907630694, "learning_rate": 7.289895977476184e-06, "loss": 0.4447, "step": 12011 }, { "epoch": 0.36815005516734095, "grad_norm": 1.2290836100393314, "learning_rate": 7.289454758045203e-06, "loss": 0.708, "step": 12012 }, { "epoch": 0.36818070369008216, "grad_norm": 1.2053003265923796, "learning_rate": 7.289013516055639e-06, "loss": 0.7293, "step": 12013 }, { "epoch": 0.36821135221282336, "grad_norm": 1.1455190802835, "learning_rate": 7.288572251511842e-06, "loss": 0.6763, "step": 12014 }, { "epoch": 0.36824200073556457, "grad_norm": 1.10654420873287, "learning_rate": 7.2881309644181546e-06, "loss": 0.622, "step": 12015 }, { "epoch": 0.3682726492583058, "grad_norm": 0.47006980894766714, "learning_rate": 7.287689654778928e-06, "loss": 0.4381, "step": 12016 }, { "epoch": 0.368303297781047, "grad_norm": 1.3861528091590274, "learning_rate": 7.287248322598509e-06, "loss": 0.653, "step": 12017 }, { "epoch": 0.3683339463037882, "grad_norm": 1.1283852961799374, "learning_rate": 7.286806967881248e-06, "loss": 0.5901, "step": 12018 }, { "epoch": 0.36836459482652933, "grad_norm": 1.1831778930234687, "learning_rate": 7.286365590631492e-06, "loss": 0.7052, "step": 12019 }, { "epoch": 0.36839524334927054, "grad_norm": 1.3627765096628675, "learning_rate": 7.285924190853593e-06, "loss": 0.6415, "step": 12020 }, { "epoch": 0.36842589187201175, "grad_norm": 0.4739671264126191, "learning_rate": 7.285482768551897e-06, "loss": 0.4385, "step": 12021 }, { "epoch": 0.36845654039475295, "grad_norm": 1.200010771445854, "learning_rate": 7.285041323730754e-06, "loss": 0.6613, "step": 12022 }, { "epoch": 0.36848718891749416, "grad_norm": 1.177149377359286, "learning_rate": 7.284599856394513e-06, "loss": 0.6844, "step": 12023 }, { "epoch": 0.36851783744023536, "grad_norm": 1.1238490219746302, "learning_rate": 7.284158366547527e-06, "loss": 0.6074, "step": 12024 }, { "epoch": 0.36854848596297657, "grad_norm": 1.1124119656660219, "learning_rate": 7.283716854194144e-06, "loss": 0.5595, "step": 12025 }, { "epoch": 0.3685791344857178, "grad_norm": 1.116028275915779, "learning_rate": 7.283275319338714e-06, "loss": 0.6063, "step": 12026 }, { "epoch": 0.368609783008459, "grad_norm": 1.3999430727096338, "learning_rate": 7.282833761985588e-06, "loss": 0.5806, "step": 12027 }, { "epoch": 0.3686404315312002, "grad_norm": 1.2863129055647877, "learning_rate": 7.282392182139117e-06, "loss": 0.5817, "step": 12028 }, { "epoch": 0.3686710800539414, "grad_norm": 1.3058875545995776, "learning_rate": 7.2819505798036525e-06, "loss": 0.667, "step": 12029 }, { "epoch": 0.3687017285766826, "grad_norm": 1.3232063763353137, "learning_rate": 7.281508954983544e-06, "loss": 0.6832, "step": 12030 }, { "epoch": 0.3687323770994238, "grad_norm": 1.3866982101193204, "learning_rate": 7.281067307683144e-06, "loss": 0.7114, "step": 12031 }, { "epoch": 0.368763025622165, "grad_norm": 1.1316969940525288, "learning_rate": 7.280625637906804e-06, "loss": 0.6926, "step": 12032 }, { "epoch": 0.3687936741449062, "grad_norm": 1.4031998800148604, "learning_rate": 7.280183945658879e-06, "loss": 0.6471, "step": 12033 }, { "epoch": 0.3688243226676474, "grad_norm": 0.47460147996220614, "learning_rate": 7.279742230943714e-06, "loss": 0.4209, "step": 12034 }, { "epoch": 0.3688549711903886, "grad_norm": 0.49247371967968473, "learning_rate": 7.2793004937656686e-06, "loss": 0.4479, "step": 12035 }, { "epoch": 0.36888561971312983, "grad_norm": 0.4733732677426702, "learning_rate": 7.27885873412909e-06, "loss": 0.4523, "step": 12036 }, { "epoch": 0.36891626823587104, "grad_norm": 1.2399712416201694, "learning_rate": 7.278416952038335e-06, "loss": 0.6818, "step": 12037 }, { "epoch": 0.36894691675861224, "grad_norm": 1.2372657261324562, "learning_rate": 7.277975147497753e-06, "loss": 0.6178, "step": 12038 }, { "epoch": 0.36897756528135345, "grad_norm": 0.45917189851986256, "learning_rate": 7.277533320511702e-06, "loss": 0.4416, "step": 12039 }, { "epoch": 0.36900821380409465, "grad_norm": 1.3489832972543312, "learning_rate": 7.27709147108453e-06, "loss": 0.7907, "step": 12040 }, { "epoch": 0.36903886232683586, "grad_norm": 1.2280406779894355, "learning_rate": 7.276649599220594e-06, "loss": 0.6758, "step": 12041 }, { "epoch": 0.36906951084957706, "grad_norm": 1.224597909115962, "learning_rate": 7.276207704924247e-06, "loss": 0.664, "step": 12042 }, { "epoch": 0.36910015937231827, "grad_norm": 1.350982297639562, "learning_rate": 7.275765788199842e-06, "loss": 0.6057, "step": 12043 }, { "epoch": 0.3691308078950595, "grad_norm": 1.2914592546675825, "learning_rate": 7.275323849051734e-06, "loss": 0.7262, "step": 12044 }, { "epoch": 0.3691614564178007, "grad_norm": 1.183212340321854, "learning_rate": 7.27488188748428e-06, "loss": 0.6571, "step": 12045 }, { "epoch": 0.3691921049405419, "grad_norm": 1.8586950877239021, "learning_rate": 7.274439903501832e-06, "loss": 0.7092, "step": 12046 }, { "epoch": 0.3692227534632831, "grad_norm": 1.2928843187036632, "learning_rate": 7.273997897108744e-06, "loss": 0.6595, "step": 12047 }, { "epoch": 0.3692534019860243, "grad_norm": 1.1675744033345856, "learning_rate": 7.273555868309377e-06, "loss": 0.6856, "step": 12048 }, { "epoch": 0.3692840505087655, "grad_norm": 1.3632230715817157, "learning_rate": 7.273113817108078e-06, "loss": 0.6577, "step": 12049 }, { "epoch": 0.36931469903150665, "grad_norm": 1.1792684363954662, "learning_rate": 7.2726717435092095e-06, "loss": 0.5935, "step": 12050 }, { "epoch": 0.36934534755424786, "grad_norm": 1.0496267648475468, "learning_rate": 7.272229647517124e-06, "loss": 0.7453, "step": 12051 }, { "epoch": 0.36937599607698907, "grad_norm": 1.217388267666115, "learning_rate": 7.2717875291361796e-06, "loss": 0.6068, "step": 12052 }, { "epoch": 0.36940664459973027, "grad_norm": 1.173957739508731, "learning_rate": 7.2713453883707294e-06, "loss": 0.7155, "step": 12053 }, { "epoch": 0.3694372931224715, "grad_norm": 1.339979542418999, "learning_rate": 7.270903225225132e-06, "loss": 0.6888, "step": 12054 }, { "epoch": 0.3694679416452127, "grad_norm": 1.2709384849677907, "learning_rate": 7.2704610397037445e-06, "loss": 0.5692, "step": 12055 }, { "epoch": 0.3694985901679539, "grad_norm": 1.3470290774015774, "learning_rate": 7.270018831810924e-06, "loss": 0.8345, "step": 12056 }, { "epoch": 0.3695292386906951, "grad_norm": 1.2270674427871224, "learning_rate": 7.269576601551027e-06, "loss": 0.6687, "step": 12057 }, { "epoch": 0.3695598872134363, "grad_norm": 1.115060987927902, "learning_rate": 7.269134348928411e-06, "loss": 0.6151, "step": 12058 }, { "epoch": 0.3695905357361775, "grad_norm": 1.095317052864844, "learning_rate": 7.268692073947434e-06, "loss": 0.6398, "step": 12059 }, { "epoch": 0.3696211842589187, "grad_norm": 1.1555648922633979, "learning_rate": 7.268249776612453e-06, "loss": 0.6531, "step": 12060 }, { "epoch": 0.3696518327816599, "grad_norm": 1.2769999054508945, "learning_rate": 7.267807456927828e-06, "loss": 0.6386, "step": 12061 }, { "epoch": 0.3696824813044011, "grad_norm": 0.621884689357595, "learning_rate": 7.267365114897914e-06, "loss": 0.4521, "step": 12062 }, { "epoch": 0.36971312982714233, "grad_norm": 0.5567796406074914, "learning_rate": 7.266922750527073e-06, "loss": 0.4531, "step": 12063 }, { "epoch": 0.36974377834988353, "grad_norm": 1.232690814316755, "learning_rate": 7.26648036381966e-06, "loss": 0.616, "step": 12064 }, { "epoch": 0.36977442687262474, "grad_norm": 0.4355322562698701, "learning_rate": 7.266037954780038e-06, "loss": 0.4268, "step": 12065 }, { "epoch": 0.36980507539536595, "grad_norm": 1.2834395207209919, "learning_rate": 7.265595523412563e-06, "loss": 0.6682, "step": 12066 }, { "epoch": 0.36983572391810715, "grad_norm": 1.1429475511152376, "learning_rate": 7.265153069721597e-06, "loss": 0.7297, "step": 12067 }, { "epoch": 0.36986637244084836, "grad_norm": 1.298736763398994, "learning_rate": 7.264710593711497e-06, "loss": 0.6844, "step": 12068 }, { "epoch": 0.36989702096358956, "grad_norm": 1.2924506518010859, "learning_rate": 7.264268095386625e-06, "loss": 0.6297, "step": 12069 }, { "epoch": 0.36992766948633077, "grad_norm": 1.3118979215835012, "learning_rate": 7.263825574751339e-06, "loss": 0.654, "step": 12070 }, { "epoch": 0.369958318009072, "grad_norm": 0.5749189294594824, "learning_rate": 7.26338303181e-06, "loss": 0.4105, "step": 12071 }, { "epoch": 0.3699889665318132, "grad_norm": 1.3766349331283818, "learning_rate": 7.262940466566971e-06, "loss": 0.6582, "step": 12072 }, { "epoch": 0.3700196150545544, "grad_norm": 1.3873985596864147, "learning_rate": 7.262497879026609e-06, "loss": 0.7427, "step": 12073 }, { "epoch": 0.3700502635772956, "grad_norm": 1.2781186521453922, "learning_rate": 7.2620552691932766e-06, "loss": 0.7209, "step": 12074 }, { "epoch": 0.3700809121000368, "grad_norm": 0.5333981608391171, "learning_rate": 7.2616126370713355e-06, "loss": 0.4288, "step": 12075 }, { "epoch": 0.370111560622778, "grad_norm": 0.4620228238054446, "learning_rate": 7.261169982665146e-06, "loss": 0.4123, "step": 12076 }, { "epoch": 0.3701422091455192, "grad_norm": 0.453742355832109, "learning_rate": 7.26072730597907e-06, "loss": 0.4163, "step": 12077 }, { "epoch": 0.3701728576682604, "grad_norm": 1.350118662414608, "learning_rate": 7.26028460701747e-06, "loss": 0.6927, "step": 12078 }, { "epoch": 0.3702035061910016, "grad_norm": 0.4888741004696426, "learning_rate": 7.259841885784707e-06, "loss": 0.4308, "step": 12079 }, { "epoch": 0.3702341547137428, "grad_norm": 1.1879256716234805, "learning_rate": 7.259399142285145e-06, "loss": 0.6658, "step": 12080 }, { "epoch": 0.370264803236484, "grad_norm": 1.3446684076093336, "learning_rate": 7.258956376523143e-06, "loss": 0.6883, "step": 12081 }, { "epoch": 0.3702954517592252, "grad_norm": 1.1014826893636114, "learning_rate": 7.258513588503067e-06, "loss": 0.6621, "step": 12082 }, { "epoch": 0.3703261002819664, "grad_norm": 1.2322788333594297, "learning_rate": 7.258070778229279e-06, "loss": 0.6672, "step": 12083 }, { "epoch": 0.3703567488047076, "grad_norm": 1.2980713376597024, "learning_rate": 7.257627945706141e-06, "loss": 0.7048, "step": 12084 }, { "epoch": 0.3703873973274488, "grad_norm": 1.2439075100908923, "learning_rate": 7.257185090938017e-06, "loss": 0.7208, "step": 12085 }, { "epoch": 0.37041804585019, "grad_norm": 1.353625672576473, "learning_rate": 7.2567422139292706e-06, "loss": 0.7076, "step": 12086 }, { "epoch": 0.3704486943729312, "grad_norm": 0.6013652272139565, "learning_rate": 7.256299314684269e-06, "loss": 0.4429, "step": 12087 }, { "epoch": 0.3704793428956724, "grad_norm": 1.3509466115081346, "learning_rate": 7.25585639320737e-06, "loss": 0.6682, "step": 12088 }, { "epoch": 0.3705099914184136, "grad_norm": 1.3988838325122448, "learning_rate": 7.255413449502942e-06, "loss": 0.6439, "step": 12089 }, { "epoch": 0.3705406399411548, "grad_norm": 1.196560509009115, "learning_rate": 7.254970483575345e-06, "loss": 0.6036, "step": 12090 }, { "epoch": 0.37057128846389603, "grad_norm": 0.5144273113220895, "learning_rate": 7.254527495428951e-06, "loss": 0.4319, "step": 12091 }, { "epoch": 0.37060193698663724, "grad_norm": 1.5385116522218618, "learning_rate": 7.254084485068119e-06, "loss": 0.7833, "step": 12092 }, { "epoch": 0.37063258550937844, "grad_norm": 1.4078094812885618, "learning_rate": 7.2536414524972154e-06, "loss": 0.7282, "step": 12093 }, { "epoch": 0.37066323403211965, "grad_norm": 1.184781337056121, "learning_rate": 7.253198397720607e-06, "loss": 0.7437, "step": 12094 }, { "epoch": 0.37069388255486085, "grad_norm": 0.4829194688017713, "learning_rate": 7.252755320742658e-06, "loss": 0.4131, "step": 12095 }, { "epoch": 0.37072453107760206, "grad_norm": 1.445404091944272, "learning_rate": 7.252312221567734e-06, "loss": 0.7079, "step": 12096 }, { "epoch": 0.37075517960034327, "grad_norm": 1.4075989827967887, "learning_rate": 7.2518691002002014e-06, "loss": 0.6726, "step": 12097 }, { "epoch": 0.37078582812308447, "grad_norm": 1.2462779650319828, "learning_rate": 7.251425956644426e-06, "loss": 0.5867, "step": 12098 }, { "epoch": 0.3708164766458257, "grad_norm": 0.5088919867788406, "learning_rate": 7.250982790904776e-06, "loss": 0.4173, "step": 12099 }, { "epoch": 0.3708471251685669, "grad_norm": 1.3607102814933243, "learning_rate": 7.250539602985616e-06, "loss": 0.7694, "step": 12100 }, { "epoch": 0.3708777736913081, "grad_norm": 0.5033645419597359, "learning_rate": 7.250096392891312e-06, "loss": 0.4321, "step": 12101 }, { "epoch": 0.3709084222140493, "grad_norm": 1.3088369228363548, "learning_rate": 7.249653160626236e-06, "loss": 0.6454, "step": 12102 }, { "epoch": 0.3709390707367905, "grad_norm": 1.2034715343881743, "learning_rate": 7.249209906194748e-06, "loss": 0.5833, "step": 12103 }, { "epoch": 0.3709697192595317, "grad_norm": 1.3797617043481243, "learning_rate": 7.248766629601221e-06, "loss": 0.5527, "step": 12104 }, { "epoch": 0.3710003677822729, "grad_norm": 1.3833803208623023, "learning_rate": 7.2483233308500215e-06, "loss": 0.6721, "step": 12105 }, { "epoch": 0.3710310163050141, "grad_norm": 1.306952036434484, "learning_rate": 7.247880009945517e-06, "loss": 0.5552, "step": 12106 }, { "epoch": 0.3710616648277553, "grad_norm": 1.340541212963105, "learning_rate": 7.247436666892075e-06, "loss": 0.6604, "step": 12107 }, { "epoch": 0.3710923133504965, "grad_norm": 1.1365964623543103, "learning_rate": 7.246993301694064e-06, "loss": 0.7051, "step": 12108 }, { "epoch": 0.37112296187323773, "grad_norm": 1.223348251257313, "learning_rate": 7.246549914355853e-06, "loss": 0.6826, "step": 12109 }, { "epoch": 0.37115361039597894, "grad_norm": 1.2851615729459582, "learning_rate": 7.246106504881811e-06, "loss": 0.6028, "step": 12110 }, { "epoch": 0.37118425891872014, "grad_norm": 1.22896321179327, "learning_rate": 7.245663073276309e-06, "loss": 0.677, "step": 12111 }, { "epoch": 0.3712149074414613, "grad_norm": 1.4774908897792087, "learning_rate": 7.245219619543712e-06, "loss": 0.5785, "step": 12112 }, { "epoch": 0.3712455559642025, "grad_norm": 1.3130056347222858, "learning_rate": 7.244776143688392e-06, "loss": 0.6713, "step": 12113 }, { "epoch": 0.3712762044869437, "grad_norm": 1.0484801939907546, "learning_rate": 7.244332645714719e-06, "loss": 0.5556, "step": 12114 }, { "epoch": 0.3713068530096849, "grad_norm": 1.1475847691773675, "learning_rate": 7.2438891256270615e-06, "loss": 0.6355, "step": 12115 }, { "epoch": 0.3713375015324261, "grad_norm": 1.1720910258770063, "learning_rate": 7.24344558342979e-06, "loss": 0.7434, "step": 12116 }, { "epoch": 0.3713681500551673, "grad_norm": 1.2863686969237758, "learning_rate": 7.243002019127277e-06, "loss": 0.7077, "step": 12117 }, { "epoch": 0.37139879857790853, "grad_norm": 1.1907848281864861, "learning_rate": 7.2425584327238895e-06, "loss": 0.5558, "step": 12118 }, { "epoch": 0.37142944710064973, "grad_norm": 1.2186380841268887, "learning_rate": 7.2421148242240005e-06, "loss": 0.6155, "step": 12119 }, { "epoch": 0.37146009562339094, "grad_norm": 1.3266603658590144, "learning_rate": 7.241671193631979e-06, "loss": 0.6854, "step": 12120 }, { "epoch": 0.37149074414613215, "grad_norm": 1.3170359949128478, "learning_rate": 7.2412275409522e-06, "loss": 0.7205, "step": 12121 }, { "epoch": 0.37152139266887335, "grad_norm": 1.3870747312847875, "learning_rate": 7.240783866189031e-06, "loss": 0.6794, "step": 12122 }, { "epoch": 0.37155204119161456, "grad_norm": 1.2774257250038903, "learning_rate": 7.240340169346847e-06, "loss": 0.5234, "step": 12123 }, { "epoch": 0.37158268971435576, "grad_norm": 1.1379476371430883, "learning_rate": 7.239896450430016e-06, "loss": 0.6291, "step": 12124 }, { "epoch": 0.37161333823709697, "grad_norm": 1.240869218259922, "learning_rate": 7.239452709442914e-06, "loss": 0.7221, "step": 12125 }, { "epoch": 0.3716439867598382, "grad_norm": 1.3752630559296148, "learning_rate": 7.23900894638991e-06, "loss": 0.6815, "step": 12126 }, { "epoch": 0.3716746352825794, "grad_norm": 1.2541359828012013, "learning_rate": 7.238565161275379e-06, "loss": 0.7172, "step": 12127 }, { "epoch": 0.3717052838053206, "grad_norm": 0.6707362231103955, "learning_rate": 7.2381213541036925e-06, "loss": 0.4323, "step": 12128 }, { "epoch": 0.3717359323280618, "grad_norm": 1.204300851817911, "learning_rate": 7.237677524879223e-06, "loss": 0.6722, "step": 12129 }, { "epoch": 0.371766580850803, "grad_norm": 1.292298533320278, "learning_rate": 7.2372336736063456e-06, "loss": 0.6382, "step": 12130 }, { "epoch": 0.3717972293735442, "grad_norm": 1.3107122850789588, "learning_rate": 7.23678980028943e-06, "loss": 0.6114, "step": 12131 }, { "epoch": 0.3718278778962854, "grad_norm": 1.144899953387902, "learning_rate": 7.2363459049328545e-06, "loss": 0.6664, "step": 12132 }, { "epoch": 0.3718585264190266, "grad_norm": 1.2919319180869222, "learning_rate": 7.23590198754099e-06, "loss": 0.7206, "step": 12133 }, { "epoch": 0.3718891749417678, "grad_norm": 1.2156782984867516, "learning_rate": 7.235458048118211e-06, "loss": 0.7321, "step": 12134 }, { "epoch": 0.371919823464509, "grad_norm": 1.1415889541086859, "learning_rate": 7.235014086668892e-06, "loss": 0.642, "step": 12135 }, { "epoch": 0.37195047198725023, "grad_norm": 1.3129756744001755, "learning_rate": 7.234570103197407e-06, "loss": 0.7273, "step": 12136 }, { "epoch": 0.37198112050999144, "grad_norm": 1.3198844923545254, "learning_rate": 7.2341260977081314e-06, "loss": 0.5926, "step": 12137 }, { "epoch": 0.37201176903273264, "grad_norm": 1.2316794295810334, "learning_rate": 7.233682070205439e-06, "loss": 0.6613, "step": 12138 }, { "epoch": 0.37204241755547385, "grad_norm": 1.1955208390442131, "learning_rate": 7.2332380206937055e-06, "loss": 0.636, "step": 12139 }, { "epoch": 0.37207306607821505, "grad_norm": 0.5314539602220216, "learning_rate": 7.232793949177308e-06, "loss": 0.448, "step": 12140 }, { "epoch": 0.37210371460095626, "grad_norm": 0.5190944331291905, "learning_rate": 7.23234985566062e-06, "loss": 0.4403, "step": 12141 }, { "epoch": 0.37213436312369746, "grad_norm": 1.3916138280851844, "learning_rate": 7.231905740148017e-06, "loss": 0.6037, "step": 12142 }, { "epoch": 0.3721650116464386, "grad_norm": 1.2821518946343387, "learning_rate": 7.231461602643876e-06, "loss": 0.6452, "step": 12143 }, { "epoch": 0.3721956601691798, "grad_norm": 1.2001131215625618, "learning_rate": 7.2310174431525715e-06, "loss": 0.7197, "step": 12144 }, { "epoch": 0.372226308691921, "grad_norm": 1.3791784975557373, "learning_rate": 7.230573261678484e-06, "loss": 0.7268, "step": 12145 }, { "epoch": 0.37225695721466223, "grad_norm": 1.305554838116953, "learning_rate": 7.230129058225986e-06, "loss": 0.6443, "step": 12146 }, { "epoch": 0.37228760573740344, "grad_norm": 1.2433129208929818, "learning_rate": 7.229684832799455e-06, "loss": 0.8092, "step": 12147 }, { "epoch": 0.37231825426014464, "grad_norm": 1.3788558067877434, "learning_rate": 7.22924058540327e-06, "loss": 0.6444, "step": 12148 }, { "epoch": 0.37234890278288585, "grad_norm": 0.5706998720950048, "learning_rate": 7.228796316041807e-06, "loss": 0.4413, "step": 12149 }, { "epoch": 0.37237955130562705, "grad_norm": 1.2631583489221825, "learning_rate": 7.228352024719442e-06, "loss": 0.6118, "step": 12150 }, { "epoch": 0.37241019982836826, "grad_norm": 1.189203430557472, "learning_rate": 7.2279077114405575e-06, "loss": 0.6421, "step": 12151 }, { "epoch": 0.37244084835110947, "grad_norm": 1.1443269588813674, "learning_rate": 7.227463376209527e-06, "loss": 0.6819, "step": 12152 }, { "epoch": 0.37247149687385067, "grad_norm": 1.0977274130208459, "learning_rate": 7.227019019030729e-06, "loss": 0.6328, "step": 12153 }, { "epoch": 0.3725021453965919, "grad_norm": 1.304367275459591, "learning_rate": 7.226574639908543e-06, "loss": 0.7602, "step": 12154 }, { "epoch": 0.3725327939193331, "grad_norm": 1.193690879447642, "learning_rate": 7.226130238847347e-06, "loss": 0.6783, "step": 12155 }, { "epoch": 0.3725634424420743, "grad_norm": 1.2125901037116897, "learning_rate": 7.225685815851522e-06, "loss": 0.6606, "step": 12156 }, { "epoch": 0.3725940909648155, "grad_norm": 1.3755590145204706, "learning_rate": 7.225241370925444e-06, "loss": 0.6774, "step": 12157 }, { "epoch": 0.3726247394875567, "grad_norm": 1.3145275909949332, "learning_rate": 7.224796904073493e-06, "loss": 0.578, "step": 12158 }, { "epoch": 0.3726553880102979, "grad_norm": 0.5117833564516056, "learning_rate": 7.224352415300049e-06, "loss": 0.4569, "step": 12159 }, { "epoch": 0.3726860365330391, "grad_norm": 1.1878479600165874, "learning_rate": 7.223907904609493e-06, "loss": 0.5785, "step": 12160 }, { "epoch": 0.3727166850557803, "grad_norm": 1.3332840898963019, "learning_rate": 7.223463372006202e-06, "loss": 0.7506, "step": 12161 }, { "epoch": 0.3727473335785215, "grad_norm": 1.2113710898456602, "learning_rate": 7.223018817494558e-06, "loss": 0.6606, "step": 12162 }, { "epoch": 0.37277798210126273, "grad_norm": 1.3218725844921784, "learning_rate": 7.222574241078939e-06, "loss": 0.7235, "step": 12163 }, { "epoch": 0.37280863062400393, "grad_norm": 1.418311273612794, "learning_rate": 7.22212964276373e-06, "loss": 0.7056, "step": 12164 }, { "epoch": 0.37283927914674514, "grad_norm": 1.2727217981764172, "learning_rate": 7.221685022553309e-06, "loss": 0.6482, "step": 12165 }, { "epoch": 0.37286992766948635, "grad_norm": 1.2741342467122065, "learning_rate": 7.221240380452055e-06, "loss": 0.6001, "step": 12166 }, { "epoch": 0.37290057619222755, "grad_norm": 0.4754184817661771, "learning_rate": 7.220795716464352e-06, "loss": 0.4318, "step": 12167 }, { "epoch": 0.37293122471496876, "grad_norm": 0.4371706970170424, "learning_rate": 7.2203510305945815e-06, "loss": 0.4107, "step": 12168 }, { "epoch": 0.37296187323770996, "grad_norm": 1.2137468628187498, "learning_rate": 7.2199063228471235e-06, "loss": 0.6401, "step": 12169 }, { "epoch": 0.37299252176045117, "grad_norm": 1.2958356629500998, "learning_rate": 7.21946159322636e-06, "loss": 0.7074, "step": 12170 }, { "epoch": 0.3730231702831924, "grad_norm": 1.3258921777238528, "learning_rate": 7.219016841736675e-06, "loss": 0.6644, "step": 12171 }, { "epoch": 0.3730538188059336, "grad_norm": 1.1759406160915429, "learning_rate": 7.218572068382448e-06, "loss": 0.6993, "step": 12172 }, { "epoch": 0.3730844673286748, "grad_norm": 1.3061556962125305, "learning_rate": 7.218127273168063e-06, "loss": 0.6352, "step": 12173 }, { "epoch": 0.37311511585141593, "grad_norm": 1.1014954264011327, "learning_rate": 7.217682456097902e-06, "loss": 0.5712, "step": 12174 }, { "epoch": 0.37314576437415714, "grad_norm": 1.290157510575249, "learning_rate": 7.21723761717635e-06, "loss": 0.6612, "step": 12175 }, { "epoch": 0.37317641289689835, "grad_norm": 1.1710016450856526, "learning_rate": 7.216792756407787e-06, "loss": 0.5688, "step": 12176 }, { "epoch": 0.37320706141963955, "grad_norm": 1.2079453126500432, "learning_rate": 7.216347873796598e-06, "loss": 0.6801, "step": 12177 }, { "epoch": 0.37323770994238076, "grad_norm": 1.3730075558538823, "learning_rate": 7.215902969347166e-06, "loss": 0.6743, "step": 12178 }, { "epoch": 0.37326835846512196, "grad_norm": 1.2475083362102062, "learning_rate": 7.215458043063877e-06, "loss": 0.6221, "step": 12179 }, { "epoch": 0.37329900698786317, "grad_norm": 1.136924351241869, "learning_rate": 7.215013094951111e-06, "loss": 0.6849, "step": 12180 }, { "epoch": 0.3733296555106044, "grad_norm": 1.3846019600009676, "learning_rate": 7.214568125013254e-06, "loss": 0.7608, "step": 12181 }, { "epoch": 0.3733603040333456, "grad_norm": 1.1888205978790796, "learning_rate": 7.214123133254691e-06, "loss": 0.7, "step": 12182 }, { "epoch": 0.3733909525560868, "grad_norm": 1.3279211362818595, "learning_rate": 7.2136781196798075e-06, "loss": 0.7249, "step": 12183 }, { "epoch": 0.373421601078828, "grad_norm": 1.1466384523859832, "learning_rate": 7.213233084292986e-06, "loss": 0.6422, "step": 12184 }, { "epoch": 0.3734522496015692, "grad_norm": 1.308100422125347, "learning_rate": 7.212788027098613e-06, "loss": 0.6732, "step": 12185 }, { "epoch": 0.3734828981243104, "grad_norm": 1.267602600472686, "learning_rate": 7.212342948101075e-06, "loss": 0.6348, "step": 12186 }, { "epoch": 0.3735135466470516, "grad_norm": 1.3210428870398652, "learning_rate": 7.211897847304753e-06, "loss": 0.6685, "step": 12187 }, { "epoch": 0.3735441951697928, "grad_norm": 1.4353432453062442, "learning_rate": 7.211452724714037e-06, "loss": 0.7355, "step": 12188 }, { "epoch": 0.373574843692534, "grad_norm": 0.4842358118846694, "learning_rate": 7.211007580333311e-06, "loss": 0.4179, "step": 12189 }, { "epoch": 0.3736054922152752, "grad_norm": 1.2113352943601359, "learning_rate": 7.2105624141669615e-06, "loss": 0.6885, "step": 12190 }, { "epoch": 0.37363614073801643, "grad_norm": 0.5014792071571832, "learning_rate": 7.210117226219377e-06, "loss": 0.4476, "step": 12191 }, { "epoch": 0.37366678926075764, "grad_norm": 1.2111044397104147, "learning_rate": 7.20967201649494e-06, "loss": 0.6877, "step": 12192 }, { "epoch": 0.37369743778349884, "grad_norm": 0.45235828426102115, "learning_rate": 7.209226784998039e-06, "loss": 0.4138, "step": 12193 }, { "epoch": 0.37372808630624005, "grad_norm": 1.2054163142314789, "learning_rate": 7.2087815317330625e-06, "loss": 0.6552, "step": 12194 }, { "epoch": 0.37375873482898125, "grad_norm": 0.4717989265215923, "learning_rate": 7.2083362567043955e-06, "loss": 0.4586, "step": 12195 }, { "epoch": 0.37378938335172246, "grad_norm": 0.4576074940811604, "learning_rate": 7.207890959916426e-06, "loss": 0.4217, "step": 12196 }, { "epoch": 0.37382003187446367, "grad_norm": 1.0715902412743794, "learning_rate": 7.207445641373543e-06, "loss": 0.6763, "step": 12197 }, { "epoch": 0.37385068039720487, "grad_norm": 1.3028379460503352, "learning_rate": 7.207000301080132e-06, "loss": 0.6941, "step": 12198 }, { "epoch": 0.3738813289199461, "grad_norm": 1.2812687838526486, "learning_rate": 7.206554939040585e-06, "loss": 0.6119, "step": 12199 }, { "epoch": 0.3739119774426873, "grad_norm": 0.45365480194364205, "learning_rate": 7.206109555259284e-06, "loss": 0.4182, "step": 12200 }, { "epoch": 0.3739426259654285, "grad_norm": 1.1898203889649066, "learning_rate": 7.205664149740623e-06, "loss": 0.681, "step": 12201 }, { "epoch": 0.3739732744881697, "grad_norm": 1.2955804564073998, "learning_rate": 7.205218722488989e-06, "loss": 0.727, "step": 12202 }, { "epoch": 0.3740039230109109, "grad_norm": 1.172495499704105, "learning_rate": 7.204773273508772e-06, "loss": 0.645, "step": 12203 }, { "epoch": 0.3740345715336521, "grad_norm": 1.107654769304749, "learning_rate": 7.2043278028043565e-06, "loss": 0.6175, "step": 12204 }, { "epoch": 0.37406522005639326, "grad_norm": 1.27272754690063, "learning_rate": 7.203882310380137e-06, "loss": 0.7053, "step": 12205 }, { "epoch": 0.37409586857913446, "grad_norm": 1.099981361923708, "learning_rate": 7.203436796240502e-06, "loss": 0.6335, "step": 12206 }, { "epoch": 0.37412651710187567, "grad_norm": 0.9727029406529857, "learning_rate": 7.202991260389839e-06, "loss": 0.592, "step": 12207 }, { "epoch": 0.37415716562461687, "grad_norm": 1.4150678952308644, "learning_rate": 7.202545702832539e-06, "loss": 0.6637, "step": 12208 }, { "epoch": 0.3741878141473581, "grad_norm": 1.2332191048762275, "learning_rate": 7.202100123572994e-06, "loss": 0.5494, "step": 12209 }, { "epoch": 0.3742184626700993, "grad_norm": 1.35959960795014, "learning_rate": 7.201654522615593e-06, "loss": 0.5877, "step": 12210 }, { "epoch": 0.3742491111928405, "grad_norm": 1.1930133214396519, "learning_rate": 7.2012088999647264e-06, "loss": 0.6042, "step": 12211 }, { "epoch": 0.3742797597155817, "grad_norm": 1.4669233395912886, "learning_rate": 7.200763255624785e-06, "loss": 0.6634, "step": 12212 }, { "epoch": 0.3743104082383229, "grad_norm": 1.2957116945151528, "learning_rate": 7.200317589600161e-06, "loss": 0.6687, "step": 12213 }, { "epoch": 0.3743410567610641, "grad_norm": 1.2656177641494089, "learning_rate": 7.199871901895244e-06, "loss": 0.5836, "step": 12214 }, { "epoch": 0.3743717052838053, "grad_norm": 1.2817129751445584, "learning_rate": 7.199426192514427e-06, "loss": 0.6867, "step": 12215 }, { "epoch": 0.3744023538065465, "grad_norm": 1.2254572528191323, "learning_rate": 7.198980461462101e-06, "loss": 0.7223, "step": 12216 }, { "epoch": 0.3744330023292877, "grad_norm": 1.3249432133212877, "learning_rate": 7.198534708742656e-06, "loss": 0.598, "step": 12217 }, { "epoch": 0.37446365085202893, "grad_norm": 1.1607105330492833, "learning_rate": 7.198088934360488e-06, "loss": 0.764, "step": 12218 }, { "epoch": 0.37449429937477013, "grad_norm": 1.2721647193848908, "learning_rate": 7.197643138319988e-06, "loss": 0.7224, "step": 12219 }, { "epoch": 0.37452494789751134, "grad_norm": 1.3787713667101744, "learning_rate": 7.197197320625546e-06, "loss": 0.7189, "step": 12220 }, { "epoch": 0.37455559642025255, "grad_norm": 1.169195650361689, "learning_rate": 7.196751481281556e-06, "loss": 0.6148, "step": 12221 }, { "epoch": 0.37458624494299375, "grad_norm": 1.209358934378481, "learning_rate": 7.196305620292413e-06, "loss": 0.6558, "step": 12222 }, { "epoch": 0.37461689346573496, "grad_norm": 1.3373003417800935, "learning_rate": 7.195859737662509e-06, "loss": 0.7268, "step": 12223 }, { "epoch": 0.37464754198847616, "grad_norm": 1.1727320467136533, "learning_rate": 7.195413833396236e-06, "loss": 0.7033, "step": 12224 }, { "epoch": 0.37467819051121737, "grad_norm": 1.4016214547962522, "learning_rate": 7.19496790749799e-06, "loss": 0.6789, "step": 12225 }, { "epoch": 0.3747088390339586, "grad_norm": 1.2112899793771208, "learning_rate": 7.194521959972163e-06, "loss": 0.5973, "step": 12226 }, { "epoch": 0.3747394875566998, "grad_norm": 1.4307313226267573, "learning_rate": 7.194075990823151e-06, "loss": 0.6752, "step": 12227 }, { "epoch": 0.374770136079441, "grad_norm": 1.1236374696079359, "learning_rate": 7.193630000055344e-06, "loss": 0.6, "step": 12228 }, { "epoch": 0.3748007846021822, "grad_norm": 1.2406310909292475, "learning_rate": 7.193183987673143e-06, "loss": 0.6835, "step": 12229 }, { "epoch": 0.3748314331249234, "grad_norm": 1.3552952586506417, "learning_rate": 7.192737953680936e-06, "loss": 0.748, "step": 12230 }, { "epoch": 0.3748620816476646, "grad_norm": 1.2061204236178167, "learning_rate": 7.192291898083122e-06, "loss": 0.6836, "step": 12231 }, { "epoch": 0.3748927301704058, "grad_norm": 1.440654967053857, "learning_rate": 7.191845820884093e-06, "loss": 0.6077, "step": 12232 }, { "epoch": 0.374923378693147, "grad_norm": 1.2766015321573572, "learning_rate": 7.191399722088249e-06, "loss": 0.6398, "step": 12233 }, { "epoch": 0.3749540272158882, "grad_norm": 1.1514883861163168, "learning_rate": 7.190953601699983e-06, "loss": 0.5327, "step": 12234 }, { "epoch": 0.3749846757386294, "grad_norm": 1.2227717125116633, "learning_rate": 7.190507459723689e-06, "loss": 0.6837, "step": 12235 }, { "epoch": 0.3750153242613706, "grad_norm": 1.175348826230099, "learning_rate": 7.190061296163765e-06, "loss": 0.7082, "step": 12236 }, { "epoch": 0.3750459727841118, "grad_norm": 1.1742416480019153, "learning_rate": 7.189615111024608e-06, "loss": 0.6513, "step": 12237 }, { "epoch": 0.375076621306853, "grad_norm": 1.2753205875327405, "learning_rate": 7.189168904310612e-06, "loss": 0.668, "step": 12238 }, { "epoch": 0.3751072698295942, "grad_norm": 1.2361052811452469, "learning_rate": 7.188722676026174e-06, "loss": 0.7035, "step": 12239 }, { "epoch": 0.3751379183523354, "grad_norm": 1.4756704604654212, "learning_rate": 7.1882764261756925e-06, "loss": 0.6885, "step": 12240 }, { "epoch": 0.3751685668750766, "grad_norm": 0.5286555279408576, "learning_rate": 7.187830154763563e-06, "loss": 0.4331, "step": 12241 }, { "epoch": 0.3751992153978178, "grad_norm": 1.3100138970156912, "learning_rate": 7.187383861794184e-06, "loss": 0.7288, "step": 12242 }, { "epoch": 0.375229863920559, "grad_norm": 0.5037698574862081, "learning_rate": 7.186937547271951e-06, "loss": 0.453, "step": 12243 }, { "epoch": 0.3752605124433002, "grad_norm": 1.4404403390756382, "learning_rate": 7.186491211201263e-06, "loss": 0.8131, "step": 12244 }, { "epoch": 0.3752911609660414, "grad_norm": 1.203261232510894, "learning_rate": 7.186044853586518e-06, "loss": 0.7028, "step": 12245 }, { "epoch": 0.37532180948878263, "grad_norm": 1.4516169714960647, "learning_rate": 7.1855984744321135e-06, "loss": 0.5912, "step": 12246 }, { "epoch": 0.37535245801152384, "grad_norm": 1.3024507901475693, "learning_rate": 7.185152073742448e-06, "loss": 0.7393, "step": 12247 }, { "epoch": 0.37538310653426504, "grad_norm": 1.8181992746161504, "learning_rate": 7.184705651521919e-06, "loss": 0.7013, "step": 12248 }, { "epoch": 0.37541375505700625, "grad_norm": 0.4594490072255798, "learning_rate": 7.184259207774928e-06, "loss": 0.436, "step": 12249 }, { "epoch": 0.37544440357974745, "grad_norm": 1.3287505750981525, "learning_rate": 7.183812742505871e-06, "loss": 0.6551, "step": 12250 }, { "epoch": 0.37547505210248866, "grad_norm": 1.2708051459006449, "learning_rate": 7.183366255719149e-06, "loss": 0.6687, "step": 12251 }, { "epoch": 0.37550570062522987, "grad_norm": 1.2127843157289935, "learning_rate": 7.182919747419161e-06, "loss": 0.6264, "step": 12252 }, { "epoch": 0.37553634914797107, "grad_norm": 1.470170762903622, "learning_rate": 7.182473217610306e-06, "loss": 0.6267, "step": 12253 }, { "epoch": 0.3755669976707123, "grad_norm": 1.233181465136501, "learning_rate": 7.182026666296983e-06, "loss": 0.6824, "step": 12254 }, { "epoch": 0.3755976461934535, "grad_norm": 1.2768389873647878, "learning_rate": 7.1815800934835945e-06, "loss": 0.6536, "step": 12255 }, { "epoch": 0.3756282947161947, "grad_norm": 1.2033286726945986, "learning_rate": 7.181133499174538e-06, "loss": 0.679, "step": 12256 }, { "epoch": 0.3756589432389359, "grad_norm": 1.3266621021603335, "learning_rate": 7.180686883374216e-06, "loss": 0.6234, "step": 12257 }, { "epoch": 0.3756895917616771, "grad_norm": 1.1141492026999187, "learning_rate": 7.180240246087027e-06, "loss": 0.6216, "step": 12258 }, { "epoch": 0.3757202402844183, "grad_norm": 1.1774651708516124, "learning_rate": 7.179793587317374e-06, "loss": 0.6597, "step": 12259 }, { "epoch": 0.3757508888071595, "grad_norm": 1.1788420154066073, "learning_rate": 7.179346907069657e-06, "loss": 0.7312, "step": 12260 }, { "epoch": 0.3757815373299007, "grad_norm": 1.2231378779347524, "learning_rate": 7.178900205348276e-06, "loss": 0.6816, "step": 12261 }, { "epoch": 0.3758121858526419, "grad_norm": 1.105942501748326, "learning_rate": 7.178453482157635e-06, "loss": 0.6187, "step": 12262 }, { "epoch": 0.37584283437538313, "grad_norm": 0.48635562234387925, "learning_rate": 7.178006737502135e-06, "loss": 0.4334, "step": 12263 }, { "epoch": 0.37587348289812433, "grad_norm": 1.266854091129213, "learning_rate": 7.177559971386175e-06, "loss": 0.679, "step": 12264 }, { "epoch": 0.37590413142086554, "grad_norm": 1.5184521498320185, "learning_rate": 7.177113183814162e-06, "loss": 0.6473, "step": 12265 }, { "epoch": 0.37593477994360674, "grad_norm": 0.4952946455099375, "learning_rate": 7.176666374790494e-06, "loss": 0.4368, "step": 12266 }, { "epoch": 0.3759654284663479, "grad_norm": 1.2532371376563547, "learning_rate": 7.176219544319576e-06, "loss": 0.6126, "step": 12267 }, { "epoch": 0.3759960769890891, "grad_norm": 1.3397369359374751, "learning_rate": 7.175772692405811e-06, "loss": 0.673, "step": 12268 }, { "epoch": 0.3760267255118303, "grad_norm": 1.3462213917891446, "learning_rate": 7.175325819053599e-06, "loss": 0.754, "step": 12269 }, { "epoch": 0.3760573740345715, "grad_norm": 1.273491729629337, "learning_rate": 7.174878924267346e-06, "loss": 0.5923, "step": 12270 }, { "epoch": 0.3760880225573127, "grad_norm": 1.2200273608039303, "learning_rate": 7.174432008051454e-06, "loss": 0.7358, "step": 12271 }, { "epoch": 0.3761186710800539, "grad_norm": 1.2827591826598552, "learning_rate": 7.1739850704103295e-06, "loss": 0.6301, "step": 12272 }, { "epoch": 0.37614931960279513, "grad_norm": 1.2657652169516043, "learning_rate": 7.17353811134837e-06, "loss": 0.7648, "step": 12273 }, { "epoch": 0.37617996812553633, "grad_norm": 1.0935250935761844, "learning_rate": 7.1730911308699865e-06, "loss": 0.6836, "step": 12274 }, { "epoch": 0.37621061664827754, "grad_norm": 1.17534835052329, "learning_rate": 7.172644128979578e-06, "loss": 0.6647, "step": 12275 }, { "epoch": 0.37624126517101875, "grad_norm": 1.332265809329491, "learning_rate": 7.172197105681553e-06, "loss": 0.6483, "step": 12276 }, { "epoch": 0.37627191369375995, "grad_norm": 1.3368286579065471, "learning_rate": 7.171750060980314e-06, "loss": 0.7063, "step": 12277 }, { "epoch": 0.37630256221650116, "grad_norm": 1.1470395092535675, "learning_rate": 7.171302994880264e-06, "loss": 0.597, "step": 12278 }, { "epoch": 0.37633321073924236, "grad_norm": 1.3816685384599559, "learning_rate": 7.170855907385812e-06, "loss": 0.6675, "step": 12279 }, { "epoch": 0.37636385926198357, "grad_norm": 1.6531681883386884, "learning_rate": 7.17040879850136e-06, "loss": 0.6491, "step": 12280 }, { "epoch": 0.3763945077847248, "grad_norm": 1.25317889522343, "learning_rate": 7.169961668231316e-06, "loss": 0.6451, "step": 12281 }, { "epoch": 0.376425156307466, "grad_norm": 1.10485265230241, "learning_rate": 7.169514516580083e-06, "loss": 0.5782, "step": 12282 }, { "epoch": 0.3764558048302072, "grad_norm": 0.5107392566491972, "learning_rate": 7.169067343552069e-06, "loss": 0.426, "step": 12283 }, { "epoch": 0.3764864533529484, "grad_norm": 1.198759286792061, "learning_rate": 7.1686201491516795e-06, "loss": 0.6675, "step": 12284 }, { "epoch": 0.3765171018756896, "grad_norm": 0.48615154448239944, "learning_rate": 7.168172933383322e-06, "loss": 0.4276, "step": 12285 }, { "epoch": 0.3765477503984308, "grad_norm": 1.3023156266655007, "learning_rate": 7.167725696251399e-06, "loss": 0.6819, "step": 12286 }, { "epoch": 0.376578398921172, "grad_norm": 1.2814786330450696, "learning_rate": 7.167278437760322e-06, "loss": 0.7982, "step": 12287 }, { "epoch": 0.3766090474439132, "grad_norm": 0.42140558638185605, "learning_rate": 7.1668311579144966e-06, "loss": 0.4215, "step": 12288 }, { "epoch": 0.3766396959666544, "grad_norm": 1.3645061797390334, "learning_rate": 7.166383856718328e-06, "loss": 0.6756, "step": 12289 }, { "epoch": 0.3766703444893956, "grad_norm": 1.3006202004540723, "learning_rate": 7.165936534176225e-06, "loss": 0.7566, "step": 12290 }, { "epoch": 0.37670099301213683, "grad_norm": 1.2767720758406333, "learning_rate": 7.165489190292596e-06, "loss": 0.6792, "step": 12291 }, { "epoch": 0.37673164153487804, "grad_norm": 1.1689211239192923, "learning_rate": 7.1650418250718475e-06, "loss": 0.6996, "step": 12292 }, { "epoch": 0.37676229005761924, "grad_norm": 1.2549680848877605, "learning_rate": 7.164594438518389e-06, "loss": 0.6952, "step": 12293 }, { "epoch": 0.37679293858036045, "grad_norm": 1.418135628137392, "learning_rate": 7.164147030636627e-06, "loss": 0.7863, "step": 12294 }, { "epoch": 0.37682358710310165, "grad_norm": 1.1902795072200802, "learning_rate": 7.16369960143097e-06, "loss": 0.7219, "step": 12295 }, { "epoch": 0.37685423562584286, "grad_norm": 1.2171023545154416, "learning_rate": 7.163252150905828e-06, "loss": 0.7436, "step": 12296 }, { "epoch": 0.37688488414858407, "grad_norm": 1.203575959630214, "learning_rate": 7.162804679065608e-06, "loss": 0.7277, "step": 12297 }, { "epoch": 0.3769155326713252, "grad_norm": 1.3514007835755637, "learning_rate": 7.162357185914721e-06, "loss": 0.6571, "step": 12298 }, { "epoch": 0.3769461811940664, "grad_norm": 1.2725883618044356, "learning_rate": 7.161909671457576e-06, "loss": 0.7238, "step": 12299 }, { "epoch": 0.3769768297168076, "grad_norm": 0.5064885858794788, "learning_rate": 7.161462135698581e-06, "loss": 0.4167, "step": 12300 }, { "epoch": 0.37700747823954883, "grad_norm": 1.2957483097790825, "learning_rate": 7.161014578642146e-06, "loss": 0.8107, "step": 12301 }, { "epoch": 0.37703812676229004, "grad_norm": 1.1461278926450056, "learning_rate": 7.160567000292682e-06, "loss": 0.7256, "step": 12302 }, { "epoch": 0.37706877528503124, "grad_norm": 1.1527158719642956, "learning_rate": 7.160119400654599e-06, "loss": 0.6319, "step": 12303 }, { "epoch": 0.37709942380777245, "grad_norm": 1.1866555317883436, "learning_rate": 7.159671779732305e-06, "loss": 0.6499, "step": 12304 }, { "epoch": 0.37713007233051365, "grad_norm": 1.277265914992037, "learning_rate": 7.1592241375302145e-06, "loss": 0.6556, "step": 12305 }, { "epoch": 0.37716072085325486, "grad_norm": 1.3302342388218704, "learning_rate": 7.158776474052735e-06, "loss": 0.7042, "step": 12306 }, { "epoch": 0.37719136937599607, "grad_norm": 1.2593250511179792, "learning_rate": 7.1583287893042795e-06, "loss": 0.6578, "step": 12307 }, { "epoch": 0.37722201789873727, "grad_norm": 1.2723135229016767, "learning_rate": 7.157881083289257e-06, "loss": 0.738, "step": 12308 }, { "epoch": 0.3772526664214785, "grad_norm": 1.3104585057764329, "learning_rate": 7.157433356012081e-06, "loss": 0.6422, "step": 12309 }, { "epoch": 0.3772833149442197, "grad_norm": 1.3066448714577983, "learning_rate": 7.156985607477163e-06, "loss": 0.6264, "step": 12310 }, { "epoch": 0.3773139634669609, "grad_norm": 0.4622228705448583, "learning_rate": 7.156537837688913e-06, "loss": 0.4269, "step": 12311 }, { "epoch": 0.3773446119897021, "grad_norm": 1.2290452878905505, "learning_rate": 7.156090046651742e-06, "loss": 0.6289, "step": 12312 }, { "epoch": 0.3773752605124433, "grad_norm": 1.383875879212177, "learning_rate": 7.155642234370066e-06, "loss": 0.6495, "step": 12313 }, { "epoch": 0.3774059090351845, "grad_norm": 1.3733136661466954, "learning_rate": 7.1551944008482964e-06, "loss": 0.679, "step": 12314 }, { "epoch": 0.3774365575579257, "grad_norm": 1.2185085869812784, "learning_rate": 7.154746546090844e-06, "loss": 0.6837, "step": 12315 }, { "epoch": 0.3774672060806669, "grad_norm": 1.1684537970928808, "learning_rate": 7.154298670102122e-06, "loss": 0.6262, "step": 12316 }, { "epoch": 0.3774978546034081, "grad_norm": 0.44369192143485897, "learning_rate": 7.1538507728865445e-06, "loss": 0.4125, "step": 12317 }, { "epoch": 0.37752850312614933, "grad_norm": 1.1931486311581405, "learning_rate": 7.153402854448525e-06, "loss": 0.7165, "step": 12318 }, { "epoch": 0.37755915164889053, "grad_norm": 1.2150936307767362, "learning_rate": 7.1529549147924735e-06, "loss": 0.6751, "step": 12319 }, { "epoch": 0.37758980017163174, "grad_norm": 1.255459367738067, "learning_rate": 7.152506953922808e-06, "loss": 0.6736, "step": 12320 }, { "epoch": 0.37762044869437295, "grad_norm": 1.0540238537886293, "learning_rate": 7.152058971843941e-06, "loss": 0.5407, "step": 12321 }, { "epoch": 0.37765109721711415, "grad_norm": 0.4727755542801087, "learning_rate": 7.151610968560287e-06, "loss": 0.466, "step": 12322 }, { "epoch": 0.37768174573985536, "grad_norm": 1.289491465362695, "learning_rate": 7.151162944076259e-06, "loss": 0.6553, "step": 12323 }, { "epoch": 0.37771239426259656, "grad_norm": 1.1787333933609563, "learning_rate": 7.150714898396273e-06, "loss": 0.7381, "step": 12324 }, { "epoch": 0.37774304278533777, "grad_norm": 1.3138207393626424, "learning_rate": 7.15026683152474e-06, "loss": 0.7135, "step": 12325 }, { "epoch": 0.377773691308079, "grad_norm": 1.158296040516612, "learning_rate": 7.149818743466081e-06, "loss": 0.7152, "step": 12326 }, { "epoch": 0.3778043398308202, "grad_norm": 1.2279719656181471, "learning_rate": 7.149370634224706e-06, "loss": 0.7723, "step": 12327 }, { "epoch": 0.3778349883535614, "grad_norm": 1.07021664879443, "learning_rate": 7.148922503805034e-06, "loss": 0.6486, "step": 12328 }, { "epoch": 0.37786563687630254, "grad_norm": 0.4614128315752804, "learning_rate": 7.1484743522114766e-06, "loss": 0.4309, "step": 12329 }, { "epoch": 0.37789628539904374, "grad_norm": 0.452932099444895, "learning_rate": 7.148026179448452e-06, "loss": 0.4254, "step": 12330 }, { "epoch": 0.37792693392178495, "grad_norm": 1.3451018894871711, "learning_rate": 7.1475779855203764e-06, "loss": 0.7192, "step": 12331 }, { "epoch": 0.37795758244452615, "grad_norm": 1.2468165193147926, "learning_rate": 7.147129770431666e-06, "loss": 0.6612, "step": 12332 }, { "epoch": 0.37798823096726736, "grad_norm": 1.1128023515835552, "learning_rate": 7.146681534186737e-06, "loss": 0.6421, "step": 12333 }, { "epoch": 0.37801887949000856, "grad_norm": 1.1310523755584352, "learning_rate": 7.146233276790003e-06, "loss": 0.7282, "step": 12334 }, { "epoch": 0.37804952801274977, "grad_norm": 0.47083137427895827, "learning_rate": 7.145784998245886e-06, "loss": 0.4417, "step": 12335 }, { "epoch": 0.378080176535491, "grad_norm": 1.049257334840632, "learning_rate": 7.145336698558798e-06, "loss": 0.5783, "step": 12336 }, { "epoch": 0.3781108250582322, "grad_norm": 1.4806254864709636, "learning_rate": 7.144888377733161e-06, "loss": 0.6979, "step": 12337 }, { "epoch": 0.3781414735809734, "grad_norm": 1.3407475663427557, "learning_rate": 7.144440035773388e-06, "loss": 0.6345, "step": 12338 }, { "epoch": 0.3781721221037146, "grad_norm": 1.1243301372508212, "learning_rate": 7.143991672683901e-06, "loss": 0.6488, "step": 12339 }, { "epoch": 0.3782027706264558, "grad_norm": 1.7223290394205302, "learning_rate": 7.143543288469113e-06, "loss": 0.5936, "step": 12340 }, { "epoch": 0.378233419149197, "grad_norm": 0.4532863854546607, "learning_rate": 7.143094883133447e-06, "loss": 0.4044, "step": 12341 }, { "epoch": 0.3782640676719382, "grad_norm": 1.448885460044943, "learning_rate": 7.142646456681317e-06, "loss": 0.6101, "step": 12342 }, { "epoch": 0.3782947161946794, "grad_norm": 0.4563515346685411, "learning_rate": 7.142198009117143e-06, "loss": 0.4465, "step": 12343 }, { "epoch": 0.3783253647174206, "grad_norm": 1.2660446210132057, "learning_rate": 7.141749540445344e-06, "loss": 0.6678, "step": 12344 }, { "epoch": 0.3783560132401618, "grad_norm": 1.2604772335192982, "learning_rate": 7.141301050670339e-06, "loss": 0.5737, "step": 12345 }, { "epoch": 0.37838666176290303, "grad_norm": 1.1773617431464625, "learning_rate": 7.140852539796548e-06, "loss": 0.6399, "step": 12346 }, { "epoch": 0.37841731028564424, "grad_norm": 2.0558171961303224, "learning_rate": 7.140404007828387e-06, "loss": 0.6132, "step": 12347 }, { "epoch": 0.37844795880838544, "grad_norm": 1.2978053746578726, "learning_rate": 7.1399554547702806e-06, "loss": 0.7133, "step": 12348 }, { "epoch": 0.37847860733112665, "grad_norm": 1.1730818195252453, "learning_rate": 7.139506880626645e-06, "loss": 0.7094, "step": 12349 }, { "epoch": 0.37850925585386785, "grad_norm": 0.4732568694810994, "learning_rate": 7.1390582854018995e-06, "loss": 0.4333, "step": 12350 }, { "epoch": 0.37853990437660906, "grad_norm": 1.338579956219833, "learning_rate": 7.138609669100465e-06, "loss": 0.5975, "step": 12351 }, { "epoch": 0.37857055289935027, "grad_norm": 0.4394525262017163, "learning_rate": 7.138161031726762e-06, "loss": 0.4102, "step": 12352 }, { "epoch": 0.37860120142209147, "grad_norm": 1.3436437979422744, "learning_rate": 7.137712373285213e-06, "loss": 0.771, "step": 12353 }, { "epoch": 0.3786318499448327, "grad_norm": 0.46077866363021347, "learning_rate": 7.1372636937802365e-06, "loss": 0.4231, "step": 12354 }, { "epoch": 0.3786624984675739, "grad_norm": 1.3473363287317734, "learning_rate": 7.136814993216253e-06, "loss": 0.6768, "step": 12355 }, { "epoch": 0.3786931469903151, "grad_norm": 1.2554668768566146, "learning_rate": 7.136366271597685e-06, "loss": 0.6131, "step": 12356 }, { "epoch": 0.3787237955130563, "grad_norm": 1.2741561081655006, "learning_rate": 7.135917528928955e-06, "loss": 0.7253, "step": 12357 }, { "epoch": 0.3787544440357975, "grad_norm": 1.3485663467238749, "learning_rate": 7.135468765214481e-06, "loss": 0.7103, "step": 12358 }, { "epoch": 0.3787850925585387, "grad_norm": 1.5388457940877764, "learning_rate": 7.135019980458688e-06, "loss": 0.6292, "step": 12359 }, { "epoch": 0.37881574108127986, "grad_norm": 0.5108266580607113, "learning_rate": 7.1345711746659975e-06, "loss": 0.4229, "step": 12360 }, { "epoch": 0.37884638960402106, "grad_norm": 1.331446917860749, "learning_rate": 7.134122347840831e-06, "loss": 0.7598, "step": 12361 }, { "epoch": 0.37887703812676227, "grad_norm": 1.27146022910882, "learning_rate": 7.133673499987609e-06, "loss": 0.6681, "step": 12362 }, { "epoch": 0.3789076866495035, "grad_norm": 1.216312333631944, "learning_rate": 7.133224631110758e-06, "loss": 0.6339, "step": 12363 }, { "epoch": 0.3789383351722447, "grad_norm": 1.2684300794634857, "learning_rate": 7.132775741214698e-06, "loss": 0.6559, "step": 12364 }, { "epoch": 0.3789689836949859, "grad_norm": 1.6173114805279107, "learning_rate": 7.132326830303853e-06, "loss": 0.6709, "step": 12365 }, { "epoch": 0.3789996322177271, "grad_norm": 1.2762730280932506, "learning_rate": 7.131877898382647e-06, "loss": 0.7161, "step": 12366 }, { "epoch": 0.3790302807404683, "grad_norm": 1.2100867794674786, "learning_rate": 7.131428945455501e-06, "loss": 0.6089, "step": 12367 }, { "epoch": 0.3790609292632095, "grad_norm": 1.2547978263391293, "learning_rate": 7.130979971526841e-06, "loss": 0.6861, "step": 12368 }, { "epoch": 0.3790915777859507, "grad_norm": 1.446990411312426, "learning_rate": 7.130530976601091e-06, "loss": 0.765, "step": 12369 }, { "epoch": 0.3791222263086919, "grad_norm": 1.3939671507058509, "learning_rate": 7.130081960682673e-06, "loss": 0.6885, "step": 12370 }, { "epoch": 0.3791528748314331, "grad_norm": 1.235468873837127, "learning_rate": 7.129632923776013e-06, "loss": 0.6453, "step": 12371 }, { "epoch": 0.3791835233541743, "grad_norm": 1.2429716387117549, "learning_rate": 7.129183865885535e-06, "loss": 0.6374, "step": 12372 }, { "epoch": 0.37921417187691553, "grad_norm": 1.2504865709599138, "learning_rate": 7.128734787015662e-06, "loss": 0.6477, "step": 12373 }, { "epoch": 0.37924482039965673, "grad_norm": 1.200838669744433, "learning_rate": 7.128285687170823e-06, "loss": 0.6773, "step": 12374 }, { "epoch": 0.37927546892239794, "grad_norm": 1.4296872811142503, "learning_rate": 7.127836566355438e-06, "loss": 0.666, "step": 12375 }, { "epoch": 0.37930611744513915, "grad_norm": 1.1945178525929303, "learning_rate": 7.1273874245739375e-06, "loss": 0.7217, "step": 12376 }, { "epoch": 0.37933676596788035, "grad_norm": 1.4576082337245932, "learning_rate": 7.126938261830743e-06, "loss": 0.6672, "step": 12377 }, { "epoch": 0.37936741449062156, "grad_norm": 1.3518539164759493, "learning_rate": 7.126489078130282e-06, "loss": 0.6739, "step": 12378 }, { "epoch": 0.37939806301336276, "grad_norm": 1.1857510456424858, "learning_rate": 7.126039873476979e-06, "loss": 0.7004, "step": 12379 }, { "epoch": 0.37942871153610397, "grad_norm": 0.48126796762579455, "learning_rate": 7.125590647875263e-06, "loss": 0.4405, "step": 12380 }, { "epoch": 0.3794593600588452, "grad_norm": 1.4413388915281962, "learning_rate": 7.125141401329557e-06, "loss": 0.7411, "step": 12381 }, { "epoch": 0.3794900085815864, "grad_norm": 1.2269993578315492, "learning_rate": 7.12469213384429e-06, "loss": 0.747, "step": 12382 }, { "epoch": 0.3795206571043276, "grad_norm": 1.4406921996628843, "learning_rate": 7.124242845423887e-06, "loss": 0.7525, "step": 12383 }, { "epoch": 0.3795513056270688, "grad_norm": 0.44655513895441684, "learning_rate": 7.123793536072776e-06, "loss": 0.415, "step": 12384 }, { "epoch": 0.37958195414981, "grad_norm": 1.1169474124114185, "learning_rate": 7.123344205795384e-06, "loss": 0.6679, "step": 12385 }, { "epoch": 0.3796126026725512, "grad_norm": 0.4432926574300901, "learning_rate": 7.122894854596139e-06, "loss": 0.4377, "step": 12386 }, { "epoch": 0.3796432511952924, "grad_norm": 1.3013124392705435, "learning_rate": 7.122445482479467e-06, "loss": 0.6641, "step": 12387 }, { "epoch": 0.3796738997180336, "grad_norm": 1.1891847881985387, "learning_rate": 7.121996089449795e-06, "loss": 0.744, "step": 12388 }, { "epoch": 0.3797045482407748, "grad_norm": 1.3765533533220606, "learning_rate": 7.121546675511555e-06, "loss": 0.7656, "step": 12389 }, { "epoch": 0.379735196763516, "grad_norm": 1.2985673146791683, "learning_rate": 7.12109724066917e-06, "loss": 0.7108, "step": 12390 }, { "epoch": 0.3797658452862572, "grad_norm": 1.3421124854894506, "learning_rate": 7.120647784927075e-06, "loss": 0.7322, "step": 12391 }, { "epoch": 0.3797964938089984, "grad_norm": 1.3268011131042003, "learning_rate": 7.120198308289693e-06, "loss": 0.6821, "step": 12392 }, { "epoch": 0.3798271423317396, "grad_norm": 1.3620037270565224, "learning_rate": 7.119748810761454e-06, "loss": 0.6621, "step": 12393 }, { "epoch": 0.3798577908544808, "grad_norm": 1.2758793148090477, "learning_rate": 7.119299292346788e-06, "loss": 0.6152, "step": 12394 }, { "epoch": 0.379888439377222, "grad_norm": 1.1918748815163545, "learning_rate": 7.118849753050126e-06, "loss": 0.6142, "step": 12395 }, { "epoch": 0.3799190878999632, "grad_norm": 1.2930607172369961, "learning_rate": 7.1184001928758915e-06, "loss": 0.6697, "step": 12396 }, { "epoch": 0.3799497364227044, "grad_norm": 1.1277788257807886, "learning_rate": 7.11795061182852e-06, "loss": 0.7124, "step": 12397 }, { "epoch": 0.3799803849454456, "grad_norm": 1.3300208492331327, "learning_rate": 7.117501009912437e-06, "loss": 0.7659, "step": 12398 }, { "epoch": 0.3800110334681868, "grad_norm": 0.4863191088175757, "learning_rate": 7.117051387132078e-06, "loss": 0.4175, "step": 12399 }, { "epoch": 0.380041681990928, "grad_norm": 1.2484905664002917, "learning_rate": 7.1166017434918685e-06, "loss": 0.7063, "step": 12400 }, { "epoch": 0.38007233051366923, "grad_norm": 1.1777350481576123, "learning_rate": 7.11615207899624e-06, "loss": 0.6319, "step": 12401 }, { "epoch": 0.38010297903641044, "grad_norm": 1.1747734940291388, "learning_rate": 7.115702393649625e-06, "loss": 0.6871, "step": 12402 }, { "epoch": 0.38013362755915164, "grad_norm": 1.4266452657462725, "learning_rate": 7.115252687456452e-06, "loss": 0.6978, "step": 12403 }, { "epoch": 0.38016427608189285, "grad_norm": 1.3134422949072078, "learning_rate": 7.114802960421155e-06, "loss": 0.696, "step": 12404 }, { "epoch": 0.38019492460463405, "grad_norm": 1.2673066827977149, "learning_rate": 7.114353212548159e-06, "loss": 0.7404, "step": 12405 }, { "epoch": 0.38022557312737526, "grad_norm": 1.2034020524936955, "learning_rate": 7.113903443841904e-06, "loss": 0.6214, "step": 12406 }, { "epoch": 0.38025622165011647, "grad_norm": 1.265269411494532, "learning_rate": 7.113453654306815e-06, "loss": 0.6837, "step": 12407 }, { "epoch": 0.38028687017285767, "grad_norm": 1.2122245280030817, "learning_rate": 7.113003843947328e-06, "loss": 0.6883, "step": 12408 }, { "epoch": 0.3803175186955989, "grad_norm": 1.3867479065287327, "learning_rate": 7.112554012767872e-06, "loss": 0.6744, "step": 12409 }, { "epoch": 0.3803481672183401, "grad_norm": 0.5212756233452678, "learning_rate": 7.112104160772883e-06, "loss": 0.4376, "step": 12410 }, { "epoch": 0.3803788157410813, "grad_norm": 1.2294143613165105, "learning_rate": 7.11165428796679e-06, "loss": 0.724, "step": 12411 }, { "epoch": 0.3804094642638225, "grad_norm": 1.1731547833628455, "learning_rate": 7.111204394354026e-06, "loss": 0.6854, "step": 12412 }, { "epoch": 0.3804401127865637, "grad_norm": 1.3981737992885677, "learning_rate": 7.110754479939025e-06, "loss": 0.6878, "step": 12413 }, { "epoch": 0.3804707613093049, "grad_norm": 1.4038588897748108, "learning_rate": 7.1103045447262205e-06, "loss": 0.7465, "step": 12414 }, { "epoch": 0.3805014098320461, "grad_norm": 1.3215450108547073, "learning_rate": 7.109854588720047e-06, "loss": 0.6683, "step": 12415 }, { "epoch": 0.3805320583547873, "grad_norm": 1.3280572660186882, "learning_rate": 7.1094046119249345e-06, "loss": 0.7198, "step": 12416 }, { "epoch": 0.3805627068775285, "grad_norm": 1.2523511882397593, "learning_rate": 7.108954614345319e-06, "loss": 0.6954, "step": 12417 }, { "epoch": 0.38059335540026973, "grad_norm": 1.2935028434351359, "learning_rate": 7.108504595985635e-06, "loss": 0.6948, "step": 12418 }, { "epoch": 0.38062400392301093, "grad_norm": 0.4373029349312259, "learning_rate": 7.108054556850316e-06, "loss": 0.4308, "step": 12419 }, { "epoch": 0.38065465244575214, "grad_norm": 1.1738929296316958, "learning_rate": 7.107604496943794e-06, "loss": 0.7419, "step": 12420 }, { "epoch": 0.38068530096849335, "grad_norm": 1.3197015139097636, "learning_rate": 7.107154416270508e-06, "loss": 0.7825, "step": 12421 }, { "epoch": 0.3807159494912345, "grad_norm": 1.2676777113036986, "learning_rate": 7.106704314834889e-06, "loss": 0.6691, "step": 12422 }, { "epoch": 0.3807465980139757, "grad_norm": 1.15976085600844, "learning_rate": 7.106254192641373e-06, "loss": 0.6801, "step": 12423 }, { "epoch": 0.3807772465367169, "grad_norm": 1.1064092089270932, "learning_rate": 7.1058040496943955e-06, "loss": 0.5211, "step": 12424 }, { "epoch": 0.3808078950594581, "grad_norm": 1.224666923574835, "learning_rate": 7.105353885998393e-06, "loss": 0.7602, "step": 12425 }, { "epoch": 0.3808385435821993, "grad_norm": 1.108260841916979, "learning_rate": 7.104903701557799e-06, "loss": 0.6336, "step": 12426 }, { "epoch": 0.3808691921049405, "grad_norm": 1.260580812494558, "learning_rate": 7.104453496377051e-06, "loss": 0.6898, "step": 12427 }, { "epoch": 0.38089984062768173, "grad_norm": 1.2072293558143776, "learning_rate": 7.104003270460585e-06, "loss": 0.6907, "step": 12428 }, { "epoch": 0.38093048915042294, "grad_norm": 1.2919899631911014, "learning_rate": 7.103553023812834e-06, "loss": 0.7228, "step": 12429 }, { "epoch": 0.38096113767316414, "grad_norm": 1.2581271436329546, "learning_rate": 7.10310275643824e-06, "loss": 0.6493, "step": 12430 }, { "epoch": 0.38099178619590535, "grad_norm": 0.5037485383266529, "learning_rate": 7.102652468341234e-06, "loss": 0.434, "step": 12431 }, { "epoch": 0.38102243471864655, "grad_norm": 1.228387862803857, "learning_rate": 7.102202159526256e-06, "loss": 0.6273, "step": 12432 }, { "epoch": 0.38105308324138776, "grad_norm": 1.2561572007503856, "learning_rate": 7.101751829997743e-06, "loss": 0.6703, "step": 12433 }, { "epoch": 0.38108373176412896, "grad_norm": 1.1190901579187484, "learning_rate": 7.10130147976013e-06, "loss": 0.6769, "step": 12434 }, { "epoch": 0.38111438028687017, "grad_norm": 1.317260251599424, "learning_rate": 7.100851108817857e-06, "loss": 0.665, "step": 12435 }, { "epoch": 0.3811450288096114, "grad_norm": 1.4208306027481834, "learning_rate": 7.100400717175359e-06, "loss": 0.6831, "step": 12436 }, { "epoch": 0.3811756773323526, "grad_norm": 1.3339560027496244, "learning_rate": 7.099950304837078e-06, "loss": 0.6517, "step": 12437 }, { "epoch": 0.3812063258550938, "grad_norm": 1.2032828973983853, "learning_rate": 7.099499871807447e-06, "loss": 0.6135, "step": 12438 }, { "epoch": 0.381236974377835, "grad_norm": 1.2865006229883562, "learning_rate": 7.099049418090907e-06, "loss": 0.6661, "step": 12439 }, { "epoch": 0.3812676229005762, "grad_norm": 1.2387202950898455, "learning_rate": 7.0985989436918965e-06, "loss": 0.5631, "step": 12440 }, { "epoch": 0.3812982714233174, "grad_norm": 1.335702882802763, "learning_rate": 7.098148448614855e-06, "loss": 0.6568, "step": 12441 }, { "epoch": 0.3813289199460586, "grad_norm": 1.2716129472968536, "learning_rate": 7.097697932864216e-06, "loss": 0.6253, "step": 12442 }, { "epoch": 0.3813595684687998, "grad_norm": 0.539889008658996, "learning_rate": 7.097247396444426e-06, "loss": 0.4501, "step": 12443 }, { "epoch": 0.381390216991541, "grad_norm": 1.2012884809242776, "learning_rate": 7.096796839359918e-06, "loss": 0.6825, "step": 12444 }, { "epoch": 0.3814208655142822, "grad_norm": 1.240936091208674, "learning_rate": 7.096346261615139e-06, "loss": 0.583, "step": 12445 }, { "epoch": 0.38145151403702343, "grad_norm": 0.46416087192309313, "learning_rate": 7.0958956632145206e-06, "loss": 0.4253, "step": 12446 }, { "epoch": 0.38148216255976464, "grad_norm": 1.3133369686616914, "learning_rate": 7.095445044162505e-06, "loss": 0.7486, "step": 12447 }, { "epoch": 0.38151281108250584, "grad_norm": 0.44132202658951436, "learning_rate": 7.094994404463534e-06, "loss": 0.4149, "step": 12448 }, { "epoch": 0.38154345960524705, "grad_norm": 1.259846405796557, "learning_rate": 7.09454374412205e-06, "loss": 0.731, "step": 12449 }, { "epoch": 0.38157410812798825, "grad_norm": 1.2865589661127959, "learning_rate": 7.094093063142487e-06, "loss": 0.6884, "step": 12450 }, { "epoch": 0.38160475665072946, "grad_norm": 1.2373424864342002, "learning_rate": 7.093642361529291e-06, "loss": 0.6826, "step": 12451 }, { "epoch": 0.38163540517347067, "grad_norm": 1.2435949289949557, "learning_rate": 7.0931916392869e-06, "loss": 0.6995, "step": 12452 }, { "epoch": 0.3816660536962118, "grad_norm": 1.3588113447847034, "learning_rate": 7.092740896419757e-06, "loss": 0.6338, "step": 12453 }, { "epoch": 0.381696702218953, "grad_norm": 1.1737043130504625, "learning_rate": 7.092290132932302e-06, "loss": 0.7182, "step": 12454 }, { "epoch": 0.3817273507416942, "grad_norm": 1.3395443396445845, "learning_rate": 7.09183934882898e-06, "loss": 0.774, "step": 12455 }, { "epoch": 0.38175799926443543, "grad_norm": 1.1385168643936427, "learning_rate": 7.091388544114225e-06, "loss": 0.5966, "step": 12456 }, { "epoch": 0.38178864778717664, "grad_norm": 1.3929451167689277, "learning_rate": 7.090937718792486e-06, "loss": 0.6753, "step": 12457 }, { "epoch": 0.38181929630991784, "grad_norm": 1.3267637183998608, "learning_rate": 7.090486872868203e-06, "loss": 0.7651, "step": 12458 }, { "epoch": 0.38184994483265905, "grad_norm": 1.3244080796320734, "learning_rate": 7.090036006345816e-06, "loss": 0.6934, "step": 12459 }, { "epoch": 0.38188059335540026, "grad_norm": 1.1881418653764388, "learning_rate": 7.089585119229772e-06, "loss": 0.5738, "step": 12460 }, { "epoch": 0.38191124187814146, "grad_norm": 1.1406406049200974, "learning_rate": 7.089134211524508e-06, "loss": 0.6066, "step": 12461 }, { "epoch": 0.38194189040088267, "grad_norm": 1.2995122809722646, "learning_rate": 7.088683283234474e-06, "loss": 0.7419, "step": 12462 }, { "epoch": 0.38197253892362387, "grad_norm": 1.3286197478488242, "learning_rate": 7.088232334364107e-06, "loss": 0.7343, "step": 12463 }, { "epoch": 0.3820031874463651, "grad_norm": 1.1822221296174036, "learning_rate": 7.087781364917853e-06, "loss": 0.6754, "step": 12464 }, { "epoch": 0.3820338359691063, "grad_norm": 1.4677507275593644, "learning_rate": 7.087330374900154e-06, "loss": 0.6796, "step": 12465 }, { "epoch": 0.3820644844918475, "grad_norm": 1.3355791205225627, "learning_rate": 7.086879364315455e-06, "loss": 0.7113, "step": 12466 }, { "epoch": 0.3820951330145887, "grad_norm": 1.3433751953099589, "learning_rate": 7.0864283331682e-06, "loss": 0.7496, "step": 12467 }, { "epoch": 0.3821257815373299, "grad_norm": 1.2869062784859933, "learning_rate": 7.085977281462834e-06, "loss": 0.6129, "step": 12468 }, { "epoch": 0.3821564300600711, "grad_norm": 1.270307566979253, "learning_rate": 7.085526209203799e-06, "loss": 0.6926, "step": 12469 }, { "epoch": 0.3821870785828123, "grad_norm": 1.1968491519626796, "learning_rate": 7.08507511639554e-06, "loss": 0.6708, "step": 12470 }, { "epoch": 0.3822177271055535, "grad_norm": 1.2467937556339552, "learning_rate": 7.084624003042504e-06, "loss": 0.7124, "step": 12471 }, { "epoch": 0.3822483756282947, "grad_norm": 1.1832565546790965, "learning_rate": 7.084172869149133e-06, "loss": 0.6542, "step": 12472 }, { "epoch": 0.38227902415103593, "grad_norm": 1.449419833597852, "learning_rate": 7.083721714719874e-06, "loss": 0.7314, "step": 12473 }, { "epoch": 0.38230967267377713, "grad_norm": 1.2245692481357544, "learning_rate": 7.0832705397591715e-06, "loss": 0.6565, "step": 12474 }, { "epoch": 0.38234032119651834, "grad_norm": 1.3353229656302001, "learning_rate": 7.082819344271472e-06, "loss": 0.6877, "step": 12475 }, { "epoch": 0.38237096971925955, "grad_norm": 1.2437173093479625, "learning_rate": 7.08236812826122e-06, "loss": 0.6206, "step": 12476 }, { "epoch": 0.38240161824200075, "grad_norm": 1.253434721186766, "learning_rate": 7.0819168917328625e-06, "loss": 0.625, "step": 12477 }, { "epoch": 0.38243226676474196, "grad_norm": 1.2035482306363483, "learning_rate": 7.081465634690844e-06, "loss": 0.6553, "step": 12478 }, { "epoch": 0.38246291528748316, "grad_norm": 1.2640020186818564, "learning_rate": 7.081014357139613e-06, "loss": 0.6457, "step": 12479 }, { "epoch": 0.38249356381022437, "grad_norm": 0.5744222193474002, "learning_rate": 7.080563059083616e-06, "loss": 0.4291, "step": 12480 }, { "epoch": 0.3825242123329656, "grad_norm": 1.2529541092316365, "learning_rate": 7.0801117405272975e-06, "loss": 0.6432, "step": 12481 }, { "epoch": 0.3825548608557068, "grad_norm": 0.5147830821022392, "learning_rate": 7.079660401475106e-06, "loss": 0.448, "step": 12482 }, { "epoch": 0.382585509378448, "grad_norm": 1.1378722226933777, "learning_rate": 7.079209041931489e-06, "loss": 0.6856, "step": 12483 }, { "epoch": 0.38261615790118914, "grad_norm": 1.3179725513348575, "learning_rate": 7.078757661900893e-06, "loss": 0.6997, "step": 12484 }, { "epoch": 0.38264680642393034, "grad_norm": 1.2436949112894073, "learning_rate": 7.078306261387765e-06, "loss": 0.6507, "step": 12485 }, { "epoch": 0.38267745494667155, "grad_norm": 0.506748044712746, "learning_rate": 7.077854840396554e-06, "loss": 0.4352, "step": 12486 }, { "epoch": 0.38270810346941275, "grad_norm": 1.2108232899296278, "learning_rate": 7.077403398931709e-06, "loss": 0.7361, "step": 12487 }, { "epoch": 0.38273875199215396, "grad_norm": 1.3544463531164166, "learning_rate": 7.0769519369976755e-06, "loss": 0.6074, "step": 12488 }, { "epoch": 0.38276940051489516, "grad_norm": 1.3943519813145893, "learning_rate": 7.076500454598903e-06, "loss": 0.7226, "step": 12489 }, { "epoch": 0.38280004903763637, "grad_norm": 1.3191905586748425, "learning_rate": 7.076048951739841e-06, "loss": 0.7274, "step": 12490 }, { "epoch": 0.3828306975603776, "grad_norm": 1.4368741623729127, "learning_rate": 7.075597428424939e-06, "loss": 0.7789, "step": 12491 }, { "epoch": 0.3828613460831188, "grad_norm": 1.169442864601462, "learning_rate": 7.075145884658642e-06, "loss": 0.6627, "step": 12492 }, { "epoch": 0.38289199460586, "grad_norm": 1.3198708206067744, "learning_rate": 7.074694320445401e-06, "loss": 0.7623, "step": 12493 }, { "epoch": 0.3829226431286012, "grad_norm": 1.149865771644293, "learning_rate": 7.074242735789668e-06, "loss": 0.6508, "step": 12494 }, { "epoch": 0.3829532916513424, "grad_norm": 1.1037070638737867, "learning_rate": 7.073791130695891e-06, "loss": 0.7142, "step": 12495 }, { "epoch": 0.3829839401740836, "grad_norm": 0.5611541831721066, "learning_rate": 7.073339505168517e-06, "loss": 0.3989, "step": 12496 }, { "epoch": 0.3830145886968248, "grad_norm": 1.167772820330695, "learning_rate": 7.072887859212001e-06, "loss": 0.6909, "step": 12497 }, { "epoch": 0.383045237219566, "grad_norm": 0.5299086549344322, "learning_rate": 7.072436192830788e-06, "loss": 0.4036, "step": 12498 }, { "epoch": 0.3830758857423072, "grad_norm": 0.46523642566589846, "learning_rate": 7.071984506029333e-06, "loss": 0.4364, "step": 12499 }, { "epoch": 0.3831065342650484, "grad_norm": 1.2691452855297929, "learning_rate": 7.071532798812084e-06, "loss": 0.6269, "step": 12500 }, { "epoch": 0.38313718278778963, "grad_norm": 1.3370644449754254, "learning_rate": 7.071081071183492e-06, "loss": 0.7047, "step": 12501 }, { "epoch": 0.38316783131053084, "grad_norm": 1.147056437388152, "learning_rate": 7.0706293231480074e-06, "loss": 0.6118, "step": 12502 }, { "epoch": 0.38319847983327204, "grad_norm": 1.2835732122043626, "learning_rate": 7.070177554710085e-06, "loss": 0.6275, "step": 12503 }, { "epoch": 0.38322912835601325, "grad_norm": 1.2017280487834165, "learning_rate": 7.069725765874171e-06, "loss": 0.726, "step": 12504 }, { "epoch": 0.38325977687875445, "grad_norm": 1.3653747845574278, "learning_rate": 7.06927395664472e-06, "loss": 0.6733, "step": 12505 }, { "epoch": 0.38329042540149566, "grad_norm": 1.313080860346863, "learning_rate": 7.068822127026183e-06, "loss": 0.6842, "step": 12506 }, { "epoch": 0.38332107392423687, "grad_norm": 1.2690739727059681, "learning_rate": 7.0683702770230135e-06, "loss": 0.6965, "step": 12507 }, { "epoch": 0.38335172244697807, "grad_norm": 0.7806542008760216, "learning_rate": 7.067918406639661e-06, "loss": 0.4177, "step": 12508 }, { "epoch": 0.3833823709697193, "grad_norm": 1.2280032387800082, "learning_rate": 7.067466515880581e-06, "loss": 0.6291, "step": 12509 }, { "epoch": 0.3834130194924605, "grad_norm": 1.3465058211095475, "learning_rate": 7.067014604750223e-06, "loss": 0.689, "step": 12510 }, { "epoch": 0.3834436680152017, "grad_norm": 1.1303891652031068, "learning_rate": 7.066562673253042e-06, "loss": 0.7032, "step": 12511 }, { "epoch": 0.3834743165379429, "grad_norm": 1.3451976056289638, "learning_rate": 7.0661107213934885e-06, "loss": 0.706, "step": 12512 }, { "epoch": 0.3835049650606841, "grad_norm": 1.142973534338821, "learning_rate": 7.065658749176019e-06, "loss": 0.636, "step": 12513 }, { "epoch": 0.3835356135834253, "grad_norm": 0.4563765561881565, "learning_rate": 7.0652067566050865e-06, "loss": 0.4149, "step": 12514 }, { "epoch": 0.38356626210616646, "grad_norm": 0.48569260915567747, "learning_rate": 7.064754743685141e-06, "loss": 0.4117, "step": 12515 }, { "epoch": 0.38359691062890766, "grad_norm": 1.389443694630143, "learning_rate": 7.064302710420641e-06, "loss": 0.7919, "step": 12516 }, { "epoch": 0.38362755915164887, "grad_norm": 1.2318539308427765, "learning_rate": 7.063850656816036e-06, "loss": 0.6828, "step": 12517 }, { "epoch": 0.3836582076743901, "grad_norm": 1.5017014696797282, "learning_rate": 7.063398582875785e-06, "loss": 0.6918, "step": 12518 }, { "epoch": 0.3836888561971313, "grad_norm": 1.3197831288490267, "learning_rate": 7.062946488604337e-06, "loss": 0.5894, "step": 12519 }, { "epoch": 0.3837195047198725, "grad_norm": 1.2257953610095842, "learning_rate": 7.06249437400615e-06, "loss": 0.7509, "step": 12520 }, { "epoch": 0.3837501532426137, "grad_norm": 1.1491117989711026, "learning_rate": 7.062042239085679e-06, "loss": 0.5644, "step": 12521 }, { "epoch": 0.3837808017653549, "grad_norm": 1.186562320532876, "learning_rate": 7.0615900838473785e-06, "loss": 0.6729, "step": 12522 }, { "epoch": 0.3838114502880961, "grad_norm": 1.268010216553178, "learning_rate": 7.061137908295703e-06, "loss": 0.7023, "step": 12523 }, { "epoch": 0.3838420988108373, "grad_norm": 1.340071558107731, "learning_rate": 7.060685712435107e-06, "loss": 0.6966, "step": 12524 }, { "epoch": 0.3838727473335785, "grad_norm": 1.3509431859167584, "learning_rate": 7.06023349627005e-06, "loss": 0.7296, "step": 12525 }, { "epoch": 0.3839033958563197, "grad_norm": 1.5609550642576686, "learning_rate": 7.059781259804984e-06, "loss": 0.7062, "step": 12526 }, { "epoch": 0.3839340443790609, "grad_norm": 1.2450955459323605, "learning_rate": 7.0593290030443665e-06, "loss": 0.7077, "step": 12527 }, { "epoch": 0.38396469290180213, "grad_norm": 1.225411634123344, "learning_rate": 7.058876725992653e-06, "loss": 0.5826, "step": 12528 }, { "epoch": 0.38399534142454333, "grad_norm": 1.1185353825442124, "learning_rate": 7.0584244286543e-06, "loss": 0.6877, "step": 12529 }, { "epoch": 0.38402598994728454, "grad_norm": 1.2252900109999938, "learning_rate": 7.0579721110337655e-06, "loss": 0.6725, "step": 12530 }, { "epoch": 0.38405663847002575, "grad_norm": 1.251396878612215, "learning_rate": 7.057519773135505e-06, "loss": 0.5368, "step": 12531 }, { "epoch": 0.38408728699276695, "grad_norm": 1.3333971044409376, "learning_rate": 7.057067414963974e-06, "loss": 0.6843, "step": 12532 }, { "epoch": 0.38411793551550816, "grad_norm": 1.3566267813537678, "learning_rate": 7.056615036523633e-06, "loss": 0.7918, "step": 12533 }, { "epoch": 0.38414858403824936, "grad_norm": 1.3331470491792183, "learning_rate": 7.056162637818939e-06, "loss": 0.7668, "step": 12534 }, { "epoch": 0.38417923256099057, "grad_norm": 1.3099077852963035, "learning_rate": 7.055710218854347e-06, "loss": 0.6678, "step": 12535 }, { "epoch": 0.3842098810837318, "grad_norm": 1.3753828312827288, "learning_rate": 7.055257779634316e-06, "loss": 0.6248, "step": 12536 }, { "epoch": 0.384240529606473, "grad_norm": 1.259449644544028, "learning_rate": 7.054805320163305e-06, "loss": 0.7058, "step": 12537 }, { "epoch": 0.3842711781292142, "grad_norm": 0.6708480962610746, "learning_rate": 7.05435284044577e-06, "loss": 0.4226, "step": 12538 }, { "epoch": 0.3843018266519554, "grad_norm": 1.2569933380204954, "learning_rate": 7.053900340486172e-06, "loss": 0.7377, "step": 12539 }, { "epoch": 0.3843324751746966, "grad_norm": 1.199874507931632, "learning_rate": 7.053447820288968e-06, "loss": 0.7059, "step": 12540 }, { "epoch": 0.3843631236974378, "grad_norm": 0.5160331967996731, "learning_rate": 7.052995279858619e-06, "loss": 0.4426, "step": 12541 }, { "epoch": 0.384393772220179, "grad_norm": 1.2118875303015013, "learning_rate": 7.05254271919958e-06, "loss": 0.6429, "step": 12542 }, { "epoch": 0.3844244207429202, "grad_norm": 1.1843725796859925, "learning_rate": 7.052090138316312e-06, "loss": 0.6155, "step": 12543 }, { "epoch": 0.3844550692656614, "grad_norm": 1.300220125766888, "learning_rate": 7.051637537213276e-06, "loss": 0.7184, "step": 12544 }, { "epoch": 0.3844857177884026, "grad_norm": 1.1858039010840786, "learning_rate": 7.051184915894932e-06, "loss": 0.7204, "step": 12545 }, { "epoch": 0.3845163663111438, "grad_norm": 0.502940251699725, "learning_rate": 7.050732274365737e-06, "loss": 0.4332, "step": 12546 }, { "epoch": 0.384547014833885, "grad_norm": 1.4692852421786418, "learning_rate": 7.050279612630151e-06, "loss": 0.7124, "step": 12547 }, { "epoch": 0.3845776633566262, "grad_norm": 1.299381994231121, "learning_rate": 7.049826930692636e-06, "loss": 0.6416, "step": 12548 }, { "epoch": 0.3846083118793674, "grad_norm": 1.3221945569725178, "learning_rate": 7.0493742285576525e-06, "loss": 0.678, "step": 12549 }, { "epoch": 0.3846389604021086, "grad_norm": 1.3256330226262716, "learning_rate": 7.048921506229659e-06, "loss": 0.7788, "step": 12550 }, { "epoch": 0.3846696089248498, "grad_norm": 1.2459733573140512, "learning_rate": 7.0484687637131185e-06, "loss": 0.7325, "step": 12551 }, { "epoch": 0.384700257447591, "grad_norm": 1.339858048057076, "learning_rate": 7.048016001012492e-06, "loss": 0.6357, "step": 12552 }, { "epoch": 0.3847309059703322, "grad_norm": 1.3122988218405889, "learning_rate": 7.047563218132238e-06, "loss": 0.62, "step": 12553 }, { "epoch": 0.3847615544930734, "grad_norm": 0.5190138764234183, "learning_rate": 7.047110415076821e-06, "loss": 0.4272, "step": 12554 }, { "epoch": 0.3847922030158146, "grad_norm": 1.1565511727816524, "learning_rate": 7.0466575918507e-06, "loss": 0.6031, "step": 12555 }, { "epoch": 0.38482285153855583, "grad_norm": 1.3730111347553875, "learning_rate": 7.04620474845834e-06, "loss": 0.7613, "step": 12556 }, { "epoch": 0.38485350006129704, "grad_norm": 1.4086561410583378, "learning_rate": 7.045751884904201e-06, "loss": 0.6455, "step": 12557 }, { "epoch": 0.38488414858403824, "grad_norm": 1.2870102222124433, "learning_rate": 7.045299001192743e-06, "loss": 0.5401, "step": 12558 }, { "epoch": 0.38491479710677945, "grad_norm": 1.2735141007573405, "learning_rate": 7.0448460973284325e-06, "loss": 0.6375, "step": 12559 }, { "epoch": 0.38494544562952066, "grad_norm": 1.4169463839251681, "learning_rate": 7.0443931733157285e-06, "loss": 0.7371, "step": 12560 }, { "epoch": 0.38497609415226186, "grad_norm": 1.1248860743110316, "learning_rate": 7.043940229159099e-06, "loss": 0.6178, "step": 12561 }, { "epoch": 0.38500674267500307, "grad_norm": 1.2805436292235939, "learning_rate": 7.043487264863e-06, "loss": 0.6194, "step": 12562 }, { "epoch": 0.38503739119774427, "grad_norm": 1.3667409092383618, "learning_rate": 7.0430342804318996e-06, "loss": 0.6467, "step": 12563 }, { "epoch": 0.3850680397204855, "grad_norm": 1.513083747342537, "learning_rate": 7.0425812758702595e-06, "loss": 0.6473, "step": 12564 }, { "epoch": 0.3850986882432267, "grad_norm": 1.2898844755100587, "learning_rate": 7.042128251182543e-06, "loss": 0.7172, "step": 12565 }, { "epoch": 0.3851293367659679, "grad_norm": 1.1273088026313207, "learning_rate": 7.041675206373215e-06, "loss": 0.7208, "step": 12566 }, { "epoch": 0.3851599852887091, "grad_norm": 1.1891801287837018, "learning_rate": 7.041222141446737e-06, "loss": 0.6748, "step": 12567 }, { "epoch": 0.3851906338114503, "grad_norm": 1.306464342762967, "learning_rate": 7.0407690564075795e-06, "loss": 0.7662, "step": 12568 }, { "epoch": 0.3852212823341915, "grad_norm": 1.143682640520371, "learning_rate": 7.0403159512601985e-06, "loss": 0.5933, "step": 12569 }, { "epoch": 0.3852519308569327, "grad_norm": 1.3317534846900017, "learning_rate": 7.039862826009063e-06, "loss": 0.59, "step": 12570 }, { "epoch": 0.3852825793796739, "grad_norm": 1.4047166319380844, "learning_rate": 7.039409680658636e-06, "loss": 0.7018, "step": 12571 }, { "epoch": 0.3853132279024151, "grad_norm": 1.3760626105659772, "learning_rate": 7.038956515213387e-06, "loss": 0.7333, "step": 12572 }, { "epoch": 0.38534387642515633, "grad_norm": 1.373660758274332, "learning_rate": 7.038503329677775e-06, "loss": 0.7347, "step": 12573 }, { "epoch": 0.38537452494789753, "grad_norm": 1.2618962001295377, "learning_rate": 7.03805012405627e-06, "loss": 0.691, "step": 12574 }, { "epoch": 0.38540517347063874, "grad_norm": 1.1795390905558067, "learning_rate": 7.037596898353333e-06, "loss": 0.7296, "step": 12575 }, { "epoch": 0.38543582199337995, "grad_norm": 0.486555665204736, "learning_rate": 7.037143652573435e-06, "loss": 0.4424, "step": 12576 }, { "epoch": 0.3854664705161211, "grad_norm": 1.3751611327172923, "learning_rate": 7.036690386721038e-06, "loss": 0.5959, "step": 12577 }, { "epoch": 0.3854971190388623, "grad_norm": 1.1145852465390989, "learning_rate": 7.0362371008006104e-06, "loss": 0.495, "step": 12578 }, { "epoch": 0.3855277675616035, "grad_norm": 1.297167693166599, "learning_rate": 7.035783794816616e-06, "loss": 0.7008, "step": 12579 }, { "epoch": 0.3855584160843447, "grad_norm": 1.2789488647848033, "learning_rate": 7.035330468773524e-06, "loss": 0.6222, "step": 12580 }, { "epoch": 0.3855890646070859, "grad_norm": 1.4820619451487014, "learning_rate": 7.034877122675801e-06, "loss": 0.704, "step": 12581 }, { "epoch": 0.3856197131298271, "grad_norm": 1.26619595730517, "learning_rate": 7.034423756527912e-06, "loss": 0.6515, "step": 12582 }, { "epoch": 0.38565036165256833, "grad_norm": 1.1841756332622253, "learning_rate": 7.033970370334325e-06, "loss": 0.614, "step": 12583 }, { "epoch": 0.38568101017530954, "grad_norm": 1.1028659151529085, "learning_rate": 7.033516964099508e-06, "loss": 0.6563, "step": 12584 }, { "epoch": 0.38571165869805074, "grad_norm": 1.3814501555679715, "learning_rate": 7.033063537827929e-06, "loss": 0.6641, "step": 12585 }, { "epoch": 0.38574230722079195, "grad_norm": 1.3024760589975894, "learning_rate": 7.032610091524052e-06, "loss": 0.6589, "step": 12586 }, { "epoch": 0.38577295574353315, "grad_norm": 1.2356447887844881, "learning_rate": 7.032156625192353e-06, "loss": 0.6035, "step": 12587 }, { "epoch": 0.38580360426627436, "grad_norm": 1.3277539358020127, "learning_rate": 7.031703138837289e-06, "loss": 0.7502, "step": 12588 }, { "epoch": 0.38583425278901556, "grad_norm": 1.3009753531532844, "learning_rate": 7.031249632463337e-06, "loss": 0.7031, "step": 12589 }, { "epoch": 0.38586490131175677, "grad_norm": 1.2413375878485065, "learning_rate": 7.030796106074962e-06, "loss": 0.6804, "step": 12590 }, { "epoch": 0.385895549834498, "grad_norm": 1.249627021148508, "learning_rate": 7.030342559676633e-06, "loss": 0.6055, "step": 12591 }, { "epoch": 0.3859261983572392, "grad_norm": 1.2093302414349911, "learning_rate": 7.029888993272821e-06, "loss": 0.6937, "step": 12592 }, { "epoch": 0.3859568468799804, "grad_norm": 1.3237966462434738, "learning_rate": 7.02943540686799e-06, "loss": 0.6591, "step": 12593 }, { "epoch": 0.3859874954027216, "grad_norm": 0.4980522454490326, "learning_rate": 7.028981800466617e-06, "loss": 0.4606, "step": 12594 }, { "epoch": 0.3860181439254628, "grad_norm": 1.1691179369952163, "learning_rate": 7.028528174073165e-06, "loss": 0.7132, "step": 12595 }, { "epoch": 0.386048792448204, "grad_norm": 1.366721851910489, "learning_rate": 7.028074527692106e-06, "loss": 0.7742, "step": 12596 }, { "epoch": 0.3860794409709452, "grad_norm": 1.211032216617028, "learning_rate": 7.027620861327908e-06, "loss": 0.7747, "step": 12597 }, { "epoch": 0.3861100894936864, "grad_norm": 1.2740618672516426, "learning_rate": 7.027167174985046e-06, "loss": 0.6862, "step": 12598 }, { "epoch": 0.3861407380164276, "grad_norm": 1.3014670304572984, "learning_rate": 7.026713468667985e-06, "loss": 0.7472, "step": 12599 }, { "epoch": 0.3861713865391688, "grad_norm": 1.3469276391013325, "learning_rate": 7.026259742381199e-06, "loss": 0.7035, "step": 12600 }, { "epoch": 0.38620203506191003, "grad_norm": 1.247615566671293, "learning_rate": 7.0258059961291555e-06, "loss": 0.6394, "step": 12601 }, { "epoch": 0.38623268358465124, "grad_norm": 1.006691433923189, "learning_rate": 7.025352229916329e-06, "loss": 0.5429, "step": 12602 }, { "epoch": 0.38626333210739244, "grad_norm": 0.49196217876152215, "learning_rate": 7.024898443747189e-06, "loss": 0.4119, "step": 12603 }, { "epoch": 0.38629398063013365, "grad_norm": 1.210615464636429, "learning_rate": 7.024444637626206e-06, "loss": 0.7067, "step": 12604 }, { "epoch": 0.38632462915287485, "grad_norm": 1.3051234281959727, "learning_rate": 7.023990811557851e-06, "loss": 0.5791, "step": 12605 }, { "epoch": 0.38635527767561606, "grad_norm": 1.3460381428337804, "learning_rate": 7.023536965546598e-06, "loss": 0.6854, "step": 12606 }, { "epoch": 0.38638592619835727, "grad_norm": 0.4173109159962635, "learning_rate": 7.023083099596917e-06, "loss": 0.4226, "step": 12607 }, { "epoch": 0.3864165747210984, "grad_norm": 1.3908895940931538, "learning_rate": 7.022629213713279e-06, "loss": 0.6958, "step": 12608 }, { "epoch": 0.3864472232438396, "grad_norm": 1.211692073909371, "learning_rate": 7.0221753079001595e-06, "loss": 0.6422, "step": 12609 }, { "epoch": 0.3864778717665808, "grad_norm": 1.1693514178217335, "learning_rate": 7.021721382162029e-06, "loss": 0.6092, "step": 12610 }, { "epoch": 0.38650852028932203, "grad_norm": 1.128373165032198, "learning_rate": 7.021267436503362e-06, "loss": 0.6612, "step": 12611 }, { "epoch": 0.38653916881206324, "grad_norm": 1.3733588287364882, "learning_rate": 7.0208134709286265e-06, "loss": 0.6546, "step": 12612 }, { "epoch": 0.38656981733480444, "grad_norm": 1.3630056065582055, "learning_rate": 7.020359485442302e-06, "loss": 0.6849, "step": 12613 }, { "epoch": 0.38660046585754565, "grad_norm": 1.2587434383783775, "learning_rate": 7.019905480048858e-06, "loss": 0.6718, "step": 12614 }, { "epoch": 0.38663111438028686, "grad_norm": 1.2583066068356648, "learning_rate": 7.019451454752767e-06, "loss": 0.6382, "step": 12615 }, { "epoch": 0.38666176290302806, "grad_norm": 1.1468331508732432, "learning_rate": 7.018997409558504e-06, "loss": 0.7482, "step": 12616 }, { "epoch": 0.38669241142576927, "grad_norm": 1.3348890463004126, "learning_rate": 7.018543344470544e-06, "loss": 0.726, "step": 12617 }, { "epoch": 0.3867230599485105, "grad_norm": 1.2047208136355436, "learning_rate": 7.01808925949336e-06, "loss": 0.6986, "step": 12618 }, { "epoch": 0.3867537084712517, "grad_norm": 1.2479259238661775, "learning_rate": 7.0176351546314256e-06, "loss": 0.7573, "step": 12619 }, { "epoch": 0.3867843569939929, "grad_norm": 1.2498881064229272, "learning_rate": 7.017181029889216e-06, "loss": 0.6081, "step": 12620 }, { "epoch": 0.3868150055167341, "grad_norm": 1.3137799448839789, "learning_rate": 7.016726885271206e-06, "loss": 0.6925, "step": 12621 }, { "epoch": 0.3868456540394753, "grad_norm": 0.4923610951493386, "learning_rate": 7.01627272078187e-06, "loss": 0.4377, "step": 12622 }, { "epoch": 0.3868763025622165, "grad_norm": 1.1228387704778882, "learning_rate": 7.0158185364256825e-06, "loss": 0.7453, "step": 12623 }, { "epoch": 0.3869069510849577, "grad_norm": 1.1175533444725982, "learning_rate": 7.0153643322071195e-06, "loss": 0.6454, "step": 12624 }, { "epoch": 0.3869375996076989, "grad_norm": 1.3135254586856726, "learning_rate": 7.014910108130655e-06, "loss": 0.6947, "step": 12625 }, { "epoch": 0.3869682481304401, "grad_norm": 1.3080370926481724, "learning_rate": 7.014455864200768e-06, "loss": 0.6635, "step": 12626 }, { "epoch": 0.3869988966531813, "grad_norm": 0.4756542154065871, "learning_rate": 7.01400160042193e-06, "loss": 0.4243, "step": 12627 }, { "epoch": 0.38702954517592253, "grad_norm": 1.4155209675744036, "learning_rate": 7.01354731679862e-06, "loss": 0.7558, "step": 12628 }, { "epoch": 0.38706019369866373, "grad_norm": 1.1708313896682303, "learning_rate": 7.013093013335312e-06, "loss": 0.6857, "step": 12629 }, { "epoch": 0.38709084222140494, "grad_norm": 1.3367752475736097, "learning_rate": 7.012638690036485e-06, "loss": 0.5691, "step": 12630 }, { "epoch": 0.38712149074414615, "grad_norm": 1.1716714889732365, "learning_rate": 7.012184346906612e-06, "loss": 0.6395, "step": 12631 }, { "epoch": 0.38715213926688735, "grad_norm": 1.2080884051677387, "learning_rate": 7.011729983950174e-06, "loss": 0.6932, "step": 12632 }, { "epoch": 0.38718278778962856, "grad_norm": 0.5045198575180971, "learning_rate": 7.011275601171643e-06, "loss": 0.4372, "step": 12633 }, { "epoch": 0.38721343631236976, "grad_norm": 1.2557653255089198, "learning_rate": 7.010821198575501e-06, "loss": 0.6298, "step": 12634 }, { "epoch": 0.38724408483511097, "grad_norm": 1.210176415856056, "learning_rate": 7.010366776166224e-06, "loss": 0.7202, "step": 12635 }, { "epoch": 0.3872747333578522, "grad_norm": 1.4177662200007006, "learning_rate": 7.009912333948287e-06, "loss": 0.7168, "step": 12636 }, { "epoch": 0.3873053818805934, "grad_norm": 1.1749974043706977, "learning_rate": 7.009457871926169e-06, "loss": 0.6726, "step": 12637 }, { "epoch": 0.3873360304033346, "grad_norm": 1.1537608484518724, "learning_rate": 7.009003390104351e-06, "loss": 0.6836, "step": 12638 }, { "epoch": 0.38736667892607574, "grad_norm": 1.180774810207015, "learning_rate": 7.008548888487308e-06, "loss": 0.6894, "step": 12639 }, { "epoch": 0.38739732744881694, "grad_norm": 1.3247871271477847, "learning_rate": 7.008094367079516e-06, "loss": 0.5716, "step": 12640 }, { "epoch": 0.38742797597155815, "grad_norm": 1.144625400846646, "learning_rate": 7.00763982588546e-06, "loss": 0.6613, "step": 12641 }, { "epoch": 0.38745862449429935, "grad_norm": 1.3255108573350112, "learning_rate": 7.007185264909613e-06, "loss": 0.6385, "step": 12642 }, { "epoch": 0.38748927301704056, "grad_norm": 1.2178879744623021, "learning_rate": 7.006730684156456e-06, "loss": 0.675, "step": 12643 }, { "epoch": 0.38751992153978176, "grad_norm": 0.5008723933672392, "learning_rate": 7.0062760836304685e-06, "loss": 0.4538, "step": 12644 }, { "epoch": 0.38755057006252297, "grad_norm": 1.1969089914172841, "learning_rate": 7.00582146333613e-06, "loss": 0.6261, "step": 12645 }, { "epoch": 0.3875812185852642, "grad_norm": 1.2023172860330391, "learning_rate": 7.0053668232779195e-06, "loss": 0.6465, "step": 12646 }, { "epoch": 0.3876118671080054, "grad_norm": 1.2083058753065359, "learning_rate": 7.004912163460316e-06, "loss": 0.6116, "step": 12647 }, { "epoch": 0.3876425156307466, "grad_norm": 1.1735294382584998, "learning_rate": 7.004457483887799e-06, "loss": 0.6052, "step": 12648 }, { "epoch": 0.3876731641534878, "grad_norm": 0.48570190492024357, "learning_rate": 7.004002784564852e-06, "loss": 0.4253, "step": 12649 }, { "epoch": 0.387703812676229, "grad_norm": 1.259609847880501, "learning_rate": 7.003548065495951e-06, "loss": 0.6476, "step": 12650 }, { "epoch": 0.3877344611989702, "grad_norm": 1.3461137469177042, "learning_rate": 7.003093326685578e-06, "loss": 0.6436, "step": 12651 }, { "epoch": 0.3877651097217114, "grad_norm": 1.1825881959181193, "learning_rate": 7.002638568138214e-06, "loss": 0.7412, "step": 12652 }, { "epoch": 0.3877957582444526, "grad_norm": 1.181890090204295, "learning_rate": 7.002183789858341e-06, "loss": 0.6842, "step": 12653 }, { "epoch": 0.3878264067671938, "grad_norm": 1.2851823877367337, "learning_rate": 7.00172899185044e-06, "loss": 0.6598, "step": 12654 }, { "epoch": 0.387857055289935, "grad_norm": 1.2202801730289632, "learning_rate": 7.0012741741189886e-06, "loss": 0.6388, "step": 12655 }, { "epoch": 0.38788770381267623, "grad_norm": 1.215865914449905, "learning_rate": 7.0008193366684706e-06, "loss": 0.6485, "step": 12656 }, { "epoch": 0.38791835233541744, "grad_norm": 1.4287818049582544, "learning_rate": 7.00036447950337e-06, "loss": 0.7021, "step": 12657 }, { "epoch": 0.38794900085815864, "grad_norm": 1.2101354031916813, "learning_rate": 6.999909602628164e-06, "loss": 0.6494, "step": 12658 }, { "epoch": 0.38797964938089985, "grad_norm": 1.2819769568382708, "learning_rate": 6.999454706047338e-06, "loss": 0.6037, "step": 12659 }, { "epoch": 0.38801029790364105, "grad_norm": 1.2685562316309806, "learning_rate": 6.998999789765372e-06, "loss": 0.711, "step": 12660 }, { "epoch": 0.38804094642638226, "grad_norm": 1.1978421802479506, "learning_rate": 6.998544853786753e-06, "loss": 0.7442, "step": 12661 }, { "epoch": 0.38807159494912347, "grad_norm": 0.5264745238898099, "learning_rate": 6.998089898115956e-06, "loss": 0.4078, "step": 12662 }, { "epoch": 0.38810224347186467, "grad_norm": 1.1433450370198297, "learning_rate": 6.99763492275747e-06, "loss": 0.676, "step": 12663 }, { "epoch": 0.3881328919946059, "grad_norm": 1.1860763075204674, "learning_rate": 6.997179927715777e-06, "loss": 0.7128, "step": 12664 }, { "epoch": 0.3881635405173471, "grad_norm": 1.237451872175727, "learning_rate": 6.996724912995359e-06, "loss": 0.6241, "step": 12665 }, { "epoch": 0.3881941890400883, "grad_norm": 1.299667081998215, "learning_rate": 6.996269878600698e-06, "loss": 0.6559, "step": 12666 }, { "epoch": 0.3882248375628295, "grad_norm": 1.5000844857296018, "learning_rate": 6.99581482453628e-06, "loss": 0.7833, "step": 12667 }, { "epoch": 0.3882554860855707, "grad_norm": 1.2503186380393405, "learning_rate": 6.995359750806587e-06, "loss": 0.7005, "step": 12668 }, { "epoch": 0.3882861346083119, "grad_norm": 1.2592803753730486, "learning_rate": 6.994904657416105e-06, "loss": 0.648, "step": 12669 }, { "epoch": 0.38831678313105306, "grad_norm": 1.1556765771925035, "learning_rate": 6.994449544369316e-06, "loss": 0.7501, "step": 12670 }, { "epoch": 0.38834743165379426, "grad_norm": 1.3222917867787223, "learning_rate": 6.993994411670706e-06, "loss": 0.6865, "step": 12671 }, { "epoch": 0.38837808017653547, "grad_norm": 1.4055246014079041, "learning_rate": 6.9935392593247595e-06, "loss": 0.6568, "step": 12672 }, { "epoch": 0.3884087286992767, "grad_norm": 0.4839922476031765, "learning_rate": 6.99308408733596e-06, "loss": 0.4253, "step": 12673 }, { "epoch": 0.3884393772220179, "grad_norm": 1.3842620890772397, "learning_rate": 6.992628895708792e-06, "loss": 0.7846, "step": 12674 }, { "epoch": 0.3884700257447591, "grad_norm": 1.3091815723272138, "learning_rate": 6.992173684447743e-06, "loss": 0.744, "step": 12675 }, { "epoch": 0.3885006742675003, "grad_norm": 1.2322473666020353, "learning_rate": 6.991718453557297e-06, "loss": 0.7037, "step": 12676 }, { "epoch": 0.3885313227902415, "grad_norm": 1.2599620056128005, "learning_rate": 6.991263203041938e-06, "loss": 0.6363, "step": 12677 }, { "epoch": 0.3885619713129827, "grad_norm": 1.2681581016472427, "learning_rate": 6.990807932906154e-06, "loss": 0.6494, "step": 12678 }, { "epoch": 0.3885926198357239, "grad_norm": 1.3392469226175707, "learning_rate": 6.99035264315443e-06, "loss": 0.7363, "step": 12679 }, { "epoch": 0.3886232683584651, "grad_norm": 0.4713925920571786, "learning_rate": 6.9898973337912534e-06, "loss": 0.4253, "step": 12680 }, { "epoch": 0.3886539168812063, "grad_norm": 1.2826685814241858, "learning_rate": 6.989442004821108e-06, "loss": 0.6912, "step": 12681 }, { "epoch": 0.3886845654039475, "grad_norm": 1.2488386044677557, "learning_rate": 6.988986656248482e-06, "loss": 0.6024, "step": 12682 }, { "epoch": 0.38871521392668873, "grad_norm": 1.1452488251855846, "learning_rate": 6.98853128807786e-06, "loss": 0.6672, "step": 12683 }, { "epoch": 0.38874586244942994, "grad_norm": 1.1619153890269986, "learning_rate": 6.988075900313734e-06, "loss": 0.5512, "step": 12684 }, { "epoch": 0.38877651097217114, "grad_norm": 1.1084812455136581, "learning_rate": 6.987620492960584e-06, "loss": 0.6266, "step": 12685 }, { "epoch": 0.38880715949491235, "grad_norm": 1.081675618947061, "learning_rate": 6.987165066022902e-06, "loss": 0.6626, "step": 12686 }, { "epoch": 0.38883780801765355, "grad_norm": 1.125089501008822, "learning_rate": 6.986709619505173e-06, "loss": 0.607, "step": 12687 }, { "epoch": 0.38886845654039476, "grad_norm": 1.4083450930414658, "learning_rate": 6.986254153411888e-06, "loss": 0.6305, "step": 12688 }, { "epoch": 0.38889910506313596, "grad_norm": 1.247755416995354, "learning_rate": 6.985798667747531e-06, "loss": 0.6248, "step": 12689 }, { "epoch": 0.38892975358587717, "grad_norm": 1.1295170042883453, "learning_rate": 6.985343162516591e-06, "loss": 0.6796, "step": 12690 }, { "epoch": 0.3889604021086184, "grad_norm": 1.4081911345947253, "learning_rate": 6.98488763772356e-06, "loss": 0.5926, "step": 12691 }, { "epoch": 0.3889910506313596, "grad_norm": 1.1681585861890842, "learning_rate": 6.9844320933729205e-06, "loss": 0.6727, "step": 12692 }, { "epoch": 0.3890216991541008, "grad_norm": 1.36185631420981, "learning_rate": 6.983976529469165e-06, "loss": 0.6174, "step": 12693 }, { "epoch": 0.389052347676842, "grad_norm": 1.1550258622409277, "learning_rate": 6.983520946016779e-06, "loss": 0.5959, "step": 12694 }, { "epoch": 0.3890829961995832, "grad_norm": 1.2125384834998112, "learning_rate": 6.983065343020258e-06, "loss": 0.592, "step": 12695 }, { "epoch": 0.3891136447223244, "grad_norm": 1.1454855659645071, "learning_rate": 6.982609720484082e-06, "loss": 0.7285, "step": 12696 }, { "epoch": 0.3891442932450656, "grad_norm": 1.3047488068550728, "learning_rate": 6.9821540784127485e-06, "loss": 0.7044, "step": 12697 }, { "epoch": 0.3891749417678068, "grad_norm": 0.5020992902870519, "learning_rate": 6.981698416810742e-06, "loss": 0.4273, "step": 12698 }, { "epoch": 0.389205590290548, "grad_norm": 0.5014717317119652, "learning_rate": 6.981242735682554e-06, "loss": 0.4165, "step": 12699 }, { "epoch": 0.3892362388132892, "grad_norm": 1.143440667446512, "learning_rate": 6.980787035032676e-06, "loss": 0.7065, "step": 12700 }, { "epoch": 0.3892668873360304, "grad_norm": 1.2071259059660642, "learning_rate": 6.980331314865596e-06, "loss": 0.6205, "step": 12701 }, { "epoch": 0.3892975358587716, "grad_norm": 1.258960978260185, "learning_rate": 6.9798755751858025e-06, "loss": 0.7029, "step": 12702 }, { "epoch": 0.3893281843815128, "grad_norm": 0.46644210749217907, "learning_rate": 6.979419815997791e-06, "loss": 0.4217, "step": 12703 }, { "epoch": 0.389358832904254, "grad_norm": 1.238494502874468, "learning_rate": 6.9789640373060486e-06, "loss": 0.6721, "step": 12704 }, { "epoch": 0.3893894814269952, "grad_norm": 1.1536983794108038, "learning_rate": 6.978508239115067e-06, "loss": 0.7062, "step": 12705 }, { "epoch": 0.3894201299497364, "grad_norm": 0.5206077828540646, "learning_rate": 6.9780524214293375e-06, "loss": 0.4347, "step": 12706 }, { "epoch": 0.3894507784724776, "grad_norm": 1.0928940200648363, "learning_rate": 6.977596584253352e-06, "loss": 0.6769, "step": 12707 }, { "epoch": 0.3894814269952188, "grad_norm": 0.5285202179045104, "learning_rate": 6.977140727591601e-06, "loss": 0.4321, "step": 12708 }, { "epoch": 0.38951207551796, "grad_norm": 0.4896856249148542, "learning_rate": 6.976684851448577e-06, "loss": 0.4182, "step": 12709 }, { "epoch": 0.3895427240407012, "grad_norm": 1.397913748779412, "learning_rate": 6.976228955828771e-06, "loss": 0.7345, "step": 12710 }, { "epoch": 0.38957337256344243, "grad_norm": 1.2990420287931344, "learning_rate": 6.975773040736675e-06, "loss": 0.7078, "step": 12711 }, { "epoch": 0.38960402108618364, "grad_norm": 1.2513380745634057, "learning_rate": 6.975317106176783e-06, "loss": 0.6874, "step": 12712 }, { "epoch": 0.38963466960892484, "grad_norm": 1.2712691188174374, "learning_rate": 6.9748611521535845e-06, "loss": 0.655, "step": 12713 }, { "epoch": 0.38966531813166605, "grad_norm": 1.135058667710132, "learning_rate": 6.974405178671575e-06, "loss": 0.6185, "step": 12714 }, { "epoch": 0.38969596665440726, "grad_norm": 1.2207848497636922, "learning_rate": 6.973949185735246e-06, "loss": 0.6301, "step": 12715 }, { "epoch": 0.38972661517714846, "grad_norm": 1.2797621935932912, "learning_rate": 6.973493173349089e-06, "loss": 0.6695, "step": 12716 }, { "epoch": 0.38975726369988967, "grad_norm": 1.2000333132199557, "learning_rate": 6.9730371415176014e-06, "loss": 0.6344, "step": 12717 }, { "epoch": 0.3897879122226309, "grad_norm": 1.144279463756171, "learning_rate": 6.9725810902452725e-06, "loss": 0.6255, "step": 12718 }, { "epoch": 0.3898185607453721, "grad_norm": 1.2893735427973536, "learning_rate": 6.972125019536599e-06, "loss": 0.6954, "step": 12719 }, { "epoch": 0.3898492092681133, "grad_norm": 1.3808469876410894, "learning_rate": 6.971668929396071e-06, "loss": 0.6464, "step": 12720 }, { "epoch": 0.3898798577908545, "grad_norm": 1.4655092872953264, "learning_rate": 6.971212819828185e-06, "loss": 0.6606, "step": 12721 }, { "epoch": 0.3899105063135957, "grad_norm": 0.8182243060736065, "learning_rate": 6.970756690837436e-06, "loss": 0.437, "step": 12722 }, { "epoch": 0.3899411548363369, "grad_norm": 0.7090629517556398, "learning_rate": 6.970300542428315e-06, "loss": 0.4054, "step": 12723 }, { "epoch": 0.3899718033590781, "grad_norm": 1.186770510323292, "learning_rate": 6.96984437460532e-06, "loss": 0.6021, "step": 12724 }, { "epoch": 0.3900024518818193, "grad_norm": 0.4647565030806324, "learning_rate": 6.969388187372944e-06, "loss": 0.4189, "step": 12725 }, { "epoch": 0.3900331004045605, "grad_norm": 1.3452008457429219, "learning_rate": 6.968931980735683e-06, "loss": 0.5777, "step": 12726 }, { "epoch": 0.3900637489273017, "grad_norm": 1.0907656753757125, "learning_rate": 6.968475754698032e-06, "loss": 0.6303, "step": 12727 }, { "epoch": 0.39009439745004293, "grad_norm": 1.2449003033135968, "learning_rate": 6.968019509264483e-06, "loss": 0.707, "step": 12728 }, { "epoch": 0.39012504597278413, "grad_norm": 1.32212880676443, "learning_rate": 6.967563244439537e-06, "loss": 0.6704, "step": 12729 }, { "epoch": 0.39015569449552534, "grad_norm": 1.338770274083547, "learning_rate": 6.9671069602276854e-06, "loss": 0.701, "step": 12730 }, { "epoch": 0.39018634301826655, "grad_norm": 0.808139107813196, "learning_rate": 6.966650656633424e-06, "loss": 0.4337, "step": 12731 }, { "epoch": 0.3902169915410077, "grad_norm": 1.3856054714381965, "learning_rate": 6.966194333661254e-06, "loss": 0.6952, "step": 12732 }, { "epoch": 0.3902476400637489, "grad_norm": 1.487096740187942, "learning_rate": 6.965737991315667e-06, "loss": 0.645, "step": 12733 }, { "epoch": 0.3902782885864901, "grad_norm": 1.1926944944397908, "learning_rate": 6.965281629601161e-06, "loss": 0.6958, "step": 12734 }, { "epoch": 0.3903089371092313, "grad_norm": 1.2679036464374343, "learning_rate": 6.9648252485222304e-06, "loss": 0.6837, "step": 12735 }, { "epoch": 0.3903395856319725, "grad_norm": 1.25319660976547, "learning_rate": 6.9643688480833746e-06, "loss": 0.6138, "step": 12736 }, { "epoch": 0.3903702341547137, "grad_norm": 1.372575655258163, "learning_rate": 6.96391242828909e-06, "loss": 0.6406, "step": 12737 }, { "epoch": 0.39040088267745493, "grad_norm": 0.600555334837907, "learning_rate": 6.963455989143876e-06, "loss": 0.4162, "step": 12738 }, { "epoch": 0.39043153120019614, "grad_norm": 0.5269553672562696, "learning_rate": 6.9629995306522245e-06, "loss": 0.4166, "step": 12739 }, { "epoch": 0.39046217972293734, "grad_norm": 1.287758926160336, "learning_rate": 6.962543052818638e-06, "loss": 0.6096, "step": 12740 }, { "epoch": 0.39049282824567855, "grad_norm": 0.4627932319857822, "learning_rate": 6.962086555647614e-06, "loss": 0.4224, "step": 12741 }, { "epoch": 0.39052347676841975, "grad_norm": 1.341833883317105, "learning_rate": 6.9616300391436456e-06, "loss": 0.633, "step": 12742 }, { "epoch": 0.39055412529116096, "grad_norm": 1.3763953781401388, "learning_rate": 6.961173503311237e-06, "loss": 0.6856, "step": 12743 }, { "epoch": 0.39058477381390216, "grad_norm": 0.5513993633184231, "learning_rate": 6.960716948154884e-06, "loss": 0.4133, "step": 12744 }, { "epoch": 0.39061542233664337, "grad_norm": 1.2720702581433392, "learning_rate": 6.960260373679085e-06, "loss": 0.717, "step": 12745 }, { "epoch": 0.3906460708593846, "grad_norm": 1.344414816609499, "learning_rate": 6.959803779888338e-06, "loss": 0.7015, "step": 12746 }, { "epoch": 0.3906767193821258, "grad_norm": 1.2664929359676884, "learning_rate": 6.959347166787144e-06, "loss": 0.6526, "step": 12747 }, { "epoch": 0.390707367904867, "grad_norm": 1.0948217413061054, "learning_rate": 6.95889053438e-06, "loss": 0.544, "step": 12748 }, { "epoch": 0.3907380164276082, "grad_norm": 1.138659002803271, "learning_rate": 6.958433882671408e-06, "loss": 0.6203, "step": 12749 }, { "epoch": 0.3907686649503494, "grad_norm": 1.3330807367568924, "learning_rate": 6.957977211665865e-06, "loss": 0.7842, "step": 12750 }, { "epoch": 0.3907993134730906, "grad_norm": 0.5637463764081295, "learning_rate": 6.957520521367871e-06, "loss": 0.4361, "step": 12751 }, { "epoch": 0.3908299619958318, "grad_norm": 0.5295852900792193, "learning_rate": 6.9570638117819266e-06, "loss": 0.4229, "step": 12752 }, { "epoch": 0.390860610518573, "grad_norm": 1.2319788165156442, "learning_rate": 6.9566070829125345e-06, "loss": 0.7083, "step": 12753 }, { "epoch": 0.3908912590413142, "grad_norm": 1.403290178404452, "learning_rate": 6.956150334764188e-06, "loss": 0.5629, "step": 12754 }, { "epoch": 0.3909219075640554, "grad_norm": 1.5217564312424083, "learning_rate": 6.9556935673413935e-06, "loss": 0.7411, "step": 12755 }, { "epoch": 0.39095255608679663, "grad_norm": 1.1438774840527584, "learning_rate": 6.95523678064865e-06, "loss": 0.6784, "step": 12756 }, { "epoch": 0.39098320460953784, "grad_norm": 1.3506201279220456, "learning_rate": 6.9547799746904575e-06, "loss": 0.6337, "step": 12757 }, { "epoch": 0.39101385313227904, "grad_norm": 1.2729293847434842, "learning_rate": 6.954323149471319e-06, "loss": 0.6056, "step": 12758 }, { "epoch": 0.39104450165502025, "grad_norm": 0.5621921396191478, "learning_rate": 6.953866304995733e-06, "loss": 0.4439, "step": 12759 }, { "epoch": 0.39107515017776145, "grad_norm": 0.5351299486572182, "learning_rate": 6.953409441268204e-06, "loss": 0.4347, "step": 12760 }, { "epoch": 0.39110579870050266, "grad_norm": 1.3653694775586842, "learning_rate": 6.952952558293231e-06, "loss": 0.6705, "step": 12761 }, { "epoch": 0.39113644722324387, "grad_norm": 1.175403578123421, "learning_rate": 6.952495656075318e-06, "loss": 0.6316, "step": 12762 }, { "epoch": 0.391167095745985, "grad_norm": 1.2097305199063466, "learning_rate": 6.952038734618964e-06, "loss": 0.635, "step": 12763 }, { "epoch": 0.3911977442687262, "grad_norm": 1.3824507569389117, "learning_rate": 6.951581793928674e-06, "loss": 0.6884, "step": 12764 }, { "epoch": 0.3912283927914674, "grad_norm": 0.46357763216991676, "learning_rate": 6.951124834008948e-06, "loss": 0.418, "step": 12765 }, { "epoch": 0.39125904131420863, "grad_norm": 0.5201048850822717, "learning_rate": 6.950667854864293e-06, "loss": 0.4645, "step": 12766 }, { "epoch": 0.39128968983694984, "grad_norm": 1.306914537759264, "learning_rate": 6.950210856499204e-06, "loss": 0.5257, "step": 12767 }, { "epoch": 0.39132033835969104, "grad_norm": 1.4536713919938933, "learning_rate": 6.949753838918192e-06, "loss": 0.6908, "step": 12768 }, { "epoch": 0.39135098688243225, "grad_norm": 1.3855534914711862, "learning_rate": 6.949296802125755e-06, "loss": 0.7093, "step": 12769 }, { "epoch": 0.39138163540517346, "grad_norm": 1.4390865390979486, "learning_rate": 6.948839746126399e-06, "loss": 0.5756, "step": 12770 }, { "epoch": 0.39141228392791466, "grad_norm": 1.2351570754223522, "learning_rate": 6.948382670924625e-06, "loss": 0.6078, "step": 12771 }, { "epoch": 0.39144293245065587, "grad_norm": 0.5381280296530245, "learning_rate": 6.947925576524939e-06, "loss": 0.4245, "step": 12772 }, { "epoch": 0.3914735809733971, "grad_norm": 1.1757662532332136, "learning_rate": 6.947468462931843e-06, "loss": 0.6622, "step": 12773 }, { "epoch": 0.3915042294961383, "grad_norm": 1.225270635780215, "learning_rate": 6.947011330149842e-06, "loss": 0.7146, "step": 12774 }, { "epoch": 0.3915348780188795, "grad_norm": 1.253574450058418, "learning_rate": 6.94655417818344e-06, "loss": 0.6779, "step": 12775 }, { "epoch": 0.3915655265416207, "grad_norm": 1.2098558491552711, "learning_rate": 6.9460970070371425e-06, "loss": 0.6392, "step": 12776 }, { "epoch": 0.3915961750643619, "grad_norm": 1.0943709104396788, "learning_rate": 6.945639816715454e-06, "loss": 0.6426, "step": 12777 }, { "epoch": 0.3916268235871031, "grad_norm": 1.2910197662449168, "learning_rate": 6.945182607222876e-06, "loss": 0.6761, "step": 12778 }, { "epoch": 0.3916574721098443, "grad_norm": 1.3638985301622946, "learning_rate": 6.944725378563918e-06, "loss": 0.7279, "step": 12779 }, { "epoch": 0.3916881206325855, "grad_norm": 1.1586524611465099, "learning_rate": 6.944268130743083e-06, "loss": 0.6252, "step": 12780 }, { "epoch": 0.3917187691553267, "grad_norm": 1.186272037925611, "learning_rate": 6.943810863764877e-06, "loss": 0.5783, "step": 12781 }, { "epoch": 0.3917494176780679, "grad_norm": 1.2816954409645802, "learning_rate": 6.943353577633803e-06, "loss": 0.72, "step": 12782 }, { "epoch": 0.39178006620080913, "grad_norm": 1.3984087686712907, "learning_rate": 6.9428962723543716e-06, "loss": 0.765, "step": 12783 }, { "epoch": 0.39181071472355034, "grad_norm": 1.2225682300519434, "learning_rate": 6.942438947931085e-06, "loss": 0.5998, "step": 12784 }, { "epoch": 0.39184136324629154, "grad_norm": 0.48408158062911705, "learning_rate": 6.94198160436845e-06, "loss": 0.4085, "step": 12785 }, { "epoch": 0.39187201176903275, "grad_norm": 1.147996349316709, "learning_rate": 6.941524241670975e-06, "loss": 0.6599, "step": 12786 }, { "epoch": 0.39190266029177395, "grad_norm": 1.2629018910623933, "learning_rate": 6.941066859843163e-06, "loss": 0.5942, "step": 12787 }, { "epoch": 0.39193330881451516, "grad_norm": 1.3154084415314118, "learning_rate": 6.940609458889525e-06, "loss": 0.6569, "step": 12788 }, { "epoch": 0.39196395733725636, "grad_norm": 0.44723981200791735, "learning_rate": 6.940152038814563e-06, "loss": 0.4033, "step": 12789 }, { "epoch": 0.39199460585999757, "grad_norm": 0.4711452792928197, "learning_rate": 6.939694599622788e-06, "loss": 0.4363, "step": 12790 }, { "epoch": 0.3920252543827388, "grad_norm": 1.1966004402465085, "learning_rate": 6.939237141318704e-06, "loss": 0.6565, "step": 12791 }, { "epoch": 0.39205590290548, "grad_norm": 1.4298680499410077, "learning_rate": 6.9387796639068224e-06, "loss": 0.7337, "step": 12792 }, { "epoch": 0.3920865514282212, "grad_norm": 1.1851605704649293, "learning_rate": 6.9383221673916475e-06, "loss": 0.6088, "step": 12793 }, { "epoch": 0.39211719995096234, "grad_norm": 1.1709617916726103, "learning_rate": 6.93786465177769e-06, "loss": 0.6177, "step": 12794 }, { "epoch": 0.39214784847370354, "grad_norm": 1.3977366225646128, "learning_rate": 6.937407117069454e-06, "loss": 0.6, "step": 12795 }, { "epoch": 0.39217849699644475, "grad_norm": 1.3175035578718197, "learning_rate": 6.936949563271452e-06, "loss": 0.6272, "step": 12796 }, { "epoch": 0.39220914551918595, "grad_norm": 1.2912528239730965, "learning_rate": 6.936491990388189e-06, "loss": 0.7213, "step": 12797 }, { "epoch": 0.39223979404192716, "grad_norm": 1.1680430436168014, "learning_rate": 6.936034398424175e-06, "loss": 0.6257, "step": 12798 }, { "epoch": 0.39227044256466836, "grad_norm": 1.2238149006083299, "learning_rate": 6.93557678738392e-06, "loss": 0.7011, "step": 12799 }, { "epoch": 0.39230109108740957, "grad_norm": 1.510015593115952, "learning_rate": 6.9351191572719304e-06, "loss": 0.6272, "step": 12800 }, { "epoch": 0.3923317396101508, "grad_norm": 1.231549471903329, "learning_rate": 6.9346615080927175e-06, "loss": 0.7194, "step": 12801 }, { "epoch": 0.392362388132892, "grad_norm": 0.4885520694853112, "learning_rate": 6.9342038398507875e-06, "loss": 0.4272, "step": 12802 }, { "epoch": 0.3923930366556332, "grad_norm": 0.4950650219040841, "learning_rate": 6.933746152550655e-06, "loss": 0.4342, "step": 12803 }, { "epoch": 0.3924236851783744, "grad_norm": 1.1671600121716943, "learning_rate": 6.933288446196825e-06, "loss": 0.7161, "step": 12804 }, { "epoch": 0.3924543337011156, "grad_norm": 0.4676455272734689, "learning_rate": 6.932830720793811e-06, "loss": 0.4439, "step": 12805 }, { "epoch": 0.3924849822238568, "grad_norm": 1.239363949871319, "learning_rate": 6.932372976346119e-06, "loss": 0.6791, "step": 12806 }, { "epoch": 0.392515630746598, "grad_norm": 0.4728900782919208, "learning_rate": 6.931915212858265e-06, "loss": 0.42, "step": 12807 }, { "epoch": 0.3925462792693392, "grad_norm": 1.2919364987813682, "learning_rate": 6.931457430334753e-06, "loss": 0.7654, "step": 12808 }, { "epoch": 0.3925769277920804, "grad_norm": 1.2690816955357962, "learning_rate": 6.930999628780097e-06, "loss": 0.649, "step": 12809 }, { "epoch": 0.3926075763148216, "grad_norm": 1.347688064447315, "learning_rate": 6.930541808198809e-06, "loss": 0.7272, "step": 12810 }, { "epoch": 0.39263822483756283, "grad_norm": 1.233595941800803, "learning_rate": 6.930083968595398e-06, "loss": 0.6726, "step": 12811 }, { "epoch": 0.39266887336030404, "grad_norm": 1.3569409382671924, "learning_rate": 6.929626109974377e-06, "loss": 0.7571, "step": 12812 }, { "epoch": 0.39269952188304524, "grad_norm": 1.283130201780262, "learning_rate": 6.929168232340253e-06, "loss": 0.7172, "step": 12813 }, { "epoch": 0.39273017040578645, "grad_norm": 1.3769762141986084, "learning_rate": 6.928710335697544e-06, "loss": 0.7152, "step": 12814 }, { "epoch": 0.39276081892852766, "grad_norm": 1.3105917433671292, "learning_rate": 6.9282524200507585e-06, "loss": 0.6794, "step": 12815 }, { "epoch": 0.39279146745126886, "grad_norm": 1.4296093208189071, "learning_rate": 6.927794485404407e-06, "loss": 0.7568, "step": 12816 }, { "epoch": 0.39282211597401007, "grad_norm": 1.1865235216120549, "learning_rate": 6.927336531763002e-06, "loss": 0.6842, "step": 12817 }, { "epoch": 0.39285276449675127, "grad_norm": 1.3350903028609595, "learning_rate": 6.926878559131061e-06, "loss": 0.6222, "step": 12818 }, { "epoch": 0.3928834130194925, "grad_norm": 1.1388550372305342, "learning_rate": 6.92642056751309e-06, "loss": 0.6754, "step": 12819 }, { "epoch": 0.3929140615422337, "grad_norm": 1.0985851661488102, "learning_rate": 6.925962556913605e-06, "loss": 0.5573, "step": 12820 }, { "epoch": 0.3929447100649749, "grad_norm": 1.2944120680926774, "learning_rate": 6.925504527337117e-06, "loss": 0.7074, "step": 12821 }, { "epoch": 0.3929753585877161, "grad_norm": 1.4503915399263272, "learning_rate": 6.925046478788142e-06, "loss": 0.6387, "step": 12822 }, { "epoch": 0.3930060071104573, "grad_norm": 0.5267368131494106, "learning_rate": 6.92458841127119e-06, "loss": 0.4379, "step": 12823 }, { "epoch": 0.3930366556331985, "grad_norm": 1.261939026152026, "learning_rate": 6.924130324790776e-06, "loss": 0.6451, "step": 12824 }, { "epoch": 0.39306730415593966, "grad_norm": 1.273901606758324, "learning_rate": 6.923672219351414e-06, "loss": 0.7221, "step": 12825 }, { "epoch": 0.39309795267868086, "grad_norm": 1.1883540873135665, "learning_rate": 6.923214094957618e-06, "loss": 0.6441, "step": 12826 }, { "epoch": 0.39312860120142207, "grad_norm": 1.062564097151081, "learning_rate": 6.922755951613901e-06, "loss": 0.6486, "step": 12827 }, { "epoch": 0.3931592497241633, "grad_norm": 1.2872479022039092, "learning_rate": 6.922297789324777e-06, "loss": 0.5611, "step": 12828 }, { "epoch": 0.3931898982469045, "grad_norm": 1.26822567510172, "learning_rate": 6.921839608094761e-06, "loss": 0.6848, "step": 12829 }, { "epoch": 0.3932205467696457, "grad_norm": 1.1301111025343764, "learning_rate": 6.92138140792837e-06, "loss": 0.7058, "step": 12830 }, { "epoch": 0.3932511952923869, "grad_norm": 1.3994352513669404, "learning_rate": 6.920923188830113e-06, "loss": 0.6747, "step": 12831 }, { "epoch": 0.3932818438151281, "grad_norm": 1.2092481369971926, "learning_rate": 6.9204649508045095e-06, "loss": 0.6992, "step": 12832 }, { "epoch": 0.3933124923378693, "grad_norm": 1.2157487628373471, "learning_rate": 6.920006693856074e-06, "loss": 0.6301, "step": 12833 }, { "epoch": 0.3933431408606105, "grad_norm": 1.3510851954505456, "learning_rate": 6.919548417989321e-06, "loss": 0.7555, "step": 12834 }, { "epoch": 0.3933737893833517, "grad_norm": 0.5061705757898324, "learning_rate": 6.919090123208767e-06, "loss": 0.4446, "step": 12835 }, { "epoch": 0.3934044379060929, "grad_norm": 1.0692281537367156, "learning_rate": 6.918631809518926e-06, "loss": 0.5791, "step": 12836 }, { "epoch": 0.3934350864288341, "grad_norm": 1.3487960769713856, "learning_rate": 6.918173476924316e-06, "loss": 0.7317, "step": 12837 }, { "epoch": 0.39346573495157533, "grad_norm": 1.21929823923548, "learning_rate": 6.917715125429452e-06, "loss": 0.6999, "step": 12838 }, { "epoch": 0.39349638347431654, "grad_norm": 1.1211131983107196, "learning_rate": 6.917256755038848e-06, "loss": 0.6306, "step": 12839 }, { "epoch": 0.39352703199705774, "grad_norm": 1.2208671693624111, "learning_rate": 6.916798365757025e-06, "loss": 0.6941, "step": 12840 }, { "epoch": 0.39355768051979895, "grad_norm": 1.2863003538527067, "learning_rate": 6.916339957588496e-06, "loss": 0.6978, "step": 12841 }, { "epoch": 0.39358832904254015, "grad_norm": 1.345256185171482, "learning_rate": 6.91588153053778e-06, "loss": 0.6772, "step": 12842 }, { "epoch": 0.39361897756528136, "grad_norm": 1.2327355173460817, "learning_rate": 6.915423084609392e-06, "loss": 0.6429, "step": 12843 }, { "epoch": 0.39364962608802256, "grad_norm": 1.3162380325129985, "learning_rate": 6.914964619807851e-06, "loss": 0.7205, "step": 12844 }, { "epoch": 0.39368027461076377, "grad_norm": 1.4065345465871726, "learning_rate": 6.914506136137674e-06, "loss": 0.7609, "step": 12845 }, { "epoch": 0.393710923133505, "grad_norm": 1.359160766873583, "learning_rate": 6.914047633603378e-06, "loss": 0.7132, "step": 12846 }, { "epoch": 0.3937415716562462, "grad_norm": 0.4827727970088341, "learning_rate": 6.91358911220948e-06, "loss": 0.4459, "step": 12847 }, { "epoch": 0.3937722201789874, "grad_norm": 1.26310987138152, "learning_rate": 6.913130571960499e-06, "loss": 0.7159, "step": 12848 }, { "epoch": 0.3938028687017286, "grad_norm": 1.3807002871321636, "learning_rate": 6.912672012860954e-06, "loss": 0.774, "step": 12849 }, { "epoch": 0.3938335172244698, "grad_norm": 0.44250725375764655, "learning_rate": 6.912213434915362e-06, "loss": 0.4382, "step": 12850 }, { "epoch": 0.393864165747211, "grad_norm": 1.0877420467353982, "learning_rate": 6.91175483812824e-06, "loss": 0.5995, "step": 12851 }, { "epoch": 0.3938948142699522, "grad_norm": 0.4487353635554961, "learning_rate": 6.911296222504111e-06, "loss": 0.4035, "step": 12852 }, { "epoch": 0.3939254627926934, "grad_norm": 1.2129850440919205, "learning_rate": 6.910837588047491e-06, "loss": 0.6924, "step": 12853 }, { "epoch": 0.3939561113154346, "grad_norm": 1.1641146138740182, "learning_rate": 6.910378934762898e-06, "loss": 0.6867, "step": 12854 }, { "epoch": 0.3939867598381758, "grad_norm": 1.3612241035023285, "learning_rate": 6.909920262654852e-06, "loss": 0.6864, "step": 12855 }, { "epoch": 0.394017408360917, "grad_norm": 1.1323966941664836, "learning_rate": 6.909461571727874e-06, "loss": 0.5102, "step": 12856 }, { "epoch": 0.3940480568836582, "grad_norm": 1.4401780684901198, "learning_rate": 6.909002861986485e-06, "loss": 0.6845, "step": 12857 }, { "epoch": 0.3940787054063994, "grad_norm": 1.2371554730148684, "learning_rate": 6.908544133435199e-06, "loss": 0.7019, "step": 12858 }, { "epoch": 0.3941093539291406, "grad_norm": 1.241068604387184, "learning_rate": 6.90808538607854e-06, "loss": 0.7017, "step": 12859 }, { "epoch": 0.3941400024518818, "grad_norm": 0.9910886968856224, "learning_rate": 6.907626619921027e-06, "loss": 0.5956, "step": 12860 }, { "epoch": 0.394170650974623, "grad_norm": 1.3342277669196154, "learning_rate": 6.907167834967183e-06, "loss": 0.6095, "step": 12861 }, { "epoch": 0.3942012994973642, "grad_norm": 1.31299465158576, "learning_rate": 6.906709031221524e-06, "loss": 0.7072, "step": 12862 }, { "epoch": 0.3942319480201054, "grad_norm": 1.2906899459426961, "learning_rate": 6.906250208688575e-06, "loss": 0.7022, "step": 12863 }, { "epoch": 0.3942625965428466, "grad_norm": 1.3196838465442793, "learning_rate": 6.9057913673728535e-06, "loss": 0.6745, "step": 12864 }, { "epoch": 0.3942932450655878, "grad_norm": 0.4851150073633588, "learning_rate": 6.905332507278882e-06, "loss": 0.4191, "step": 12865 }, { "epoch": 0.39432389358832903, "grad_norm": 1.3459776673486765, "learning_rate": 6.904873628411184e-06, "loss": 0.6866, "step": 12866 }, { "epoch": 0.39435454211107024, "grad_norm": 1.255820197417071, "learning_rate": 6.904414730774277e-06, "loss": 0.7132, "step": 12867 }, { "epoch": 0.39438519063381144, "grad_norm": 1.4977652492218394, "learning_rate": 6.903955814372684e-06, "loss": 0.7429, "step": 12868 }, { "epoch": 0.39441583915655265, "grad_norm": 1.1451633255452145, "learning_rate": 6.903496879210927e-06, "loss": 0.6958, "step": 12869 }, { "epoch": 0.39444648767929386, "grad_norm": 1.086132915102682, "learning_rate": 6.90303792529353e-06, "loss": 0.6042, "step": 12870 }, { "epoch": 0.39447713620203506, "grad_norm": 1.1908242359722443, "learning_rate": 6.902578952625012e-06, "loss": 0.6739, "step": 12871 }, { "epoch": 0.39450778472477627, "grad_norm": 1.2810385305533807, "learning_rate": 6.9021199612098976e-06, "loss": 0.5815, "step": 12872 }, { "epoch": 0.3945384332475175, "grad_norm": 1.4441654251002323, "learning_rate": 6.901660951052707e-06, "loss": 0.672, "step": 12873 }, { "epoch": 0.3945690817702587, "grad_norm": 1.4093188141889454, "learning_rate": 6.901201922157967e-06, "loss": 0.643, "step": 12874 }, { "epoch": 0.3945997302929999, "grad_norm": 1.4149027695613756, "learning_rate": 6.900742874530195e-06, "loss": 0.6804, "step": 12875 }, { "epoch": 0.3946303788157411, "grad_norm": 1.1024644708002236, "learning_rate": 6.90028380817392e-06, "loss": 0.6441, "step": 12876 }, { "epoch": 0.3946610273384823, "grad_norm": 1.2850948363834007, "learning_rate": 6.899824723093661e-06, "loss": 0.6999, "step": 12877 }, { "epoch": 0.3946916758612235, "grad_norm": 1.1510803402114844, "learning_rate": 6.899365619293943e-06, "loss": 0.646, "step": 12878 }, { "epoch": 0.3947223243839647, "grad_norm": 1.341772425644704, "learning_rate": 6.89890649677929e-06, "loss": 0.6989, "step": 12879 }, { "epoch": 0.3947529729067059, "grad_norm": 0.4886791534601706, "learning_rate": 6.898447355554225e-06, "loss": 0.4327, "step": 12880 }, { "epoch": 0.3947836214294471, "grad_norm": 0.4599759157643302, "learning_rate": 6.8979881956232734e-06, "loss": 0.4232, "step": 12881 }, { "epoch": 0.3948142699521883, "grad_norm": 1.1697515543571764, "learning_rate": 6.8975290169909555e-06, "loss": 0.6585, "step": 12882 }, { "epoch": 0.39484491847492953, "grad_norm": 1.1985778961663942, "learning_rate": 6.8970698196618016e-06, "loss": 0.6532, "step": 12883 }, { "epoch": 0.39487556699767073, "grad_norm": 1.4628852443875706, "learning_rate": 6.896610603640332e-06, "loss": 0.7052, "step": 12884 }, { "epoch": 0.39490621552041194, "grad_norm": 1.25509175930554, "learning_rate": 6.896151368931075e-06, "loss": 0.7087, "step": 12885 }, { "epoch": 0.39493686404315315, "grad_norm": 1.3141897533253215, "learning_rate": 6.8956921155385505e-06, "loss": 0.6814, "step": 12886 }, { "epoch": 0.3949675125658943, "grad_norm": 1.113972822819308, "learning_rate": 6.895232843467289e-06, "loss": 0.6663, "step": 12887 }, { "epoch": 0.3949981610886355, "grad_norm": 1.1319245687412023, "learning_rate": 6.894773552721812e-06, "loss": 0.5874, "step": 12888 }, { "epoch": 0.3950288096113767, "grad_norm": 1.3696474117510604, "learning_rate": 6.8943142433066466e-06, "loss": 0.7304, "step": 12889 }, { "epoch": 0.3950594581341179, "grad_norm": 1.1969066634514656, "learning_rate": 6.893854915226318e-06, "loss": 0.6974, "step": 12890 }, { "epoch": 0.3950901066568591, "grad_norm": 0.5093944466434587, "learning_rate": 6.893395568485352e-06, "loss": 0.4191, "step": 12891 }, { "epoch": 0.3951207551796003, "grad_norm": 1.064162056580802, "learning_rate": 6.892936203088278e-06, "loss": 0.6419, "step": 12892 }, { "epoch": 0.39515140370234153, "grad_norm": 1.1841007332389386, "learning_rate": 6.892476819039616e-06, "loss": 0.6556, "step": 12893 }, { "epoch": 0.39518205222508274, "grad_norm": 0.47746713952367914, "learning_rate": 6.892017416343897e-06, "loss": 0.4177, "step": 12894 }, { "epoch": 0.39521270074782394, "grad_norm": 1.310633729293069, "learning_rate": 6.891557995005646e-06, "loss": 0.7572, "step": 12895 }, { "epoch": 0.39524334927056515, "grad_norm": 0.476344466523723, "learning_rate": 6.891098555029389e-06, "loss": 0.4209, "step": 12896 }, { "epoch": 0.39527399779330635, "grad_norm": 1.1181899547633491, "learning_rate": 6.890639096419656e-06, "loss": 0.6132, "step": 12897 }, { "epoch": 0.39530464631604756, "grad_norm": 1.2225724762536696, "learning_rate": 6.8901796191809715e-06, "loss": 0.6823, "step": 12898 }, { "epoch": 0.39533529483878876, "grad_norm": 1.2103927427512, "learning_rate": 6.889720123317863e-06, "loss": 0.7117, "step": 12899 }, { "epoch": 0.39536594336152997, "grad_norm": 1.3101103368031406, "learning_rate": 6.88926060883486e-06, "loss": 0.5834, "step": 12900 }, { "epoch": 0.3953965918842712, "grad_norm": 0.46350508432225074, "learning_rate": 6.888801075736487e-06, "loss": 0.395, "step": 12901 }, { "epoch": 0.3954272404070124, "grad_norm": 1.349549828440699, "learning_rate": 6.888341524027275e-06, "loss": 0.7398, "step": 12902 }, { "epoch": 0.3954578889297536, "grad_norm": 1.286959056582448, "learning_rate": 6.8878819537117514e-06, "loss": 0.7482, "step": 12903 }, { "epoch": 0.3954885374524948, "grad_norm": 1.0874736449305662, "learning_rate": 6.887422364794443e-06, "loss": 0.6245, "step": 12904 }, { "epoch": 0.395519185975236, "grad_norm": 1.1967021112887402, "learning_rate": 6.886962757279878e-06, "loss": 0.542, "step": 12905 }, { "epoch": 0.3955498344979772, "grad_norm": 1.2318335367868316, "learning_rate": 6.8865031311725885e-06, "loss": 0.7582, "step": 12906 }, { "epoch": 0.3955804830207184, "grad_norm": 1.3063249358602125, "learning_rate": 6.8860434864771e-06, "loss": 0.5995, "step": 12907 }, { "epoch": 0.3956111315434596, "grad_norm": 1.2426184860782374, "learning_rate": 6.885583823197941e-06, "loss": 0.7533, "step": 12908 }, { "epoch": 0.3956417800662008, "grad_norm": 1.4607833598556654, "learning_rate": 6.885124141339643e-06, "loss": 0.689, "step": 12909 }, { "epoch": 0.395672428588942, "grad_norm": 1.3211404408075829, "learning_rate": 6.8846644409067345e-06, "loss": 0.7311, "step": 12910 }, { "epoch": 0.39570307711168323, "grad_norm": 1.3577286547106895, "learning_rate": 6.884204721903747e-06, "loss": 0.7137, "step": 12911 }, { "epoch": 0.39573372563442444, "grad_norm": 1.2149344216393843, "learning_rate": 6.883744984335205e-06, "loss": 0.7134, "step": 12912 }, { "epoch": 0.39576437415716564, "grad_norm": 1.3095437161621837, "learning_rate": 6.883285228205645e-06, "loss": 0.6584, "step": 12913 }, { "epoch": 0.39579502267990685, "grad_norm": 1.2762271452037848, "learning_rate": 6.882825453519591e-06, "loss": 0.6894, "step": 12914 }, { "epoch": 0.39582567120264806, "grad_norm": 1.3433721768122382, "learning_rate": 6.882365660281578e-06, "loss": 0.6329, "step": 12915 }, { "epoch": 0.39585631972538926, "grad_norm": 1.2216973592677247, "learning_rate": 6.881905848496133e-06, "loss": 0.6745, "step": 12916 }, { "epoch": 0.39588696824813047, "grad_norm": 1.297135113856185, "learning_rate": 6.88144601816779e-06, "loss": 0.6405, "step": 12917 }, { "epoch": 0.3959176167708716, "grad_norm": 1.355739446518003, "learning_rate": 6.880986169301076e-06, "loss": 0.7166, "step": 12918 }, { "epoch": 0.3959482652936128, "grad_norm": 1.1541583132639064, "learning_rate": 6.8805263019005265e-06, "loss": 0.6771, "step": 12919 }, { "epoch": 0.39597891381635403, "grad_norm": 0.5965175930146902, "learning_rate": 6.880066415970668e-06, "loss": 0.4441, "step": 12920 }, { "epoch": 0.39600956233909523, "grad_norm": 1.1763412115232847, "learning_rate": 6.879606511516035e-06, "loss": 0.62, "step": 12921 }, { "epoch": 0.39604021086183644, "grad_norm": 1.3034032158069375, "learning_rate": 6.879146588541158e-06, "loss": 0.5961, "step": 12922 }, { "epoch": 0.39607085938457764, "grad_norm": 1.2215439784437265, "learning_rate": 6.878686647050567e-06, "loss": 0.6509, "step": 12923 }, { "epoch": 0.39610150790731885, "grad_norm": 0.46224052355919604, "learning_rate": 6.878226687048798e-06, "loss": 0.4476, "step": 12924 }, { "epoch": 0.39613215643006006, "grad_norm": 1.3754917432238425, "learning_rate": 6.8777667085403795e-06, "loss": 0.7082, "step": 12925 }, { "epoch": 0.39616280495280126, "grad_norm": 1.1691423238943097, "learning_rate": 6.8773067115298476e-06, "loss": 0.6342, "step": 12926 }, { "epoch": 0.39619345347554247, "grad_norm": 1.2742757686214714, "learning_rate": 6.8768466960217306e-06, "loss": 0.7625, "step": 12927 }, { "epoch": 0.3962241019982837, "grad_norm": 1.3023450253930529, "learning_rate": 6.876386662020562e-06, "loss": 0.5501, "step": 12928 }, { "epoch": 0.3962547505210249, "grad_norm": 1.2015437201619183, "learning_rate": 6.875926609530876e-06, "loss": 0.5614, "step": 12929 }, { "epoch": 0.3962853990437661, "grad_norm": 1.100698825326681, "learning_rate": 6.875466538557207e-06, "loss": 0.6881, "step": 12930 }, { "epoch": 0.3963160475665073, "grad_norm": 1.4244401167838696, "learning_rate": 6.8750064491040845e-06, "loss": 0.6887, "step": 12931 }, { "epoch": 0.3963466960892485, "grad_norm": 1.2394630004689737, "learning_rate": 6.874546341176045e-06, "loss": 0.696, "step": 12932 }, { "epoch": 0.3963773446119897, "grad_norm": 0.4805031173022079, "learning_rate": 6.874086214777619e-06, "loss": 0.4356, "step": 12933 }, { "epoch": 0.3964079931347309, "grad_norm": 1.342932972453673, "learning_rate": 6.873626069913344e-06, "loss": 0.5919, "step": 12934 }, { "epoch": 0.3964386416574721, "grad_norm": 1.2121272329015516, "learning_rate": 6.8731659065877505e-06, "loss": 0.6946, "step": 12935 }, { "epoch": 0.3964692901802133, "grad_norm": 1.3232568930198119, "learning_rate": 6.8727057248053745e-06, "loss": 0.7949, "step": 12936 }, { "epoch": 0.3964999387029545, "grad_norm": 0.4357649971915181, "learning_rate": 6.87224552457075e-06, "loss": 0.4358, "step": 12937 }, { "epoch": 0.39653058722569573, "grad_norm": 0.4417944049854333, "learning_rate": 6.871785305888411e-06, "loss": 0.4166, "step": 12938 }, { "epoch": 0.39656123574843694, "grad_norm": 1.2445732459442587, "learning_rate": 6.8713250687628926e-06, "loss": 0.6094, "step": 12939 }, { "epoch": 0.39659188427117814, "grad_norm": 1.2798583231568494, "learning_rate": 6.87086481319873e-06, "loss": 0.6354, "step": 12940 }, { "epoch": 0.39662253279391935, "grad_norm": 1.2864310465150843, "learning_rate": 6.870404539200457e-06, "loss": 0.6494, "step": 12941 }, { "epoch": 0.39665318131666055, "grad_norm": 0.5058455852429539, "learning_rate": 6.869944246772611e-06, "loss": 0.4309, "step": 12942 }, { "epoch": 0.39668382983940176, "grad_norm": 1.2004724643371394, "learning_rate": 6.869483935919724e-06, "loss": 0.6202, "step": 12943 }, { "epoch": 0.39671447836214296, "grad_norm": 1.1942719501478476, "learning_rate": 6.869023606646334e-06, "loss": 0.7457, "step": 12944 }, { "epoch": 0.39674512688488417, "grad_norm": 0.4467103402364446, "learning_rate": 6.868563258956976e-06, "loss": 0.4276, "step": 12945 }, { "epoch": 0.3967757754076254, "grad_norm": 1.3306603496518294, "learning_rate": 6.868102892856186e-06, "loss": 0.6491, "step": 12946 }, { "epoch": 0.3968064239303666, "grad_norm": 1.2224666866692628, "learning_rate": 6.867642508348502e-06, "loss": 0.6932, "step": 12947 }, { "epoch": 0.3968370724531078, "grad_norm": 1.1627251523810946, "learning_rate": 6.867182105438457e-06, "loss": 0.6564, "step": 12948 }, { "epoch": 0.39686772097584894, "grad_norm": 1.3068696069706933, "learning_rate": 6.866721684130588e-06, "loss": 0.7358, "step": 12949 }, { "epoch": 0.39689836949859014, "grad_norm": 1.1600846019207811, "learning_rate": 6.866261244429435e-06, "loss": 0.6199, "step": 12950 }, { "epoch": 0.39692901802133135, "grad_norm": 0.46011239780756125, "learning_rate": 6.86580078633953e-06, "loss": 0.4349, "step": 12951 }, { "epoch": 0.39695966654407255, "grad_norm": 1.4567098797387026, "learning_rate": 6.865340309865413e-06, "loss": 0.6872, "step": 12952 }, { "epoch": 0.39699031506681376, "grad_norm": 1.1837200392897178, "learning_rate": 6.864879815011622e-06, "loss": 0.6708, "step": 12953 }, { "epoch": 0.39702096358955496, "grad_norm": 0.44597908357714533, "learning_rate": 6.8644193017826935e-06, "loss": 0.416, "step": 12954 }, { "epoch": 0.39705161211229617, "grad_norm": 1.3045977098708461, "learning_rate": 6.863958770183163e-06, "loss": 0.6149, "step": 12955 }, { "epoch": 0.3970822606350374, "grad_norm": 1.4730457431207498, "learning_rate": 6.86349822021757e-06, "loss": 0.6503, "step": 12956 }, { "epoch": 0.3971129091577786, "grad_norm": 1.1390690834798898, "learning_rate": 6.863037651890453e-06, "loss": 0.536, "step": 12957 }, { "epoch": 0.3971435576805198, "grad_norm": 0.44192894493147744, "learning_rate": 6.862577065206349e-06, "loss": 0.4195, "step": 12958 }, { "epoch": 0.397174206203261, "grad_norm": 1.088298802191952, "learning_rate": 6.862116460169796e-06, "loss": 0.5942, "step": 12959 }, { "epoch": 0.3972048547260022, "grad_norm": 1.153152564284779, "learning_rate": 6.8616558367853336e-06, "loss": 0.6167, "step": 12960 }, { "epoch": 0.3972355032487434, "grad_norm": 1.3689027632342659, "learning_rate": 6.861195195057501e-06, "loss": 0.7405, "step": 12961 }, { "epoch": 0.3972661517714846, "grad_norm": 1.2358182508240998, "learning_rate": 6.860734534990834e-06, "loss": 0.6378, "step": 12962 }, { "epoch": 0.3972968002942258, "grad_norm": 1.3257540576073341, "learning_rate": 6.860273856589874e-06, "loss": 0.7513, "step": 12963 }, { "epoch": 0.397327448816967, "grad_norm": 1.4306985081414598, "learning_rate": 6.859813159859161e-06, "loss": 0.612, "step": 12964 }, { "epoch": 0.3973580973397082, "grad_norm": 1.0982734969024073, "learning_rate": 6.859352444803233e-06, "loss": 0.6738, "step": 12965 }, { "epoch": 0.39738874586244943, "grad_norm": 0.4872334827028336, "learning_rate": 6.858891711426627e-06, "loss": 0.4119, "step": 12966 }, { "epoch": 0.39741939438519064, "grad_norm": 1.1152650484526883, "learning_rate": 6.858430959733888e-06, "loss": 0.6316, "step": 12967 }, { "epoch": 0.39745004290793184, "grad_norm": 1.275802641708804, "learning_rate": 6.857970189729552e-06, "loss": 0.6756, "step": 12968 }, { "epoch": 0.39748069143067305, "grad_norm": 1.3613544981803516, "learning_rate": 6.857509401418161e-06, "loss": 0.7621, "step": 12969 }, { "epoch": 0.39751133995341426, "grad_norm": 1.3488584642729178, "learning_rate": 6.857048594804254e-06, "loss": 0.6791, "step": 12970 }, { "epoch": 0.39754198847615546, "grad_norm": 1.2241552331137329, "learning_rate": 6.856587769892372e-06, "loss": 0.7568, "step": 12971 }, { "epoch": 0.39757263699889667, "grad_norm": 0.4719309000518555, "learning_rate": 6.8561269266870555e-06, "loss": 0.4134, "step": 12972 }, { "epoch": 0.3976032855216379, "grad_norm": 1.3834572042357036, "learning_rate": 6.855666065192848e-06, "loss": 0.7085, "step": 12973 }, { "epoch": 0.3976339340443791, "grad_norm": 1.3360786290100422, "learning_rate": 6.855205185414284e-06, "loss": 0.6372, "step": 12974 }, { "epoch": 0.3976645825671203, "grad_norm": 1.2080122644456064, "learning_rate": 6.854744287355912e-06, "loss": 0.6649, "step": 12975 }, { "epoch": 0.3976952310898615, "grad_norm": 1.385453101890774, "learning_rate": 6.854283371022269e-06, "loss": 0.6323, "step": 12976 }, { "epoch": 0.3977258796126027, "grad_norm": 1.1075797052683436, "learning_rate": 6.853822436417896e-06, "loss": 0.6572, "step": 12977 }, { "epoch": 0.3977565281353439, "grad_norm": 1.3053658055658481, "learning_rate": 6.853361483547338e-06, "loss": 0.6676, "step": 12978 }, { "epoch": 0.3977871766580851, "grad_norm": 1.3565041341837674, "learning_rate": 6.852900512415134e-06, "loss": 0.697, "step": 12979 }, { "epoch": 0.39781782518082626, "grad_norm": 1.2051467883586373, "learning_rate": 6.852439523025829e-06, "loss": 0.6963, "step": 12980 }, { "epoch": 0.39784847370356746, "grad_norm": 1.242614306879491, "learning_rate": 6.851978515383962e-06, "loss": 0.6651, "step": 12981 }, { "epoch": 0.39787912222630867, "grad_norm": 1.1673800281148938, "learning_rate": 6.851517489494076e-06, "loss": 0.6026, "step": 12982 }, { "epoch": 0.3979097707490499, "grad_norm": 1.2546656899710973, "learning_rate": 6.851056445360714e-06, "loss": 0.6284, "step": 12983 }, { "epoch": 0.3979404192717911, "grad_norm": 1.3204873245350313, "learning_rate": 6.850595382988422e-06, "loss": 0.7003, "step": 12984 }, { "epoch": 0.3979710677945323, "grad_norm": 1.3054525160590453, "learning_rate": 6.850134302381738e-06, "loss": 0.7161, "step": 12985 }, { "epoch": 0.3980017163172735, "grad_norm": 1.213631551217234, "learning_rate": 6.849673203545208e-06, "loss": 0.655, "step": 12986 }, { "epoch": 0.3980323648400147, "grad_norm": 1.2375454498255398, "learning_rate": 6.849212086483374e-06, "loss": 0.7059, "step": 12987 }, { "epoch": 0.3980630133627559, "grad_norm": 1.283728745995888, "learning_rate": 6.848750951200782e-06, "loss": 0.7017, "step": 12988 }, { "epoch": 0.3980936618854971, "grad_norm": 1.7507567385762202, "learning_rate": 6.848289797701972e-06, "loss": 0.5803, "step": 12989 }, { "epoch": 0.3981243104082383, "grad_norm": 1.374554903513405, "learning_rate": 6.847828625991492e-06, "loss": 0.7505, "step": 12990 }, { "epoch": 0.3981549589309795, "grad_norm": 1.2413609117350344, "learning_rate": 6.847367436073881e-06, "loss": 0.6642, "step": 12991 }, { "epoch": 0.3981856074537207, "grad_norm": 1.4511720013192213, "learning_rate": 6.8469062279536865e-06, "loss": 0.7475, "step": 12992 }, { "epoch": 0.39821625597646193, "grad_norm": 1.2498007731875684, "learning_rate": 6.8464450016354546e-06, "loss": 0.7608, "step": 12993 }, { "epoch": 0.39824690449920314, "grad_norm": 1.946094870507122, "learning_rate": 6.845983757123726e-06, "loss": 0.701, "step": 12994 }, { "epoch": 0.39827755302194434, "grad_norm": 1.1618974514921403, "learning_rate": 6.845522494423047e-06, "loss": 0.6126, "step": 12995 }, { "epoch": 0.39830820154468555, "grad_norm": 0.4921021626193126, "learning_rate": 6.845061213537962e-06, "loss": 0.4098, "step": 12996 }, { "epoch": 0.39833885006742675, "grad_norm": 1.318559750509284, "learning_rate": 6.844599914473019e-06, "loss": 0.6912, "step": 12997 }, { "epoch": 0.39836949859016796, "grad_norm": 1.233273241640953, "learning_rate": 6.844138597232759e-06, "loss": 0.5988, "step": 12998 }, { "epoch": 0.39840014711290916, "grad_norm": 1.2349265157281342, "learning_rate": 6.843677261821732e-06, "loss": 0.7599, "step": 12999 }, { "epoch": 0.39843079563565037, "grad_norm": 1.3196352219470362, "learning_rate": 6.843215908244478e-06, "loss": 0.7725, "step": 13000 }, { "epoch": 0.3984614441583916, "grad_norm": 1.4519569929811293, "learning_rate": 6.842754536505549e-06, "loss": 0.7962, "step": 13001 }, { "epoch": 0.3984920926811328, "grad_norm": 1.1576632166655747, "learning_rate": 6.842293146609485e-06, "loss": 0.6506, "step": 13002 }, { "epoch": 0.398522741203874, "grad_norm": 1.3925762688526464, "learning_rate": 6.841831738560838e-06, "loss": 0.7568, "step": 13003 }, { "epoch": 0.3985533897266152, "grad_norm": 1.2801203541278001, "learning_rate": 6.841370312364151e-06, "loss": 0.6117, "step": 13004 }, { "epoch": 0.3985840382493564, "grad_norm": 1.4718006325190927, "learning_rate": 6.84090886802397e-06, "loss": 0.5952, "step": 13005 }, { "epoch": 0.3986146867720976, "grad_norm": 1.275487990607227, "learning_rate": 6.8404474055448434e-06, "loss": 0.6966, "step": 13006 }, { "epoch": 0.3986453352948388, "grad_norm": 0.48789928890758816, "learning_rate": 6.8399859249313186e-06, "loss": 0.4123, "step": 13007 }, { "epoch": 0.39867598381758, "grad_norm": 1.3553425751611563, "learning_rate": 6.839524426187941e-06, "loss": 0.7563, "step": 13008 }, { "epoch": 0.3987066323403212, "grad_norm": 0.5022351496224601, "learning_rate": 6.839062909319258e-06, "loss": 0.4328, "step": 13009 }, { "epoch": 0.3987372808630624, "grad_norm": 1.2276435986677667, "learning_rate": 6.838601374329819e-06, "loss": 0.7408, "step": 13010 }, { "epoch": 0.3987679293858036, "grad_norm": 1.2970708786370535, "learning_rate": 6.838139821224169e-06, "loss": 0.5905, "step": 13011 }, { "epoch": 0.3987985779085448, "grad_norm": 1.3552559338580694, "learning_rate": 6.837678250006859e-06, "loss": 0.6565, "step": 13012 }, { "epoch": 0.398829226431286, "grad_norm": 1.2240067482145243, "learning_rate": 6.837216660682432e-06, "loss": 0.6888, "step": 13013 }, { "epoch": 0.3988598749540272, "grad_norm": 1.2695330404778895, "learning_rate": 6.83675505325544e-06, "loss": 0.6578, "step": 13014 }, { "epoch": 0.3988905234767684, "grad_norm": 1.322857119016737, "learning_rate": 6.836293427730431e-06, "loss": 0.6711, "step": 13015 }, { "epoch": 0.3989211719995096, "grad_norm": 1.257023275113706, "learning_rate": 6.835831784111955e-06, "loss": 0.7182, "step": 13016 }, { "epoch": 0.3989518205222508, "grad_norm": 1.2842235364420498, "learning_rate": 6.835370122404555e-06, "loss": 0.6615, "step": 13017 }, { "epoch": 0.398982469044992, "grad_norm": 0.5277574898950729, "learning_rate": 6.834908442612786e-06, "loss": 0.4312, "step": 13018 }, { "epoch": 0.3990131175677332, "grad_norm": 1.2146553530842492, "learning_rate": 6.834446744741195e-06, "loss": 0.5958, "step": 13019 }, { "epoch": 0.3990437660904744, "grad_norm": 1.08344942738314, "learning_rate": 6.8339850287943285e-06, "loss": 0.5559, "step": 13020 }, { "epoch": 0.39907441461321563, "grad_norm": 1.2742813982711267, "learning_rate": 6.83352329477674e-06, "loss": 0.6723, "step": 13021 }, { "epoch": 0.39910506313595684, "grad_norm": 1.2328053127502878, "learning_rate": 6.833061542692976e-06, "loss": 0.6532, "step": 13022 }, { "epoch": 0.39913571165869804, "grad_norm": 1.2415744852503694, "learning_rate": 6.83259977254759e-06, "loss": 0.6044, "step": 13023 }, { "epoch": 0.39916636018143925, "grad_norm": 1.1025076208289908, "learning_rate": 6.8321379843451286e-06, "loss": 0.6191, "step": 13024 }, { "epoch": 0.39919700870418046, "grad_norm": 1.2965618041964375, "learning_rate": 6.831676178090142e-06, "loss": 0.7318, "step": 13025 }, { "epoch": 0.39922765722692166, "grad_norm": 1.176421997950545, "learning_rate": 6.831214353787182e-06, "loss": 0.6038, "step": 13026 }, { "epoch": 0.39925830574966287, "grad_norm": 1.174213170483428, "learning_rate": 6.8307525114407994e-06, "loss": 0.7401, "step": 13027 }, { "epoch": 0.3992889542724041, "grad_norm": 1.2433938338172383, "learning_rate": 6.830290651055541e-06, "loss": 0.5963, "step": 13028 }, { "epoch": 0.3993196027951453, "grad_norm": 0.47218462501729846, "learning_rate": 6.8298287726359625e-06, "loss": 0.4165, "step": 13029 }, { "epoch": 0.3993502513178865, "grad_norm": 1.221309511293209, "learning_rate": 6.829366876186614e-06, "loss": 0.6872, "step": 13030 }, { "epoch": 0.3993808998406277, "grad_norm": 1.2978762065907998, "learning_rate": 6.828904961712043e-06, "loss": 0.6114, "step": 13031 }, { "epoch": 0.3994115483633689, "grad_norm": 1.4001263756901703, "learning_rate": 6.828443029216805e-06, "loss": 0.7016, "step": 13032 }, { "epoch": 0.3994421968861101, "grad_norm": 1.229319329083697, "learning_rate": 6.82798107870545e-06, "loss": 0.6544, "step": 13033 }, { "epoch": 0.3994728454088513, "grad_norm": 1.2064362242936049, "learning_rate": 6.82751911018253e-06, "loss": 0.7245, "step": 13034 }, { "epoch": 0.3995034939315925, "grad_norm": 1.1479995996518795, "learning_rate": 6.8270571236525955e-06, "loss": 0.6955, "step": 13035 }, { "epoch": 0.3995341424543337, "grad_norm": 1.299816406308221, "learning_rate": 6.8265951191202005e-06, "loss": 0.803, "step": 13036 }, { "epoch": 0.3995647909770749, "grad_norm": 1.1568447288393475, "learning_rate": 6.826133096589895e-06, "loss": 0.7363, "step": 13037 }, { "epoch": 0.39959543949981613, "grad_norm": 1.297196907745472, "learning_rate": 6.825671056066237e-06, "loss": 0.6926, "step": 13038 }, { "epoch": 0.39962608802255734, "grad_norm": 1.140654425039958, "learning_rate": 6.8252089975537705e-06, "loss": 0.6699, "step": 13039 }, { "epoch": 0.39965673654529854, "grad_norm": 1.283478903672416, "learning_rate": 6.824746921057054e-06, "loss": 0.6827, "step": 13040 }, { "epoch": 0.39968738506803975, "grad_norm": 1.3826453474274294, "learning_rate": 6.824284826580639e-06, "loss": 0.6931, "step": 13041 }, { "epoch": 0.3997180335907809, "grad_norm": 1.277867884236692, "learning_rate": 6.82382271412908e-06, "loss": 0.7113, "step": 13042 }, { "epoch": 0.3997486821135221, "grad_norm": 1.2225033611848333, "learning_rate": 6.823360583706928e-06, "loss": 0.512, "step": 13043 }, { "epoch": 0.3997793306362633, "grad_norm": 0.5259677605656459, "learning_rate": 6.822898435318739e-06, "loss": 0.4124, "step": 13044 }, { "epoch": 0.3998099791590045, "grad_norm": 1.15690583328123, "learning_rate": 6.822436268969064e-06, "loss": 0.656, "step": 13045 }, { "epoch": 0.3998406276817457, "grad_norm": 1.1585165841076943, "learning_rate": 6.821974084662458e-06, "loss": 0.5626, "step": 13046 }, { "epoch": 0.3998712762044869, "grad_norm": 0.45705415122575443, "learning_rate": 6.821511882403477e-06, "loss": 0.434, "step": 13047 }, { "epoch": 0.39990192472722813, "grad_norm": 1.303176622101622, "learning_rate": 6.82104966219667e-06, "loss": 0.6386, "step": 13048 }, { "epoch": 0.39993257324996934, "grad_norm": 0.43262724462046465, "learning_rate": 6.820587424046598e-06, "loss": 0.418, "step": 13049 }, { "epoch": 0.39996322177271054, "grad_norm": 1.302843317202899, "learning_rate": 6.820125167957812e-06, "loss": 0.5596, "step": 13050 }, { "epoch": 0.39999387029545175, "grad_norm": 1.243307615702497, "learning_rate": 6.819662893934866e-06, "loss": 0.678, "step": 13051 }, { "epoch": 0.40002451881819295, "grad_norm": 0.4651919622530417, "learning_rate": 6.819200601982316e-06, "loss": 0.4112, "step": 13052 }, { "epoch": 0.40005516734093416, "grad_norm": 0.4547222109759282, "learning_rate": 6.818738292104719e-06, "loss": 0.437, "step": 13053 }, { "epoch": 0.40008581586367536, "grad_norm": 0.443510234628293, "learning_rate": 6.818275964306624e-06, "loss": 0.4307, "step": 13054 }, { "epoch": 0.40011646438641657, "grad_norm": 0.43337158767425443, "learning_rate": 6.817813618592595e-06, "loss": 0.4316, "step": 13055 }, { "epoch": 0.4001471129091578, "grad_norm": 1.1489500932246517, "learning_rate": 6.817351254967179e-06, "loss": 0.7621, "step": 13056 }, { "epoch": 0.400177761431899, "grad_norm": 1.3893040285974385, "learning_rate": 6.816888873434939e-06, "loss": 0.6995, "step": 13057 }, { "epoch": 0.4002084099546402, "grad_norm": 1.1473321311428015, "learning_rate": 6.816426474000428e-06, "loss": 0.6728, "step": 13058 }, { "epoch": 0.4002390584773814, "grad_norm": 1.3571971615217788, "learning_rate": 6.815964056668203e-06, "loss": 0.6782, "step": 13059 }, { "epoch": 0.4002697070001226, "grad_norm": 1.2033782072772987, "learning_rate": 6.815501621442817e-06, "loss": 0.6915, "step": 13060 }, { "epoch": 0.4003003555228638, "grad_norm": 1.2368560910573556, "learning_rate": 6.815039168328831e-06, "loss": 0.6914, "step": 13061 }, { "epoch": 0.400331004045605, "grad_norm": 1.2805128741936853, "learning_rate": 6.814576697330799e-06, "loss": 0.7025, "step": 13062 }, { "epoch": 0.4003616525683462, "grad_norm": 1.2354035534660532, "learning_rate": 6.814114208453277e-06, "loss": 0.6392, "step": 13063 }, { "epoch": 0.4003923010910874, "grad_norm": 1.1461350161310442, "learning_rate": 6.813651701700826e-06, "loss": 0.6383, "step": 13064 }, { "epoch": 0.4004229496138286, "grad_norm": 0.4817813395706102, "learning_rate": 6.813189177078e-06, "loss": 0.4272, "step": 13065 }, { "epoch": 0.40045359813656983, "grad_norm": 1.2714316859807997, "learning_rate": 6.812726634589357e-06, "loss": 0.643, "step": 13066 }, { "epoch": 0.40048424665931104, "grad_norm": 1.2466957268348595, "learning_rate": 6.812264074239454e-06, "loss": 0.6252, "step": 13067 }, { "epoch": 0.40051489518205224, "grad_norm": 1.4050238234770291, "learning_rate": 6.8118014960328506e-06, "loss": 0.7063, "step": 13068 }, { "epoch": 0.40054554370479345, "grad_norm": 1.4810007020454796, "learning_rate": 6.811338899974102e-06, "loss": 0.7174, "step": 13069 }, { "epoch": 0.40057619222753466, "grad_norm": 1.3000884500810517, "learning_rate": 6.8108762860677695e-06, "loss": 0.6669, "step": 13070 }, { "epoch": 0.40060684075027586, "grad_norm": 1.2176746211651155, "learning_rate": 6.810413654318409e-06, "loss": 0.6597, "step": 13071 }, { "epoch": 0.40063748927301707, "grad_norm": 1.3172864162206106, "learning_rate": 6.809951004730578e-06, "loss": 0.6318, "step": 13072 }, { "epoch": 0.4006681377957582, "grad_norm": 1.1115526774753528, "learning_rate": 6.8094883373088385e-06, "loss": 0.5816, "step": 13073 }, { "epoch": 0.4006987863184994, "grad_norm": 1.3143821288752873, "learning_rate": 6.809025652057747e-06, "loss": 0.6782, "step": 13074 }, { "epoch": 0.40072943484124063, "grad_norm": 1.1923623404329813, "learning_rate": 6.808562948981863e-06, "loss": 0.6281, "step": 13075 }, { "epoch": 0.40076008336398183, "grad_norm": 1.2215753874759294, "learning_rate": 6.808100228085745e-06, "loss": 0.7244, "step": 13076 }, { "epoch": 0.40079073188672304, "grad_norm": 1.0204348019813203, "learning_rate": 6.807637489373954e-06, "loss": 0.6239, "step": 13077 }, { "epoch": 0.40082138040946425, "grad_norm": 1.3370795478607431, "learning_rate": 6.807174732851046e-06, "loss": 0.6577, "step": 13078 }, { "epoch": 0.40085202893220545, "grad_norm": 1.0451825477096748, "learning_rate": 6.806711958521584e-06, "loss": 0.5526, "step": 13079 }, { "epoch": 0.40088267745494666, "grad_norm": 1.1284618150093375, "learning_rate": 6.806249166390129e-06, "loss": 0.5756, "step": 13080 }, { "epoch": 0.40091332597768786, "grad_norm": 1.4065147760834928, "learning_rate": 6.805786356461237e-06, "loss": 0.692, "step": 13081 }, { "epoch": 0.40094397450042907, "grad_norm": 1.0699810028605936, "learning_rate": 6.80532352873947e-06, "loss": 0.6308, "step": 13082 }, { "epoch": 0.4009746230231703, "grad_norm": 1.1257928534448651, "learning_rate": 6.804860683229387e-06, "loss": 0.5314, "step": 13083 }, { "epoch": 0.4010052715459115, "grad_norm": 0.5266449027486186, "learning_rate": 6.804397819935552e-06, "loss": 0.4134, "step": 13084 }, { "epoch": 0.4010359200686527, "grad_norm": 1.185606384888321, "learning_rate": 6.803934938862523e-06, "loss": 0.5642, "step": 13085 }, { "epoch": 0.4010665685913939, "grad_norm": 1.1616055455397865, "learning_rate": 6.803472040014862e-06, "loss": 0.6694, "step": 13086 }, { "epoch": 0.4010972171141351, "grad_norm": 1.368331486262509, "learning_rate": 6.803009123397128e-06, "loss": 0.6978, "step": 13087 }, { "epoch": 0.4011278656368763, "grad_norm": 1.216760630944334, "learning_rate": 6.802546189013886e-06, "loss": 0.6929, "step": 13088 }, { "epoch": 0.4011585141596175, "grad_norm": 1.1919615205154752, "learning_rate": 6.802083236869692e-06, "loss": 0.6267, "step": 13089 }, { "epoch": 0.4011891626823587, "grad_norm": 1.2556179786278439, "learning_rate": 6.801620266969113e-06, "loss": 0.6405, "step": 13090 }, { "epoch": 0.4012198112050999, "grad_norm": 1.4676897489269651, "learning_rate": 6.801157279316708e-06, "loss": 0.6579, "step": 13091 }, { "epoch": 0.4012504597278411, "grad_norm": 1.2010839157273592, "learning_rate": 6.800694273917041e-06, "loss": 0.6344, "step": 13092 }, { "epoch": 0.40128110825058233, "grad_norm": 1.154168683846428, "learning_rate": 6.80023125077467e-06, "loss": 0.6243, "step": 13093 }, { "epoch": 0.40131175677332354, "grad_norm": 1.3212742317038129, "learning_rate": 6.799768209894162e-06, "loss": 0.7255, "step": 13094 }, { "epoch": 0.40134240529606474, "grad_norm": 1.196778504660594, "learning_rate": 6.799305151280076e-06, "loss": 0.6622, "step": 13095 }, { "epoch": 0.40137305381880595, "grad_norm": 1.1897651536466467, "learning_rate": 6.798842074936978e-06, "loss": 0.6116, "step": 13096 }, { "epoch": 0.40140370234154715, "grad_norm": 1.248687308616545, "learning_rate": 6.7983789808694255e-06, "loss": 0.625, "step": 13097 }, { "epoch": 0.40143435086428836, "grad_norm": 1.1503310350118099, "learning_rate": 6.7979158690819865e-06, "loss": 0.6579, "step": 13098 }, { "epoch": 0.40146499938702956, "grad_norm": 1.2784117159643305, "learning_rate": 6.797452739579223e-06, "loss": 0.6855, "step": 13099 }, { "epoch": 0.40149564790977077, "grad_norm": 0.4684179948009856, "learning_rate": 6.796989592365697e-06, "loss": 0.4043, "step": 13100 }, { "epoch": 0.401526296432512, "grad_norm": 1.133926565583266, "learning_rate": 6.796526427445973e-06, "loss": 0.6596, "step": 13101 }, { "epoch": 0.4015569449552532, "grad_norm": 1.350242938427619, "learning_rate": 6.796063244824613e-06, "loss": 0.717, "step": 13102 }, { "epoch": 0.4015875934779944, "grad_norm": 1.5509375497595779, "learning_rate": 6.7956000445061856e-06, "loss": 0.7685, "step": 13103 }, { "epoch": 0.40161824200073554, "grad_norm": 1.1515701690084374, "learning_rate": 6.795136826495249e-06, "loss": 0.6004, "step": 13104 }, { "epoch": 0.40164889052347674, "grad_norm": 1.564562621679609, "learning_rate": 6.7946735907963715e-06, "loss": 0.6991, "step": 13105 }, { "epoch": 0.40167953904621795, "grad_norm": 1.2179551152979515, "learning_rate": 6.794210337414113e-06, "loss": 0.6777, "step": 13106 }, { "epoch": 0.40171018756895915, "grad_norm": 1.1680775207389489, "learning_rate": 6.793747066353044e-06, "loss": 0.6696, "step": 13107 }, { "epoch": 0.40174083609170036, "grad_norm": 1.140369236921735, "learning_rate": 6.793283777617725e-06, "loss": 0.6828, "step": 13108 }, { "epoch": 0.40177148461444157, "grad_norm": 1.274692906738757, "learning_rate": 6.792820471212724e-06, "loss": 0.6925, "step": 13109 }, { "epoch": 0.40180213313718277, "grad_norm": 1.1484906266651793, "learning_rate": 6.792357147142601e-06, "loss": 0.6555, "step": 13110 }, { "epoch": 0.401832781659924, "grad_norm": 1.3498385202837626, "learning_rate": 6.791893805411928e-06, "loss": 0.712, "step": 13111 }, { "epoch": 0.4018634301826652, "grad_norm": 1.1429332557567005, "learning_rate": 6.791430446025263e-06, "loss": 0.576, "step": 13112 }, { "epoch": 0.4018940787054064, "grad_norm": 1.3742944192142352, "learning_rate": 6.790967068987177e-06, "loss": 0.7266, "step": 13113 }, { "epoch": 0.4019247272281476, "grad_norm": 1.1995873887064408, "learning_rate": 6.790503674302235e-06, "loss": 0.6456, "step": 13114 }, { "epoch": 0.4019553757508888, "grad_norm": 0.45449971557179536, "learning_rate": 6.7900402619750015e-06, "loss": 0.4284, "step": 13115 }, { "epoch": 0.40198602427363, "grad_norm": 1.327450069260988, "learning_rate": 6.789576832010044e-06, "loss": 0.6302, "step": 13116 }, { "epoch": 0.4020166727963712, "grad_norm": 1.3387475440429957, "learning_rate": 6.7891133844119276e-06, "loss": 0.6434, "step": 13117 }, { "epoch": 0.4020473213191124, "grad_norm": 0.4565875343239809, "learning_rate": 6.788649919185218e-06, "loss": 0.422, "step": 13118 }, { "epoch": 0.4020779698418536, "grad_norm": 0.47220524968195116, "learning_rate": 6.788186436334485e-06, "loss": 0.4339, "step": 13119 }, { "epoch": 0.4021086183645948, "grad_norm": 1.1917937997983135, "learning_rate": 6.787722935864294e-06, "loss": 0.6333, "step": 13120 }, { "epoch": 0.40213926688733603, "grad_norm": 1.194016848281292, "learning_rate": 6.787259417779209e-06, "loss": 0.6938, "step": 13121 }, { "epoch": 0.40216991541007724, "grad_norm": 1.1322807982080438, "learning_rate": 6.786795882083801e-06, "loss": 0.6354, "step": 13122 }, { "epoch": 0.40220056393281844, "grad_norm": 0.4458440766437505, "learning_rate": 6.7863323287826365e-06, "loss": 0.4218, "step": 13123 }, { "epoch": 0.40223121245555965, "grad_norm": 0.4259296645079342, "learning_rate": 6.785868757880283e-06, "loss": 0.4193, "step": 13124 }, { "epoch": 0.40226186097830086, "grad_norm": 0.4352628280343277, "learning_rate": 6.785405169381305e-06, "loss": 0.4047, "step": 13125 }, { "epoch": 0.40229250950104206, "grad_norm": 1.2216492257525005, "learning_rate": 6.784941563290276e-06, "loss": 0.6444, "step": 13126 }, { "epoch": 0.40232315802378327, "grad_norm": 1.1515126684067432, "learning_rate": 6.78447793961176e-06, "loss": 0.6855, "step": 13127 }, { "epoch": 0.4023538065465245, "grad_norm": 1.1215664558541147, "learning_rate": 6.784014298350326e-06, "loss": 0.6788, "step": 13128 }, { "epoch": 0.4023844550692657, "grad_norm": 1.2741784546257897, "learning_rate": 6.783550639510542e-06, "loss": 0.7296, "step": 13129 }, { "epoch": 0.4024151035920069, "grad_norm": 0.4498314367193098, "learning_rate": 6.783086963096979e-06, "loss": 0.4201, "step": 13130 }, { "epoch": 0.4024457521147481, "grad_norm": 1.3409747173004756, "learning_rate": 6.782623269114203e-06, "loss": 0.6606, "step": 13131 }, { "epoch": 0.4024764006374893, "grad_norm": 1.217500727250871, "learning_rate": 6.782159557566783e-06, "loss": 0.6309, "step": 13132 }, { "epoch": 0.4025070491602305, "grad_norm": 1.2874627984975808, "learning_rate": 6.7816958284592896e-06, "loss": 0.677, "step": 13133 }, { "epoch": 0.4025376976829717, "grad_norm": 1.0862355622781956, "learning_rate": 6.781232081796292e-06, "loss": 0.6674, "step": 13134 }, { "epoch": 0.40256834620571286, "grad_norm": 1.355892259587688, "learning_rate": 6.780768317582358e-06, "loss": 0.6925, "step": 13135 }, { "epoch": 0.40259899472845406, "grad_norm": 1.1702427550517678, "learning_rate": 6.7803045358220575e-06, "loss": 0.6709, "step": 13136 }, { "epoch": 0.40262964325119527, "grad_norm": 0.4914625188848046, "learning_rate": 6.7798407365199624e-06, "loss": 0.4145, "step": 13137 }, { "epoch": 0.4026602917739365, "grad_norm": 1.30612439226852, "learning_rate": 6.7793769196806414e-06, "loss": 0.6421, "step": 13138 }, { "epoch": 0.4026909402966777, "grad_norm": 0.4548845466816998, "learning_rate": 6.778913085308663e-06, "loss": 0.4358, "step": 13139 }, { "epoch": 0.4027215888194189, "grad_norm": 1.169526955541012, "learning_rate": 6.7784492334086e-06, "loss": 0.5917, "step": 13140 }, { "epoch": 0.4027522373421601, "grad_norm": 1.2949320737921612, "learning_rate": 6.77798536398502e-06, "loss": 0.7227, "step": 13141 }, { "epoch": 0.4027828858649013, "grad_norm": 0.4738354096176864, "learning_rate": 6.777521477042497e-06, "loss": 0.4443, "step": 13142 }, { "epoch": 0.4028135343876425, "grad_norm": 1.0705760353993283, "learning_rate": 6.777057572585599e-06, "loss": 0.5687, "step": 13143 }, { "epoch": 0.4028441829103837, "grad_norm": 1.2746045802999855, "learning_rate": 6.776593650618899e-06, "loss": 0.7429, "step": 13144 }, { "epoch": 0.4028748314331249, "grad_norm": 1.1275993676194727, "learning_rate": 6.776129711146966e-06, "loss": 0.7073, "step": 13145 }, { "epoch": 0.4029054799558661, "grad_norm": 1.3207043373423426, "learning_rate": 6.775665754174374e-06, "loss": 0.7047, "step": 13146 }, { "epoch": 0.4029361284786073, "grad_norm": 1.290962899323848, "learning_rate": 6.775201779705692e-06, "loss": 0.7236, "step": 13147 }, { "epoch": 0.40296677700134853, "grad_norm": 1.2857377262186707, "learning_rate": 6.774737787745492e-06, "loss": 0.6308, "step": 13148 }, { "epoch": 0.40299742552408974, "grad_norm": 1.2681185479121189, "learning_rate": 6.774273778298347e-06, "loss": 0.7626, "step": 13149 }, { "epoch": 0.40302807404683094, "grad_norm": 1.2159908158094308, "learning_rate": 6.773809751368831e-06, "loss": 0.6233, "step": 13150 }, { "epoch": 0.40305872256957215, "grad_norm": 0.4761885444005401, "learning_rate": 6.773345706961509e-06, "loss": 0.415, "step": 13151 }, { "epoch": 0.40308937109231335, "grad_norm": 1.1346466994751074, "learning_rate": 6.772881645080962e-06, "loss": 0.6032, "step": 13152 }, { "epoch": 0.40312001961505456, "grad_norm": 1.5453023596909936, "learning_rate": 6.772417565731756e-06, "loss": 0.7446, "step": 13153 }, { "epoch": 0.40315066813779576, "grad_norm": 1.1373402225260543, "learning_rate": 6.771953468918467e-06, "loss": 0.5773, "step": 13154 }, { "epoch": 0.40318131666053697, "grad_norm": 1.1438608752714265, "learning_rate": 6.771489354645668e-06, "loss": 0.6583, "step": 13155 }, { "epoch": 0.4032119651832782, "grad_norm": 1.0338992853300086, "learning_rate": 6.771025222917931e-06, "loss": 0.6547, "step": 13156 }, { "epoch": 0.4032426137060194, "grad_norm": 1.2304646594104565, "learning_rate": 6.77056107373983e-06, "loss": 0.6986, "step": 13157 }, { "epoch": 0.4032732622287606, "grad_norm": 1.3080348379697604, "learning_rate": 6.770096907115935e-06, "loss": 0.6841, "step": 13158 }, { "epoch": 0.4033039107515018, "grad_norm": 1.3066256065370547, "learning_rate": 6.769632723050824e-06, "loss": 0.6808, "step": 13159 }, { "epoch": 0.403334559274243, "grad_norm": 1.3441651780787167, "learning_rate": 6.769168521549069e-06, "loss": 0.6919, "step": 13160 }, { "epoch": 0.4033652077969842, "grad_norm": 1.2238854390215625, "learning_rate": 6.768704302615245e-06, "loss": 0.648, "step": 13161 }, { "epoch": 0.4033958563197254, "grad_norm": 1.2150024571521754, "learning_rate": 6.768240066253923e-06, "loss": 0.6414, "step": 13162 }, { "epoch": 0.4034265048424666, "grad_norm": 1.25537954307695, "learning_rate": 6.767775812469679e-06, "loss": 0.7947, "step": 13163 }, { "epoch": 0.4034571533652078, "grad_norm": 1.1999315286599803, "learning_rate": 6.767311541267089e-06, "loss": 0.6934, "step": 13164 }, { "epoch": 0.403487801887949, "grad_norm": 1.1716688899775822, "learning_rate": 6.766847252650726e-06, "loss": 0.723, "step": 13165 }, { "epoch": 0.4035184504106902, "grad_norm": 0.443816380834822, "learning_rate": 6.766382946625164e-06, "loss": 0.422, "step": 13166 }, { "epoch": 0.4035490989334314, "grad_norm": 1.2667794763627622, "learning_rate": 6.76591862319498e-06, "loss": 0.6319, "step": 13167 }, { "epoch": 0.4035797474561726, "grad_norm": 0.4725815295391891, "learning_rate": 6.7654542823647475e-06, "loss": 0.4591, "step": 13168 }, { "epoch": 0.4036103959789138, "grad_norm": 1.1952166909002542, "learning_rate": 6.764989924139043e-06, "loss": 0.5379, "step": 13169 }, { "epoch": 0.403641044501655, "grad_norm": 1.1720715883512853, "learning_rate": 6.764525548522441e-06, "loss": 0.6423, "step": 13170 }, { "epoch": 0.4036716930243962, "grad_norm": 1.2372560052086259, "learning_rate": 6.764061155519515e-06, "loss": 0.6134, "step": 13171 }, { "epoch": 0.4037023415471374, "grad_norm": 1.3843370432874644, "learning_rate": 6.763596745134845e-06, "loss": 0.6618, "step": 13172 }, { "epoch": 0.4037329900698786, "grad_norm": 1.2990785041478887, "learning_rate": 6.763132317373004e-06, "loss": 0.6935, "step": 13173 }, { "epoch": 0.4037636385926198, "grad_norm": 1.2076354540391405, "learning_rate": 6.762667872238572e-06, "loss": 0.5896, "step": 13174 }, { "epoch": 0.40379428711536103, "grad_norm": 1.3855781812958783, "learning_rate": 6.762203409736119e-06, "loss": 0.6973, "step": 13175 }, { "epoch": 0.40382493563810223, "grad_norm": 1.246710210375955, "learning_rate": 6.761738929870227e-06, "loss": 0.6086, "step": 13176 }, { "epoch": 0.40385558416084344, "grad_norm": 1.2818875070856783, "learning_rate": 6.761274432645471e-06, "loss": 0.6581, "step": 13177 }, { "epoch": 0.40388623268358465, "grad_norm": 0.4798135216749695, "learning_rate": 6.7608099180664255e-06, "loss": 0.4331, "step": 13178 }, { "epoch": 0.40391688120632585, "grad_norm": 1.3904447197783003, "learning_rate": 6.76034538613767e-06, "loss": 0.6702, "step": 13179 }, { "epoch": 0.40394752972906706, "grad_norm": 1.2806989223423015, "learning_rate": 6.759880836863781e-06, "loss": 0.7079, "step": 13180 }, { "epoch": 0.40397817825180826, "grad_norm": 1.3526515387846982, "learning_rate": 6.759416270249337e-06, "loss": 0.6991, "step": 13181 }, { "epoch": 0.40400882677454947, "grad_norm": 1.1574114496590313, "learning_rate": 6.758951686298913e-06, "loss": 0.6562, "step": 13182 }, { "epoch": 0.4040394752972907, "grad_norm": 1.189481913209317, "learning_rate": 6.758487085017088e-06, "loss": 0.6241, "step": 13183 }, { "epoch": 0.4040701238200319, "grad_norm": 1.2339876025385565, "learning_rate": 6.7580224664084405e-06, "loss": 0.6687, "step": 13184 }, { "epoch": 0.4041007723427731, "grad_norm": 1.1434726382708065, "learning_rate": 6.757557830477548e-06, "loss": 0.6647, "step": 13185 }, { "epoch": 0.4041314208655143, "grad_norm": 1.2196500640401136, "learning_rate": 6.757093177228987e-06, "loss": 0.6936, "step": 13186 }, { "epoch": 0.4041620693882555, "grad_norm": 1.3475245721573434, "learning_rate": 6.756628506667339e-06, "loss": 0.6939, "step": 13187 }, { "epoch": 0.4041927179109967, "grad_norm": 1.2604745600690943, "learning_rate": 6.7561638187971804e-06, "loss": 0.6988, "step": 13188 }, { "epoch": 0.4042233664337379, "grad_norm": 1.1986358072725154, "learning_rate": 6.755699113623091e-06, "loss": 0.6311, "step": 13189 }, { "epoch": 0.4042540149564791, "grad_norm": 1.3109725654939643, "learning_rate": 6.755234391149646e-06, "loss": 0.6939, "step": 13190 }, { "epoch": 0.4042846634792203, "grad_norm": 1.3528684844393883, "learning_rate": 6.754769651381431e-06, "loss": 0.6203, "step": 13191 }, { "epoch": 0.4043153120019615, "grad_norm": 1.2468958991113774, "learning_rate": 6.75430489432302e-06, "loss": 0.6798, "step": 13192 }, { "epoch": 0.40434596052470273, "grad_norm": 1.2251702828914564, "learning_rate": 6.753840119978995e-06, "loss": 0.7166, "step": 13193 }, { "epoch": 0.40437660904744394, "grad_norm": 1.2670223350474255, "learning_rate": 6.753375328353933e-06, "loss": 0.6373, "step": 13194 }, { "epoch": 0.40440725757018514, "grad_norm": 1.1797942386586018, "learning_rate": 6.752910519452417e-06, "loss": 0.7614, "step": 13195 }, { "epoch": 0.40443790609292635, "grad_norm": 1.2622036454449534, "learning_rate": 6.752445693279024e-06, "loss": 0.7312, "step": 13196 }, { "epoch": 0.4044685546156675, "grad_norm": 1.249694470815288, "learning_rate": 6.751980849838336e-06, "loss": 0.6916, "step": 13197 }, { "epoch": 0.4044992031384087, "grad_norm": 1.180669341640609, "learning_rate": 6.7515159891349314e-06, "loss": 0.606, "step": 13198 }, { "epoch": 0.4045298516611499, "grad_norm": 1.2095257644454134, "learning_rate": 6.751051111173391e-06, "loss": 0.6584, "step": 13199 }, { "epoch": 0.4045605001838911, "grad_norm": 1.317945069503066, "learning_rate": 6.750586215958299e-06, "loss": 0.6293, "step": 13200 }, { "epoch": 0.4045911487066323, "grad_norm": 1.3835274777745619, "learning_rate": 6.75012130349423e-06, "loss": 0.6438, "step": 13201 }, { "epoch": 0.4046217972293735, "grad_norm": 1.2147667557929887, "learning_rate": 6.749656373785769e-06, "loss": 0.7355, "step": 13202 }, { "epoch": 0.40465244575211473, "grad_norm": 1.364423124246656, "learning_rate": 6.749191426837496e-06, "loss": 0.5852, "step": 13203 }, { "epoch": 0.40468309427485594, "grad_norm": 1.0827661654309673, "learning_rate": 6.748726462653994e-06, "loss": 0.5886, "step": 13204 }, { "epoch": 0.40471374279759714, "grad_norm": 1.1677600376366715, "learning_rate": 6.7482614812398405e-06, "loss": 0.6465, "step": 13205 }, { "epoch": 0.40474439132033835, "grad_norm": 1.1865872543416611, "learning_rate": 6.747796482599621e-06, "loss": 0.5812, "step": 13206 }, { "epoch": 0.40477503984307955, "grad_norm": 1.2078078896530542, "learning_rate": 6.747331466737914e-06, "loss": 0.6176, "step": 13207 }, { "epoch": 0.40480568836582076, "grad_norm": 1.3901681289411254, "learning_rate": 6.7468664336593044e-06, "loss": 0.58, "step": 13208 }, { "epoch": 0.40483633688856197, "grad_norm": 1.1393904349439792, "learning_rate": 6.746401383368372e-06, "loss": 0.6535, "step": 13209 }, { "epoch": 0.40486698541130317, "grad_norm": 1.3113450167894811, "learning_rate": 6.7459363158697e-06, "loss": 0.6696, "step": 13210 }, { "epoch": 0.4048976339340444, "grad_norm": 1.3908652562164832, "learning_rate": 6.745471231167871e-06, "loss": 0.6183, "step": 13211 }, { "epoch": 0.4049282824567856, "grad_norm": 1.161995237495509, "learning_rate": 6.745006129267467e-06, "loss": 0.6894, "step": 13212 }, { "epoch": 0.4049589309795268, "grad_norm": 0.4925682436990157, "learning_rate": 6.7445410101730716e-06, "loss": 0.4257, "step": 13213 }, { "epoch": 0.404989579502268, "grad_norm": 1.441241413839341, "learning_rate": 6.744075873889266e-06, "loss": 0.7236, "step": 13214 }, { "epoch": 0.4050202280250092, "grad_norm": 1.3482990584230616, "learning_rate": 6.743610720420637e-06, "loss": 0.7402, "step": 13215 }, { "epoch": 0.4050508765477504, "grad_norm": 0.45898020450342736, "learning_rate": 6.743145549771764e-06, "loss": 0.4283, "step": 13216 }, { "epoch": 0.4050815250704916, "grad_norm": 1.257789481628573, "learning_rate": 6.742680361947231e-06, "loss": 0.5974, "step": 13217 }, { "epoch": 0.4051121735932328, "grad_norm": 1.1288617743963352, "learning_rate": 6.742215156951624e-06, "loss": 0.5715, "step": 13218 }, { "epoch": 0.405142822115974, "grad_norm": 1.1945580114901326, "learning_rate": 6.741749934789526e-06, "loss": 0.6831, "step": 13219 }, { "epoch": 0.4051734706387152, "grad_norm": 1.2713094881655318, "learning_rate": 6.741284695465518e-06, "loss": 0.6734, "step": 13220 }, { "epoch": 0.40520411916145643, "grad_norm": 1.0981224015884534, "learning_rate": 6.740819438984187e-06, "loss": 0.6379, "step": 13221 }, { "epoch": 0.40523476768419764, "grad_norm": 1.067091332190967, "learning_rate": 6.740354165350117e-06, "loss": 0.6973, "step": 13222 }, { "epoch": 0.40526541620693884, "grad_norm": 1.3099826345943624, "learning_rate": 6.739888874567893e-06, "loss": 0.6257, "step": 13223 }, { "epoch": 0.40529606472968005, "grad_norm": 1.2653485953782673, "learning_rate": 6.739423566642098e-06, "loss": 0.6942, "step": 13224 }, { "epoch": 0.40532671325242126, "grad_norm": 1.1350648777371168, "learning_rate": 6.738958241577317e-06, "loss": 0.6443, "step": 13225 }, { "epoch": 0.40535736177516246, "grad_norm": 1.28314106091944, "learning_rate": 6.738492899378136e-06, "loss": 0.7383, "step": 13226 }, { "epoch": 0.40538801029790367, "grad_norm": 1.3084068954512957, "learning_rate": 6.73802754004914e-06, "loss": 0.6308, "step": 13227 }, { "epoch": 0.4054186588206448, "grad_norm": 1.3299612785343864, "learning_rate": 6.737562163594914e-06, "loss": 0.6248, "step": 13228 }, { "epoch": 0.405449307343386, "grad_norm": 1.3380047272510918, "learning_rate": 6.737096770020042e-06, "loss": 0.5814, "step": 13229 }, { "epoch": 0.40547995586612723, "grad_norm": 1.5397525123548164, "learning_rate": 6.736631359329112e-06, "loss": 0.6559, "step": 13230 }, { "epoch": 0.40551060438886843, "grad_norm": 1.220166233792407, "learning_rate": 6.736165931526711e-06, "loss": 0.729, "step": 13231 }, { "epoch": 0.40554125291160964, "grad_norm": 0.5162053681691924, "learning_rate": 6.73570048661742e-06, "loss": 0.4162, "step": 13232 }, { "epoch": 0.40557190143435085, "grad_norm": 0.5191751169955585, "learning_rate": 6.735235024605829e-06, "loss": 0.4203, "step": 13233 }, { "epoch": 0.40560254995709205, "grad_norm": 1.2321945424784746, "learning_rate": 6.734769545496523e-06, "loss": 0.6211, "step": 13234 }, { "epoch": 0.40563319847983326, "grad_norm": 1.3059829229390418, "learning_rate": 6.734304049294089e-06, "loss": 0.6842, "step": 13235 }, { "epoch": 0.40566384700257446, "grad_norm": 1.2352671471086447, "learning_rate": 6.7338385360031135e-06, "loss": 0.6525, "step": 13236 }, { "epoch": 0.40569449552531567, "grad_norm": 1.1295662577813657, "learning_rate": 6.7333730056281825e-06, "loss": 0.5631, "step": 13237 }, { "epoch": 0.4057251440480569, "grad_norm": 1.1299196651899177, "learning_rate": 6.732907458173885e-06, "loss": 0.6037, "step": 13238 }, { "epoch": 0.4057557925707981, "grad_norm": 1.2955573613200413, "learning_rate": 6.732441893644807e-06, "loss": 0.6611, "step": 13239 }, { "epoch": 0.4057864410935393, "grad_norm": 1.3390291505777112, "learning_rate": 6.731976312045534e-06, "loss": 0.7325, "step": 13240 }, { "epoch": 0.4058170896162805, "grad_norm": 1.264279596708025, "learning_rate": 6.731510713380657e-06, "loss": 0.6679, "step": 13241 }, { "epoch": 0.4058477381390217, "grad_norm": 1.0960731066742815, "learning_rate": 6.7310450976547616e-06, "loss": 0.7071, "step": 13242 }, { "epoch": 0.4058783866617629, "grad_norm": 1.4701730777418396, "learning_rate": 6.730579464872435e-06, "loss": 0.6816, "step": 13243 }, { "epoch": 0.4059090351845041, "grad_norm": 1.2780197116780652, "learning_rate": 6.730113815038266e-06, "loss": 0.6924, "step": 13244 }, { "epoch": 0.4059396837072453, "grad_norm": 1.0473843024907534, "learning_rate": 6.729648148156844e-06, "loss": 0.5317, "step": 13245 }, { "epoch": 0.4059703322299865, "grad_norm": 1.0747579596032544, "learning_rate": 6.729182464232758e-06, "loss": 0.5719, "step": 13246 }, { "epoch": 0.4060009807527277, "grad_norm": 0.6001138977144728, "learning_rate": 6.728716763270592e-06, "loss": 0.4448, "step": 13247 }, { "epoch": 0.40603162927546893, "grad_norm": 1.2723645760229803, "learning_rate": 6.728251045274937e-06, "loss": 0.696, "step": 13248 }, { "epoch": 0.40606227779821014, "grad_norm": 0.5265143139919837, "learning_rate": 6.727785310250384e-06, "loss": 0.4168, "step": 13249 }, { "epoch": 0.40609292632095134, "grad_norm": 1.133547655838094, "learning_rate": 6.72731955820152e-06, "loss": 0.6515, "step": 13250 }, { "epoch": 0.40612357484369255, "grad_norm": 1.1357319400145247, "learning_rate": 6.726853789132933e-06, "loss": 0.637, "step": 13251 }, { "epoch": 0.40615422336643375, "grad_norm": 1.2578606615663392, "learning_rate": 6.7263880030492155e-06, "loss": 0.6861, "step": 13252 }, { "epoch": 0.40618487188917496, "grad_norm": 1.2341592939990935, "learning_rate": 6.725922199954955e-06, "loss": 0.5569, "step": 13253 }, { "epoch": 0.40621552041191616, "grad_norm": 1.4679808352045522, "learning_rate": 6.725456379854742e-06, "loss": 0.7339, "step": 13254 }, { "epoch": 0.40624616893465737, "grad_norm": 1.2196264709736737, "learning_rate": 6.724990542753164e-06, "loss": 0.6452, "step": 13255 }, { "epoch": 0.4062768174573986, "grad_norm": 1.2137907505546575, "learning_rate": 6.724524688654814e-06, "loss": 0.6386, "step": 13256 }, { "epoch": 0.4063074659801398, "grad_norm": 0.6622195753729467, "learning_rate": 6.72405881756428e-06, "loss": 0.4243, "step": 13257 }, { "epoch": 0.406338114502881, "grad_norm": 0.6177792976169177, "learning_rate": 6.723592929486156e-06, "loss": 0.4399, "step": 13258 }, { "epoch": 0.4063687630256222, "grad_norm": 1.2818086931822517, "learning_rate": 6.7231270244250266e-06, "loss": 0.6607, "step": 13259 }, { "epoch": 0.40639941154836334, "grad_norm": 1.1794490949432375, "learning_rate": 6.722661102385488e-06, "loss": 0.7078, "step": 13260 }, { "epoch": 0.40643006007110455, "grad_norm": 0.43911603423996687, "learning_rate": 6.722195163372128e-06, "loss": 0.4272, "step": 13261 }, { "epoch": 0.40646070859384575, "grad_norm": 1.14719509996267, "learning_rate": 6.721729207389538e-06, "loss": 0.641, "step": 13262 }, { "epoch": 0.40649135711658696, "grad_norm": 1.1957281772545354, "learning_rate": 6.72126323444231e-06, "loss": 0.5984, "step": 13263 }, { "epoch": 0.40652200563932817, "grad_norm": 1.2681301314880353, "learning_rate": 6.720797244535036e-06, "loss": 0.6221, "step": 13264 }, { "epoch": 0.40655265416206937, "grad_norm": 1.1647670630186573, "learning_rate": 6.720331237672305e-06, "loss": 0.6688, "step": 13265 }, { "epoch": 0.4065833026848106, "grad_norm": 1.2822434806509586, "learning_rate": 6.71986521385871e-06, "loss": 0.7097, "step": 13266 }, { "epoch": 0.4066139512075518, "grad_norm": 1.3353577422568061, "learning_rate": 6.7193991730988435e-06, "loss": 0.727, "step": 13267 }, { "epoch": 0.406644599730293, "grad_norm": 1.3090659554514095, "learning_rate": 6.718933115397296e-06, "loss": 0.689, "step": 13268 }, { "epoch": 0.4066752482530342, "grad_norm": 1.188825322947965, "learning_rate": 6.718467040758663e-06, "loss": 0.6013, "step": 13269 }, { "epoch": 0.4067058967757754, "grad_norm": 1.2387039625824703, "learning_rate": 6.718000949187533e-06, "loss": 0.6729, "step": 13270 }, { "epoch": 0.4067365452985166, "grad_norm": 1.2621429161935869, "learning_rate": 6.7175348406884995e-06, "loss": 0.7167, "step": 13271 }, { "epoch": 0.4067671938212578, "grad_norm": 1.232373638314269, "learning_rate": 6.717068715266157e-06, "loss": 0.6596, "step": 13272 }, { "epoch": 0.406797842343999, "grad_norm": 1.1234081433755074, "learning_rate": 6.716602572925099e-06, "loss": 0.6324, "step": 13273 }, { "epoch": 0.4068284908667402, "grad_norm": 1.35947772753269, "learning_rate": 6.716136413669912e-06, "loss": 0.7075, "step": 13274 }, { "epoch": 0.40685913938948143, "grad_norm": 1.1725921245798159, "learning_rate": 6.715670237505198e-06, "loss": 0.6302, "step": 13275 }, { "epoch": 0.40688978791222263, "grad_norm": 1.1984679341758322, "learning_rate": 6.715204044435543e-06, "loss": 0.6737, "step": 13276 }, { "epoch": 0.40692043643496384, "grad_norm": 1.4324472163043687, "learning_rate": 6.7147378344655455e-06, "loss": 0.679, "step": 13277 }, { "epoch": 0.40695108495770504, "grad_norm": 1.33981655719288, "learning_rate": 6.714271607599797e-06, "loss": 0.7124, "step": 13278 }, { "epoch": 0.40698173348044625, "grad_norm": 1.264945501907628, "learning_rate": 6.713805363842893e-06, "loss": 0.6384, "step": 13279 }, { "epoch": 0.40701238200318746, "grad_norm": 1.4091679375874973, "learning_rate": 6.7133391031994236e-06, "loss": 0.7181, "step": 13280 }, { "epoch": 0.40704303052592866, "grad_norm": 1.4624151223132176, "learning_rate": 6.712872825673987e-06, "loss": 0.7317, "step": 13281 }, { "epoch": 0.40707367904866987, "grad_norm": 1.1214424307903763, "learning_rate": 6.712406531271176e-06, "loss": 0.587, "step": 13282 }, { "epoch": 0.4071043275714111, "grad_norm": 1.25886515714795, "learning_rate": 6.711940219995585e-06, "loss": 0.6693, "step": 13283 }, { "epoch": 0.4071349760941523, "grad_norm": 0.8864516913577994, "learning_rate": 6.711473891851812e-06, "loss": 0.4199, "step": 13284 }, { "epoch": 0.4071656246168935, "grad_norm": 1.3190796532865703, "learning_rate": 6.711007546844444e-06, "loss": 0.6867, "step": 13285 }, { "epoch": 0.4071962731396347, "grad_norm": 1.1215986426754405, "learning_rate": 6.710541184978084e-06, "loss": 0.5917, "step": 13286 }, { "epoch": 0.4072269216623759, "grad_norm": 1.1800686901179653, "learning_rate": 6.7100748062573225e-06, "loss": 0.6169, "step": 13287 }, { "epoch": 0.4072575701851171, "grad_norm": 1.2847028551105395, "learning_rate": 6.709608410686759e-06, "loss": 0.6835, "step": 13288 }, { "epoch": 0.4072882187078583, "grad_norm": 1.1155306167847443, "learning_rate": 6.7091419982709836e-06, "loss": 0.5836, "step": 13289 }, { "epoch": 0.4073188672305995, "grad_norm": 1.3670064062891982, "learning_rate": 6.7086755690145965e-06, "loss": 0.573, "step": 13290 }, { "epoch": 0.40734951575334066, "grad_norm": 1.0776863368971925, "learning_rate": 6.7082091229221904e-06, "loss": 0.6095, "step": 13291 }, { "epoch": 0.40738016427608187, "grad_norm": 1.2545140361155065, "learning_rate": 6.707742659998364e-06, "loss": 0.7681, "step": 13292 }, { "epoch": 0.4074108127988231, "grad_norm": 1.3532707204679826, "learning_rate": 6.707276180247712e-06, "loss": 0.6483, "step": 13293 }, { "epoch": 0.4074414613215643, "grad_norm": 1.22585749729378, "learning_rate": 6.706809683674829e-06, "loss": 0.6816, "step": 13294 }, { "epoch": 0.4074721098443055, "grad_norm": 0.5355521282060717, "learning_rate": 6.706343170284315e-06, "loss": 0.418, "step": 13295 }, { "epoch": 0.4075027583670467, "grad_norm": 1.1930806954828246, "learning_rate": 6.705876640080766e-06, "loss": 0.7639, "step": 13296 }, { "epoch": 0.4075334068897879, "grad_norm": 1.145319629373813, "learning_rate": 6.7054100930687785e-06, "loss": 0.6254, "step": 13297 }, { "epoch": 0.4075640554125291, "grad_norm": 1.3937888046445666, "learning_rate": 6.704943529252947e-06, "loss": 0.6949, "step": 13298 }, { "epoch": 0.4075947039352703, "grad_norm": 1.268201093198108, "learning_rate": 6.7044769486378715e-06, "loss": 0.6943, "step": 13299 }, { "epoch": 0.4076253524580115, "grad_norm": 1.2662849131406786, "learning_rate": 6.704010351228149e-06, "loss": 0.6685, "step": 13300 }, { "epoch": 0.4076560009807527, "grad_norm": 1.3175992904687766, "learning_rate": 6.703543737028375e-06, "loss": 0.7019, "step": 13301 }, { "epoch": 0.4076866495034939, "grad_norm": 0.5187702287479486, "learning_rate": 6.7030771060431495e-06, "loss": 0.4181, "step": 13302 }, { "epoch": 0.40771729802623513, "grad_norm": 1.2208061303702646, "learning_rate": 6.70261045827707e-06, "loss": 0.649, "step": 13303 }, { "epoch": 0.40774794654897634, "grad_norm": 2.939767901215139, "learning_rate": 6.702143793734735e-06, "loss": 0.7004, "step": 13304 }, { "epoch": 0.40777859507171754, "grad_norm": 1.1252381860112814, "learning_rate": 6.7016771124207404e-06, "loss": 0.678, "step": 13305 }, { "epoch": 0.40780924359445875, "grad_norm": 1.192967390022485, "learning_rate": 6.701210414339685e-06, "loss": 0.5924, "step": 13306 }, { "epoch": 0.40783989211719995, "grad_norm": 1.2699166403220503, "learning_rate": 6.7007436994961685e-06, "loss": 0.5715, "step": 13307 }, { "epoch": 0.40787054063994116, "grad_norm": 1.2956300659957432, "learning_rate": 6.7002769678947895e-06, "loss": 0.6722, "step": 13308 }, { "epoch": 0.40790118916268236, "grad_norm": 1.1812702196937352, "learning_rate": 6.699810219540146e-06, "loss": 0.5966, "step": 13309 }, { "epoch": 0.40793183768542357, "grad_norm": 1.2557546250532345, "learning_rate": 6.699343454436839e-06, "loss": 0.6325, "step": 13310 }, { "epoch": 0.4079624862081648, "grad_norm": 1.1916107947882224, "learning_rate": 6.698876672589465e-06, "loss": 0.6535, "step": 13311 }, { "epoch": 0.407993134730906, "grad_norm": 1.1749851670213045, "learning_rate": 6.698409874002626e-06, "loss": 0.5761, "step": 13312 }, { "epoch": 0.4080237832536472, "grad_norm": 1.156564541068204, "learning_rate": 6.697943058680918e-06, "loss": 0.732, "step": 13313 }, { "epoch": 0.4080544317763884, "grad_norm": 1.2333274231062628, "learning_rate": 6.697476226628943e-06, "loss": 0.641, "step": 13314 }, { "epoch": 0.4080850802991296, "grad_norm": 1.3994206908300233, "learning_rate": 6.697009377851301e-06, "loss": 0.6981, "step": 13315 }, { "epoch": 0.4081157288218708, "grad_norm": 1.3575381297531084, "learning_rate": 6.696542512352592e-06, "loss": 0.6645, "step": 13316 }, { "epoch": 0.408146377344612, "grad_norm": 1.315341420252045, "learning_rate": 6.696075630137413e-06, "loss": 0.6868, "step": 13317 }, { "epoch": 0.4081770258673532, "grad_norm": 1.2925172588407703, "learning_rate": 6.6956087312103694e-06, "loss": 0.6866, "step": 13318 }, { "epoch": 0.4082076743900944, "grad_norm": 0.4529334535963702, "learning_rate": 6.695141815576058e-06, "loss": 0.3995, "step": 13319 }, { "epoch": 0.4082383229128356, "grad_norm": 1.2444479689893897, "learning_rate": 6.694674883239081e-06, "loss": 0.6106, "step": 13320 }, { "epoch": 0.40826897143557683, "grad_norm": 1.3031409767257305, "learning_rate": 6.694207934204038e-06, "loss": 0.7188, "step": 13321 }, { "epoch": 0.408299619958318, "grad_norm": 1.2576321265418784, "learning_rate": 6.693740968475531e-06, "loss": 0.6248, "step": 13322 }, { "epoch": 0.4083302684810592, "grad_norm": 0.4581035620168838, "learning_rate": 6.693273986058162e-06, "loss": 0.4504, "step": 13323 }, { "epoch": 0.4083609170038004, "grad_norm": 1.1064730146649937, "learning_rate": 6.69280698695653e-06, "loss": 0.6608, "step": 13324 }, { "epoch": 0.4083915655265416, "grad_norm": 1.272451196431314, "learning_rate": 6.692339971175239e-06, "loss": 0.6267, "step": 13325 }, { "epoch": 0.4084222140492828, "grad_norm": 1.3661656518217413, "learning_rate": 6.691872938718887e-06, "loss": 0.7326, "step": 13326 }, { "epoch": 0.408452862572024, "grad_norm": 1.1968952017803605, "learning_rate": 6.691405889592081e-06, "loss": 0.6104, "step": 13327 }, { "epoch": 0.4084835110947652, "grad_norm": 1.1484823163189637, "learning_rate": 6.6909388237994175e-06, "loss": 0.5609, "step": 13328 }, { "epoch": 0.4085141596175064, "grad_norm": 0.4695620256058838, "learning_rate": 6.690471741345503e-06, "loss": 0.4514, "step": 13329 }, { "epoch": 0.40854480814024763, "grad_norm": 1.290318476333426, "learning_rate": 6.690004642234935e-06, "loss": 0.6931, "step": 13330 }, { "epoch": 0.40857545666298883, "grad_norm": 1.390374338422011, "learning_rate": 6.6895375264723225e-06, "loss": 0.6879, "step": 13331 }, { "epoch": 0.40860610518573004, "grad_norm": 1.3852237706450854, "learning_rate": 6.689070394062261e-06, "loss": 0.6948, "step": 13332 }, { "epoch": 0.40863675370847125, "grad_norm": 1.1042330256879993, "learning_rate": 6.688603245009359e-06, "loss": 0.6984, "step": 13333 }, { "epoch": 0.40866740223121245, "grad_norm": 1.3409908385572913, "learning_rate": 6.6881360793182155e-06, "loss": 0.6574, "step": 13334 }, { "epoch": 0.40869805075395366, "grad_norm": 1.0771179708642808, "learning_rate": 6.687668896993438e-06, "loss": 0.6392, "step": 13335 }, { "epoch": 0.40872869927669486, "grad_norm": 1.319346219604906, "learning_rate": 6.687201698039625e-06, "loss": 0.6767, "step": 13336 }, { "epoch": 0.40875934779943607, "grad_norm": 1.4051621483950338, "learning_rate": 6.686734482461381e-06, "loss": 0.7084, "step": 13337 }, { "epoch": 0.4087899963221773, "grad_norm": 1.338278356359858, "learning_rate": 6.686267250263314e-06, "loss": 0.6579, "step": 13338 }, { "epoch": 0.4088206448449185, "grad_norm": 1.2010565231990886, "learning_rate": 6.685800001450023e-06, "loss": 0.7646, "step": 13339 }, { "epoch": 0.4088512933676597, "grad_norm": 1.216966386621883, "learning_rate": 6.685332736026111e-06, "loss": 0.6523, "step": 13340 }, { "epoch": 0.4088819418904009, "grad_norm": 1.286300049191703, "learning_rate": 6.684865453996185e-06, "loss": 0.6696, "step": 13341 }, { "epoch": 0.4089125904131421, "grad_norm": 1.3120598155047107, "learning_rate": 6.684398155364852e-06, "loss": 0.7391, "step": 13342 }, { "epoch": 0.4089432389358833, "grad_norm": 1.3150360319674588, "learning_rate": 6.68393084013671e-06, "loss": 0.7619, "step": 13343 }, { "epoch": 0.4089738874586245, "grad_norm": 1.157488716599509, "learning_rate": 6.683463508316367e-06, "loss": 0.6209, "step": 13344 }, { "epoch": 0.4090045359813657, "grad_norm": 0.4876389503995231, "learning_rate": 6.682996159908426e-06, "loss": 0.4353, "step": 13345 }, { "epoch": 0.4090351845041069, "grad_norm": 1.3495615535207752, "learning_rate": 6.682528794917495e-06, "loss": 0.7194, "step": 13346 }, { "epoch": 0.4090658330268481, "grad_norm": 1.2685562898711644, "learning_rate": 6.682061413348178e-06, "loss": 0.6433, "step": 13347 }, { "epoch": 0.40909648154958933, "grad_norm": 0.4571258373109847, "learning_rate": 6.681594015205078e-06, "loss": 0.4318, "step": 13348 }, { "epoch": 0.40912713007233054, "grad_norm": 0.4544075147697138, "learning_rate": 6.681126600492802e-06, "loss": 0.425, "step": 13349 }, { "epoch": 0.40915777859507174, "grad_norm": 1.104173738866302, "learning_rate": 6.680659169215956e-06, "loss": 0.7054, "step": 13350 }, { "epoch": 0.40918842711781295, "grad_norm": 1.343203960993906, "learning_rate": 6.6801917213791454e-06, "loss": 0.656, "step": 13351 }, { "epoch": 0.40921907564055415, "grad_norm": 1.1672605161047782, "learning_rate": 6.679724256986974e-06, "loss": 0.6693, "step": 13352 }, { "epoch": 0.4092497241632953, "grad_norm": 1.2916481011939362, "learning_rate": 6.679256776044052e-06, "loss": 0.7082, "step": 13353 }, { "epoch": 0.4092803726860365, "grad_norm": 1.210963333310173, "learning_rate": 6.6787892785549825e-06, "loss": 0.6664, "step": 13354 }, { "epoch": 0.4093110212087777, "grad_norm": 1.2563748933863246, "learning_rate": 6.678321764524373e-06, "loss": 0.6435, "step": 13355 }, { "epoch": 0.4093416697315189, "grad_norm": 1.268951609592792, "learning_rate": 6.67785423395683e-06, "loss": 0.6826, "step": 13356 }, { "epoch": 0.4093723182542601, "grad_norm": 1.2649219643042366, "learning_rate": 6.677386686856959e-06, "loss": 0.6996, "step": 13357 }, { "epoch": 0.40940296677700133, "grad_norm": 1.1261616804009678, "learning_rate": 6.6769191232293685e-06, "loss": 0.7097, "step": 13358 }, { "epoch": 0.40943361529974254, "grad_norm": 0.460361585251631, "learning_rate": 6.676451543078664e-06, "loss": 0.4219, "step": 13359 }, { "epoch": 0.40946426382248374, "grad_norm": 1.2872625113391831, "learning_rate": 6.675983946409454e-06, "loss": 0.5731, "step": 13360 }, { "epoch": 0.40949491234522495, "grad_norm": 0.47530476354883855, "learning_rate": 6.675516333226346e-06, "loss": 0.4453, "step": 13361 }, { "epoch": 0.40952556086796615, "grad_norm": 1.4316961303901072, "learning_rate": 6.6750487035339465e-06, "loss": 0.6818, "step": 13362 }, { "epoch": 0.40955620939070736, "grad_norm": 1.1858318205799285, "learning_rate": 6.674581057336862e-06, "loss": 0.6008, "step": 13363 }, { "epoch": 0.40958685791344857, "grad_norm": 1.1223208686516173, "learning_rate": 6.674113394639704e-06, "loss": 0.6166, "step": 13364 }, { "epoch": 0.40961750643618977, "grad_norm": 1.2170476208937397, "learning_rate": 6.673645715447078e-06, "loss": 0.6896, "step": 13365 }, { "epoch": 0.409648154958931, "grad_norm": 1.404488987742941, "learning_rate": 6.673178019763592e-06, "loss": 0.6479, "step": 13366 }, { "epoch": 0.4096788034816722, "grad_norm": 1.146513261725763, "learning_rate": 6.672710307593855e-06, "loss": 0.7212, "step": 13367 }, { "epoch": 0.4097094520044134, "grad_norm": 0.4668206490318029, "learning_rate": 6.672242578942475e-06, "loss": 0.4197, "step": 13368 }, { "epoch": 0.4097401005271546, "grad_norm": 1.366138742959762, "learning_rate": 6.671774833814062e-06, "loss": 0.689, "step": 13369 }, { "epoch": 0.4097707490498958, "grad_norm": 1.1689871433186751, "learning_rate": 6.671307072213223e-06, "loss": 0.677, "step": 13370 }, { "epoch": 0.409801397572637, "grad_norm": 1.392677811101394, "learning_rate": 6.6708392941445675e-06, "loss": 0.7399, "step": 13371 }, { "epoch": 0.4098320460953782, "grad_norm": 1.8396451430537106, "learning_rate": 6.670371499612705e-06, "loss": 0.6822, "step": 13372 }, { "epoch": 0.4098626946181194, "grad_norm": 1.5098090697686086, "learning_rate": 6.669903688622246e-06, "loss": 0.6584, "step": 13373 }, { "epoch": 0.4098933431408606, "grad_norm": 1.2675844647891361, "learning_rate": 6.669435861177798e-06, "loss": 0.7046, "step": 13374 }, { "epoch": 0.4099239916636018, "grad_norm": 1.136272217619923, "learning_rate": 6.668968017283971e-06, "loss": 0.5445, "step": 13375 }, { "epoch": 0.40995464018634303, "grad_norm": 1.2004387826427616, "learning_rate": 6.668500156945376e-06, "loss": 0.6982, "step": 13376 }, { "epoch": 0.40998528870908424, "grad_norm": 1.271553465957807, "learning_rate": 6.668032280166621e-06, "loss": 0.6246, "step": 13377 }, { "epoch": 0.41001593723182544, "grad_norm": 1.0895730794822733, "learning_rate": 6.667564386952316e-06, "loss": 0.6738, "step": 13378 }, { "epoch": 0.41004658575456665, "grad_norm": 1.2711589775703436, "learning_rate": 6.667096477307075e-06, "loss": 0.7026, "step": 13379 }, { "epoch": 0.41007723427730786, "grad_norm": 1.2023204595494967, "learning_rate": 6.666628551235504e-06, "loss": 0.6757, "step": 13380 }, { "epoch": 0.41010788280004906, "grad_norm": 1.2296372911509643, "learning_rate": 6.666160608742217e-06, "loss": 0.6869, "step": 13381 }, { "epoch": 0.41013853132279027, "grad_norm": 1.0896307645006453, "learning_rate": 6.665692649831822e-06, "loss": 0.6295, "step": 13382 }, { "epoch": 0.4101691798455315, "grad_norm": 1.2340020319392078, "learning_rate": 6.665224674508932e-06, "loss": 0.6202, "step": 13383 }, { "epoch": 0.4101998283682726, "grad_norm": 1.1760024683655974, "learning_rate": 6.664756682778156e-06, "loss": 0.6539, "step": 13384 }, { "epoch": 0.41023047689101383, "grad_norm": 1.2825139666741119, "learning_rate": 6.6642886746441085e-06, "loss": 0.6404, "step": 13385 }, { "epoch": 0.41026112541375503, "grad_norm": 1.3039332613408132, "learning_rate": 6.6638206501113965e-06, "loss": 0.5999, "step": 13386 }, { "epoch": 0.41029177393649624, "grad_norm": 1.2902104802548464, "learning_rate": 6.663352609184635e-06, "loss": 0.6851, "step": 13387 }, { "epoch": 0.41032242245923745, "grad_norm": 1.4051535503922576, "learning_rate": 6.662884551868436e-06, "loss": 0.6877, "step": 13388 }, { "epoch": 0.41035307098197865, "grad_norm": 1.3691765525457222, "learning_rate": 6.662416478167407e-06, "loss": 0.6965, "step": 13389 }, { "epoch": 0.41038371950471986, "grad_norm": 1.2134440873714234, "learning_rate": 6.661948388086166e-06, "loss": 0.6391, "step": 13390 }, { "epoch": 0.41041436802746106, "grad_norm": 1.2119942562915855, "learning_rate": 6.66148028162932e-06, "loss": 0.6298, "step": 13391 }, { "epoch": 0.41044501655020227, "grad_norm": 0.5140873360110295, "learning_rate": 6.661012158801487e-06, "loss": 0.4314, "step": 13392 }, { "epoch": 0.4104756650729435, "grad_norm": 1.2296430878240823, "learning_rate": 6.660544019607272e-06, "loss": 0.6223, "step": 13393 }, { "epoch": 0.4105063135956847, "grad_norm": 1.1855970375578142, "learning_rate": 6.660075864051294e-06, "loss": 0.6209, "step": 13394 }, { "epoch": 0.4105369621184259, "grad_norm": 1.2014856618379761, "learning_rate": 6.659607692138164e-06, "loss": 0.5875, "step": 13395 }, { "epoch": 0.4105676106411671, "grad_norm": 1.2265635410667939, "learning_rate": 6.659139503872496e-06, "loss": 0.6772, "step": 13396 }, { "epoch": 0.4105982591639083, "grad_norm": 1.1461730021013496, "learning_rate": 6.658671299258899e-06, "loss": 0.6749, "step": 13397 }, { "epoch": 0.4106289076866495, "grad_norm": 1.116691792071544, "learning_rate": 6.658203078301991e-06, "loss": 0.6933, "step": 13398 }, { "epoch": 0.4106595562093907, "grad_norm": 0.4667939586816919, "learning_rate": 6.657734841006383e-06, "loss": 0.4111, "step": 13399 }, { "epoch": 0.4106902047321319, "grad_norm": 1.1309793041256775, "learning_rate": 6.6572665873766914e-06, "loss": 0.645, "step": 13400 }, { "epoch": 0.4107208532548731, "grad_norm": 1.1113220223118394, "learning_rate": 6.6567983174175255e-06, "loss": 0.6232, "step": 13401 }, { "epoch": 0.4107515017776143, "grad_norm": 1.2204751275599668, "learning_rate": 6.656330031133503e-06, "loss": 0.6251, "step": 13402 }, { "epoch": 0.41078215030035553, "grad_norm": 1.325041029372933, "learning_rate": 6.655861728529237e-06, "loss": 0.6998, "step": 13403 }, { "epoch": 0.41081279882309674, "grad_norm": 1.259194560121807, "learning_rate": 6.655393409609342e-06, "loss": 0.7149, "step": 13404 }, { "epoch": 0.41084344734583794, "grad_norm": 1.2170538270728806, "learning_rate": 6.654925074378432e-06, "loss": 0.7022, "step": 13405 }, { "epoch": 0.41087409586857915, "grad_norm": 1.1080939449352136, "learning_rate": 6.6544567228411206e-06, "loss": 0.7017, "step": 13406 }, { "epoch": 0.41090474439132035, "grad_norm": 1.181502303358706, "learning_rate": 6.653988355002026e-06, "loss": 0.6429, "step": 13407 }, { "epoch": 0.41093539291406156, "grad_norm": 1.1481591158095874, "learning_rate": 6.653519970865759e-06, "loss": 0.6083, "step": 13408 }, { "epoch": 0.41096604143680276, "grad_norm": 1.3410497689525631, "learning_rate": 6.653051570436938e-06, "loss": 0.7811, "step": 13409 }, { "epoch": 0.41099668995954397, "grad_norm": 1.3394536778223052, "learning_rate": 6.652583153720176e-06, "loss": 0.609, "step": 13410 }, { "epoch": 0.4110273384822852, "grad_norm": 1.2482810679457554, "learning_rate": 6.65211472072009e-06, "loss": 0.7169, "step": 13411 }, { "epoch": 0.4110579870050264, "grad_norm": 1.1536089018905367, "learning_rate": 6.651646271441295e-06, "loss": 0.64, "step": 13412 }, { "epoch": 0.4110886355277676, "grad_norm": 1.096472299512822, "learning_rate": 6.651177805888407e-06, "loss": 0.5945, "step": 13413 }, { "epoch": 0.4111192840505088, "grad_norm": 1.235538079767391, "learning_rate": 6.650709324066041e-06, "loss": 0.6036, "step": 13414 }, { "epoch": 0.41114993257324994, "grad_norm": 0.4702825105567334, "learning_rate": 6.650240825978813e-06, "loss": 0.4159, "step": 13415 }, { "epoch": 0.41118058109599115, "grad_norm": 1.3301675590933064, "learning_rate": 6.6497723116313405e-06, "loss": 0.7028, "step": 13416 }, { "epoch": 0.41121122961873235, "grad_norm": 1.245576974028931, "learning_rate": 6.649303781028239e-06, "loss": 0.6656, "step": 13417 }, { "epoch": 0.41124187814147356, "grad_norm": 1.4006078998887266, "learning_rate": 6.648835234174126e-06, "loss": 0.6641, "step": 13418 }, { "epoch": 0.41127252666421477, "grad_norm": 1.1687656584352717, "learning_rate": 6.648366671073617e-06, "loss": 0.7299, "step": 13419 }, { "epoch": 0.41130317518695597, "grad_norm": 1.2503998453320022, "learning_rate": 6.647898091731331e-06, "loss": 0.6865, "step": 13420 }, { "epoch": 0.4113338237096972, "grad_norm": 1.2846097139941228, "learning_rate": 6.64742949615188e-06, "loss": 0.6893, "step": 13421 }, { "epoch": 0.4113644722324384, "grad_norm": 1.1546975800776038, "learning_rate": 6.646960884339888e-06, "loss": 0.6813, "step": 13422 }, { "epoch": 0.4113951207551796, "grad_norm": 1.231558324484596, "learning_rate": 6.646492256299968e-06, "loss": 0.663, "step": 13423 }, { "epoch": 0.4114257692779208, "grad_norm": 1.3152769500942796, "learning_rate": 6.6460236120367384e-06, "loss": 0.7006, "step": 13424 }, { "epoch": 0.411456417800662, "grad_norm": 1.3577540489689353, "learning_rate": 6.645554951554817e-06, "loss": 0.735, "step": 13425 }, { "epoch": 0.4114870663234032, "grad_norm": 1.3574252293430065, "learning_rate": 6.645086274858822e-06, "loss": 0.7911, "step": 13426 }, { "epoch": 0.4115177148461444, "grad_norm": 1.2777446743205547, "learning_rate": 6.644617581953371e-06, "loss": 0.6597, "step": 13427 }, { "epoch": 0.4115483633688856, "grad_norm": 1.4756132337534005, "learning_rate": 6.644148872843081e-06, "loss": 0.6502, "step": 13428 }, { "epoch": 0.4115790118916268, "grad_norm": 1.5611765919411593, "learning_rate": 6.643680147532572e-06, "loss": 0.6871, "step": 13429 }, { "epoch": 0.41160966041436803, "grad_norm": 1.1111333006897743, "learning_rate": 6.643211406026463e-06, "loss": 0.6058, "step": 13430 }, { "epoch": 0.41164030893710923, "grad_norm": 1.0732717744842095, "learning_rate": 6.642742648329371e-06, "loss": 0.6348, "step": 13431 }, { "epoch": 0.41167095745985044, "grad_norm": 1.0734040118001895, "learning_rate": 6.642273874445914e-06, "loss": 0.6751, "step": 13432 }, { "epoch": 0.41170160598259165, "grad_norm": 1.2202366381002672, "learning_rate": 6.641805084380715e-06, "loss": 0.712, "step": 13433 }, { "epoch": 0.41173225450533285, "grad_norm": 1.2410723147313474, "learning_rate": 6.641336278138387e-06, "loss": 0.6312, "step": 13434 }, { "epoch": 0.41176290302807406, "grad_norm": 1.3487265842797385, "learning_rate": 6.640867455723556e-06, "loss": 0.6192, "step": 13435 }, { "epoch": 0.41179355155081526, "grad_norm": 1.2372237735551952, "learning_rate": 6.6403986171408365e-06, "loss": 0.6878, "step": 13436 }, { "epoch": 0.41182420007355647, "grad_norm": 1.3350110266169664, "learning_rate": 6.63992976239485e-06, "loss": 0.6149, "step": 13437 }, { "epoch": 0.4118548485962977, "grad_norm": 1.3432411694463509, "learning_rate": 6.639460891490217e-06, "loss": 0.617, "step": 13438 }, { "epoch": 0.4118854971190389, "grad_norm": 1.2897150518435425, "learning_rate": 6.6389920044315545e-06, "loss": 0.6426, "step": 13439 }, { "epoch": 0.4119161456417801, "grad_norm": 1.2307822430839968, "learning_rate": 6.638523101223485e-06, "loss": 0.6552, "step": 13440 }, { "epoch": 0.4119467941645213, "grad_norm": 1.2832232321350734, "learning_rate": 6.638054181870629e-06, "loss": 0.7033, "step": 13441 }, { "epoch": 0.4119774426872625, "grad_norm": 1.1533569565697166, "learning_rate": 6.637585246377605e-06, "loss": 0.7536, "step": 13442 }, { "epoch": 0.4120080912100037, "grad_norm": 1.3079252687499638, "learning_rate": 6.637116294749035e-06, "loss": 0.7269, "step": 13443 }, { "epoch": 0.4120387397327449, "grad_norm": 1.1815340383971367, "learning_rate": 6.6366473269895395e-06, "loss": 0.6061, "step": 13444 }, { "epoch": 0.4120693882554861, "grad_norm": 1.3116588334282218, "learning_rate": 6.636178343103739e-06, "loss": 0.5897, "step": 13445 }, { "epoch": 0.41210003677822726, "grad_norm": 1.2690037334111284, "learning_rate": 6.635709343096255e-06, "loss": 0.6944, "step": 13446 }, { "epoch": 0.41213068530096847, "grad_norm": 1.210812229240356, "learning_rate": 6.635240326971707e-06, "loss": 0.6811, "step": 13447 }, { "epoch": 0.4121613338237097, "grad_norm": 0.5484349724820099, "learning_rate": 6.634771294734719e-06, "loss": 0.4394, "step": 13448 }, { "epoch": 0.4121919823464509, "grad_norm": 1.114503308313743, "learning_rate": 6.63430224638991e-06, "loss": 0.6081, "step": 13449 }, { "epoch": 0.4122226308691921, "grad_norm": 1.3187965231094767, "learning_rate": 6.633833181941905e-06, "loss": 0.6647, "step": 13450 }, { "epoch": 0.4122532793919333, "grad_norm": 0.4560131447461902, "learning_rate": 6.633364101395321e-06, "loss": 0.4148, "step": 13451 }, { "epoch": 0.4122839279146745, "grad_norm": 1.306844557191835, "learning_rate": 6.632895004754785e-06, "loss": 0.7068, "step": 13452 }, { "epoch": 0.4123145764374157, "grad_norm": 1.2265843805864372, "learning_rate": 6.632425892024914e-06, "loss": 0.6502, "step": 13453 }, { "epoch": 0.4123452249601569, "grad_norm": 0.4550437284062024, "learning_rate": 6.631956763210335e-06, "loss": 0.4262, "step": 13454 }, { "epoch": 0.4123758734828981, "grad_norm": 1.2665670910306623, "learning_rate": 6.6314876183156686e-06, "loss": 0.6291, "step": 13455 }, { "epoch": 0.4124065220056393, "grad_norm": 1.2052992094345978, "learning_rate": 6.631018457345536e-06, "loss": 0.6641, "step": 13456 }, { "epoch": 0.4124371705283805, "grad_norm": 1.094079646231324, "learning_rate": 6.630549280304561e-06, "loss": 0.6275, "step": 13457 }, { "epoch": 0.41246781905112173, "grad_norm": 1.1705539342074973, "learning_rate": 6.630080087197368e-06, "loss": 0.6005, "step": 13458 }, { "epoch": 0.41249846757386294, "grad_norm": 0.48549596089252206, "learning_rate": 6.629610878028579e-06, "loss": 0.4265, "step": 13459 }, { "epoch": 0.41252911609660414, "grad_norm": 0.46216341571429775, "learning_rate": 6.629141652802815e-06, "loss": 0.4182, "step": 13460 }, { "epoch": 0.41255976461934535, "grad_norm": 1.0950693279422858, "learning_rate": 6.628672411524704e-06, "loss": 0.5961, "step": 13461 }, { "epoch": 0.41259041314208655, "grad_norm": 1.326497682191626, "learning_rate": 6.628203154198865e-06, "loss": 0.6801, "step": 13462 }, { "epoch": 0.41262106166482776, "grad_norm": 1.1422811208355808, "learning_rate": 6.627733880829926e-06, "loss": 0.6147, "step": 13463 }, { "epoch": 0.41265171018756897, "grad_norm": 1.0743044037260234, "learning_rate": 6.627264591422507e-06, "loss": 0.6512, "step": 13464 }, { "epoch": 0.41268235871031017, "grad_norm": 1.354913272427083, "learning_rate": 6.626795285981235e-06, "loss": 0.7309, "step": 13465 }, { "epoch": 0.4127130072330514, "grad_norm": 1.17296404727694, "learning_rate": 6.6263259645107305e-06, "loss": 0.6562, "step": 13466 }, { "epoch": 0.4127436557557926, "grad_norm": 1.1222845628144418, "learning_rate": 6.625856627015621e-06, "loss": 0.6564, "step": 13467 }, { "epoch": 0.4127743042785338, "grad_norm": 0.5315738917804135, "learning_rate": 6.6253872735005296e-06, "loss": 0.4369, "step": 13468 }, { "epoch": 0.412804952801275, "grad_norm": 1.1007047261936496, "learning_rate": 6.624917903970084e-06, "loss": 0.6399, "step": 13469 }, { "epoch": 0.4128356013240162, "grad_norm": 1.2507884247656846, "learning_rate": 6.624448518428905e-06, "loss": 0.6912, "step": 13470 }, { "epoch": 0.4128662498467574, "grad_norm": 1.235165167313142, "learning_rate": 6.6239791168816195e-06, "loss": 0.6234, "step": 13471 }, { "epoch": 0.4128968983694986, "grad_norm": 1.1250697208676177, "learning_rate": 6.623509699332851e-06, "loss": 0.6972, "step": 13472 }, { "epoch": 0.4129275468922398, "grad_norm": 0.4538313021194359, "learning_rate": 6.623040265787227e-06, "loss": 0.4149, "step": 13473 }, { "epoch": 0.412958195414981, "grad_norm": 1.3522777454969033, "learning_rate": 6.6225708162493715e-06, "loss": 0.7177, "step": 13474 }, { "epoch": 0.4129888439377222, "grad_norm": 1.1809823520792933, "learning_rate": 6.6221013507239105e-06, "loss": 0.6442, "step": 13475 }, { "epoch": 0.41301949246046343, "grad_norm": 1.27968483983853, "learning_rate": 6.621631869215471e-06, "loss": 0.6425, "step": 13476 }, { "epoch": 0.4130501409832046, "grad_norm": 1.2089169136593165, "learning_rate": 6.621162371728678e-06, "loss": 0.6899, "step": 13477 }, { "epoch": 0.4130807895059458, "grad_norm": 1.2905056451393684, "learning_rate": 6.620692858268156e-06, "loss": 0.5535, "step": 13478 }, { "epoch": 0.413111438028687, "grad_norm": 1.0759328535217185, "learning_rate": 6.6202233288385335e-06, "loss": 0.5898, "step": 13479 }, { "epoch": 0.4131420865514282, "grad_norm": 1.3058241310593615, "learning_rate": 6.619753783444435e-06, "loss": 0.6799, "step": 13480 }, { "epoch": 0.4131727350741694, "grad_norm": 1.8065576755889483, "learning_rate": 6.6192842220904886e-06, "loss": 0.6412, "step": 13481 }, { "epoch": 0.4132033835969106, "grad_norm": 1.3061338230687862, "learning_rate": 6.61881464478132e-06, "loss": 0.6765, "step": 13482 }, { "epoch": 0.4132340321196518, "grad_norm": 1.1477228001202486, "learning_rate": 6.618345051521558e-06, "loss": 0.6041, "step": 13483 }, { "epoch": 0.413264680642393, "grad_norm": 1.2898780632877813, "learning_rate": 6.617875442315827e-06, "loss": 0.6742, "step": 13484 }, { "epoch": 0.41329532916513423, "grad_norm": 1.2541389820885016, "learning_rate": 6.617405817168755e-06, "loss": 0.6359, "step": 13485 }, { "epoch": 0.41332597768787543, "grad_norm": 1.1729568711144853, "learning_rate": 6.616936176084969e-06, "loss": 0.7047, "step": 13486 }, { "epoch": 0.41335662621061664, "grad_norm": 1.1024774498932488, "learning_rate": 6.616466519069099e-06, "loss": 0.5415, "step": 13487 }, { "epoch": 0.41338727473335785, "grad_norm": 1.2210458617520688, "learning_rate": 6.61599684612577e-06, "loss": 0.6601, "step": 13488 }, { "epoch": 0.41341792325609905, "grad_norm": 2.5860727313881324, "learning_rate": 6.615527157259611e-06, "loss": 0.7142, "step": 13489 }, { "epoch": 0.41344857177884026, "grad_norm": 1.2477623109199432, "learning_rate": 6.615057452475249e-06, "loss": 0.6687, "step": 13490 }, { "epoch": 0.41347922030158146, "grad_norm": 1.3120080742874913, "learning_rate": 6.6145877317773135e-06, "loss": 0.7123, "step": 13491 }, { "epoch": 0.41350986882432267, "grad_norm": 0.5350346600325798, "learning_rate": 6.614117995170431e-06, "loss": 0.4199, "step": 13492 }, { "epoch": 0.4135405173470639, "grad_norm": 1.1561184325668998, "learning_rate": 6.613648242659232e-06, "loss": 0.6974, "step": 13493 }, { "epoch": 0.4135711658698051, "grad_norm": 1.31373660580392, "learning_rate": 6.613178474248342e-06, "loss": 0.7188, "step": 13494 }, { "epoch": 0.4136018143925463, "grad_norm": 1.238955349825678, "learning_rate": 6.6127086899423935e-06, "loss": 0.6398, "step": 13495 }, { "epoch": 0.4136324629152875, "grad_norm": 1.319417952797377, "learning_rate": 6.612238889746013e-06, "loss": 0.6865, "step": 13496 }, { "epoch": 0.4136631114380287, "grad_norm": 1.214540065451845, "learning_rate": 6.611769073663831e-06, "loss": 0.6077, "step": 13497 }, { "epoch": 0.4136937599607699, "grad_norm": 1.3200549580508012, "learning_rate": 6.611299241700474e-06, "loss": 0.7432, "step": 13498 }, { "epoch": 0.4137244084835111, "grad_norm": 1.1931246410344045, "learning_rate": 6.610829393860575e-06, "loss": 0.5757, "step": 13499 }, { "epoch": 0.4137550570062523, "grad_norm": 1.1912936476955434, "learning_rate": 6.6103595301487625e-06, "loss": 0.6597, "step": 13500 }, { "epoch": 0.4137857055289935, "grad_norm": 1.2849278344839639, "learning_rate": 6.609889650569663e-06, "loss": 0.6622, "step": 13501 }, { "epoch": 0.4138163540517347, "grad_norm": 1.2435200795106922, "learning_rate": 6.609419755127911e-06, "loss": 0.6219, "step": 13502 }, { "epoch": 0.41384700257447593, "grad_norm": 1.2607811612624145, "learning_rate": 6.608949843828132e-06, "loss": 0.75, "step": 13503 }, { "epoch": 0.41387765109721714, "grad_norm": 1.2214452941201621, "learning_rate": 6.6084799166749615e-06, "loss": 0.6485, "step": 13504 }, { "epoch": 0.41390829961995834, "grad_norm": 1.246173185085109, "learning_rate": 6.608009973673025e-06, "loss": 0.6435, "step": 13505 }, { "epoch": 0.41393894814269955, "grad_norm": 1.1087465991655925, "learning_rate": 6.607540014826956e-06, "loss": 0.653, "step": 13506 }, { "epoch": 0.41396959666544075, "grad_norm": 0.48882725300121826, "learning_rate": 6.607070040141382e-06, "loss": 0.416, "step": 13507 }, { "epoch": 0.4140002451881819, "grad_norm": 1.1566105009403995, "learning_rate": 6.606600049620938e-06, "loss": 0.6022, "step": 13508 }, { "epoch": 0.4140308937109231, "grad_norm": 1.3524655988790573, "learning_rate": 6.606130043270251e-06, "loss": 0.7729, "step": 13509 }, { "epoch": 0.4140615422336643, "grad_norm": 1.273679443061751, "learning_rate": 6.6056600210939544e-06, "loss": 0.7455, "step": 13510 }, { "epoch": 0.4140921907564055, "grad_norm": 1.1642133028972166, "learning_rate": 6.605189983096678e-06, "loss": 0.7361, "step": 13511 }, { "epoch": 0.4141228392791467, "grad_norm": 1.0978170607929953, "learning_rate": 6.604719929283056e-06, "loss": 0.6515, "step": 13512 }, { "epoch": 0.41415348780188793, "grad_norm": 1.2542236361338075, "learning_rate": 6.604249859657717e-06, "loss": 0.6326, "step": 13513 }, { "epoch": 0.41418413632462914, "grad_norm": 1.0260758840750512, "learning_rate": 6.603779774225292e-06, "loss": 0.6332, "step": 13514 }, { "epoch": 0.41421478484737034, "grad_norm": 1.3038667795444472, "learning_rate": 6.6033096729904164e-06, "loss": 0.6586, "step": 13515 }, { "epoch": 0.41424543337011155, "grad_norm": 1.237273987536897, "learning_rate": 6.60283955595772e-06, "loss": 0.7765, "step": 13516 }, { "epoch": 0.41427608189285275, "grad_norm": 1.1485588248821088, "learning_rate": 6.602369423131836e-06, "loss": 0.6759, "step": 13517 }, { "epoch": 0.41430673041559396, "grad_norm": 1.1565114820219096, "learning_rate": 6.601899274517394e-06, "loss": 0.6077, "step": 13518 }, { "epoch": 0.41433737893833517, "grad_norm": 1.1938144352255635, "learning_rate": 6.601429110119031e-06, "loss": 0.6627, "step": 13519 }, { "epoch": 0.41436802746107637, "grad_norm": 1.303880816708954, "learning_rate": 6.600958929941376e-06, "loss": 0.6172, "step": 13520 }, { "epoch": 0.4143986759838176, "grad_norm": 1.254094172245438, "learning_rate": 6.600488733989064e-06, "loss": 0.6174, "step": 13521 }, { "epoch": 0.4144293245065588, "grad_norm": 1.0596622741818054, "learning_rate": 6.600018522266724e-06, "loss": 0.6171, "step": 13522 }, { "epoch": 0.4144599730293, "grad_norm": 1.4140764103928578, "learning_rate": 6.599548294778996e-06, "loss": 0.6036, "step": 13523 }, { "epoch": 0.4144906215520412, "grad_norm": 1.3643074098186978, "learning_rate": 6.599078051530506e-06, "loss": 0.6792, "step": 13524 }, { "epoch": 0.4145212700747824, "grad_norm": 1.3464480753478614, "learning_rate": 6.598607792525893e-06, "loss": 0.6421, "step": 13525 }, { "epoch": 0.4145519185975236, "grad_norm": 0.4968427861064154, "learning_rate": 6.598137517769787e-06, "loss": 0.412, "step": 13526 }, { "epoch": 0.4145825671202648, "grad_norm": 0.4694570621110299, "learning_rate": 6.597667227266825e-06, "loss": 0.4285, "step": 13527 }, { "epoch": 0.414613215643006, "grad_norm": 1.1482309921012162, "learning_rate": 6.597196921021638e-06, "loss": 0.6843, "step": 13528 }, { "epoch": 0.4146438641657472, "grad_norm": 1.1179113169590988, "learning_rate": 6.5967265990388605e-06, "loss": 0.7149, "step": 13529 }, { "epoch": 0.41467451268848843, "grad_norm": 1.1751806294869935, "learning_rate": 6.596256261323128e-06, "loss": 0.633, "step": 13530 }, { "epoch": 0.41470516121122963, "grad_norm": 1.1478318866939763, "learning_rate": 6.595785907879074e-06, "loss": 0.6434, "step": 13531 }, { "epoch": 0.41473580973397084, "grad_norm": 1.3331824504856367, "learning_rate": 6.595315538711334e-06, "loss": 0.7903, "step": 13532 }, { "epoch": 0.41476645825671205, "grad_norm": 0.4260582937520403, "learning_rate": 6.5948451538245406e-06, "loss": 0.4331, "step": 13533 }, { "epoch": 0.41479710677945325, "grad_norm": 1.1474862671088009, "learning_rate": 6.5943747532233305e-06, "loss": 0.5811, "step": 13534 }, { "epoch": 0.41482775530219446, "grad_norm": 1.2856913468878808, "learning_rate": 6.593904336912338e-06, "loss": 0.6533, "step": 13535 }, { "epoch": 0.41485840382493566, "grad_norm": 0.4548021796682381, "learning_rate": 6.5934339048961986e-06, "loss": 0.4287, "step": 13536 }, { "epoch": 0.41488905234767687, "grad_norm": 1.1100923473678785, "learning_rate": 6.592963457179546e-06, "loss": 0.6415, "step": 13537 }, { "epoch": 0.4149197008704181, "grad_norm": 1.2644889649180602, "learning_rate": 6.592492993767017e-06, "loss": 0.5658, "step": 13538 }, { "epoch": 0.4149503493931592, "grad_norm": 1.3139342857153067, "learning_rate": 6.592022514663248e-06, "loss": 0.6601, "step": 13539 }, { "epoch": 0.41498099791590043, "grad_norm": 0.466519692834372, "learning_rate": 6.591552019872872e-06, "loss": 0.4524, "step": 13540 }, { "epoch": 0.41501164643864163, "grad_norm": 1.1510351195840633, "learning_rate": 6.591081509400529e-06, "loss": 0.6788, "step": 13541 }, { "epoch": 0.41504229496138284, "grad_norm": 1.268036652735504, "learning_rate": 6.590610983250853e-06, "loss": 0.7162, "step": 13542 }, { "epoch": 0.41507294348412405, "grad_norm": 1.2245481185149512, "learning_rate": 6.590140441428479e-06, "loss": 0.6129, "step": 13543 }, { "epoch": 0.41510359200686525, "grad_norm": 1.2400603833902175, "learning_rate": 6.589669883938043e-06, "loss": 0.6744, "step": 13544 }, { "epoch": 0.41513424052960646, "grad_norm": 1.328939599586465, "learning_rate": 6.5891993107841846e-06, "loss": 0.6658, "step": 13545 }, { "epoch": 0.41516488905234766, "grad_norm": 1.1527862357979375, "learning_rate": 6.588728721971538e-06, "loss": 0.6655, "step": 13546 }, { "epoch": 0.41519553757508887, "grad_norm": 1.24390601892023, "learning_rate": 6.588258117504742e-06, "loss": 0.6508, "step": 13547 }, { "epoch": 0.4152261860978301, "grad_norm": 1.156028703729921, "learning_rate": 6.587787497388431e-06, "loss": 0.6451, "step": 13548 }, { "epoch": 0.4152568346205713, "grad_norm": 1.16071644270755, "learning_rate": 6.5873168616272445e-06, "loss": 0.6182, "step": 13549 }, { "epoch": 0.4152874831433125, "grad_norm": 1.3294991852350362, "learning_rate": 6.586846210225819e-06, "loss": 0.7486, "step": 13550 }, { "epoch": 0.4153181316660537, "grad_norm": 1.254110547795503, "learning_rate": 6.586375543188791e-06, "loss": 0.5952, "step": 13551 }, { "epoch": 0.4153487801887949, "grad_norm": 1.1650898034940678, "learning_rate": 6.585904860520798e-06, "loss": 0.6742, "step": 13552 }, { "epoch": 0.4153794287115361, "grad_norm": 0.5256732908714503, "learning_rate": 6.58543416222648e-06, "loss": 0.4233, "step": 13553 }, { "epoch": 0.4154100772342773, "grad_norm": 1.3642054677708817, "learning_rate": 6.584963448310474e-06, "loss": 0.6006, "step": 13554 }, { "epoch": 0.4154407257570185, "grad_norm": 1.2589219235949287, "learning_rate": 6.5844927187774164e-06, "loss": 0.6743, "step": 13555 }, { "epoch": 0.4154713742797597, "grad_norm": 0.4579610960789029, "learning_rate": 6.5840219736319475e-06, "loss": 0.4273, "step": 13556 }, { "epoch": 0.4155020228025009, "grad_norm": 1.921421381783818, "learning_rate": 6.583551212878704e-06, "loss": 0.7032, "step": 13557 }, { "epoch": 0.41553267132524213, "grad_norm": 1.2544575381180914, "learning_rate": 6.5830804365223266e-06, "loss": 0.5717, "step": 13558 }, { "epoch": 0.41556331984798334, "grad_norm": 1.3563875199272133, "learning_rate": 6.58260964456745e-06, "loss": 0.7219, "step": 13559 }, { "epoch": 0.41559396837072454, "grad_norm": 1.354210480767231, "learning_rate": 6.582138837018719e-06, "loss": 0.7091, "step": 13560 }, { "epoch": 0.41562461689346575, "grad_norm": 1.2507943231516956, "learning_rate": 6.581668013880767e-06, "loss": 0.6728, "step": 13561 }, { "epoch": 0.41565526541620695, "grad_norm": 1.172375192092373, "learning_rate": 6.581197175158236e-06, "loss": 0.6848, "step": 13562 }, { "epoch": 0.41568591393894816, "grad_norm": 1.3267539876130892, "learning_rate": 6.580726320855765e-06, "loss": 0.5958, "step": 13563 }, { "epoch": 0.41571656246168937, "grad_norm": 0.531479014062383, "learning_rate": 6.580255450977992e-06, "loss": 0.4194, "step": 13564 }, { "epoch": 0.41574721098443057, "grad_norm": 1.2654500383722216, "learning_rate": 6.579784565529558e-06, "loss": 0.7745, "step": 13565 }, { "epoch": 0.4157778595071718, "grad_norm": 2.0458765926344786, "learning_rate": 6.579313664515103e-06, "loss": 0.5929, "step": 13566 }, { "epoch": 0.415808508029913, "grad_norm": 1.2790863636107175, "learning_rate": 6.578842747939267e-06, "loss": 0.6496, "step": 13567 }, { "epoch": 0.4158391565526542, "grad_norm": 1.0726701572521602, "learning_rate": 6.578371815806689e-06, "loss": 0.625, "step": 13568 }, { "epoch": 0.4158698050753954, "grad_norm": 1.2557244561626675, "learning_rate": 6.5779008681220095e-06, "loss": 0.7391, "step": 13569 }, { "epoch": 0.41590045359813654, "grad_norm": 0.4512613213630395, "learning_rate": 6.577429904889868e-06, "loss": 0.4185, "step": 13570 }, { "epoch": 0.41593110212087775, "grad_norm": 0.4739241921879446, "learning_rate": 6.576958926114907e-06, "loss": 0.4401, "step": 13571 }, { "epoch": 0.41596175064361895, "grad_norm": 0.46037570955004287, "learning_rate": 6.576487931801766e-06, "loss": 0.4331, "step": 13572 }, { "epoch": 0.41599239916636016, "grad_norm": 1.5454598509315192, "learning_rate": 6.576016921955087e-06, "loss": 0.643, "step": 13573 }, { "epoch": 0.41602304768910137, "grad_norm": 1.1083487360990598, "learning_rate": 6.575545896579509e-06, "loss": 0.6264, "step": 13574 }, { "epoch": 0.41605369621184257, "grad_norm": 1.1809946185408693, "learning_rate": 6.575074855679675e-06, "loss": 0.5528, "step": 13575 }, { "epoch": 0.4160843447345838, "grad_norm": 1.2774526741554943, "learning_rate": 6.574603799260224e-06, "loss": 0.6541, "step": 13576 }, { "epoch": 0.416114993257325, "grad_norm": 1.1972153429451815, "learning_rate": 6.574132727325801e-06, "loss": 0.6832, "step": 13577 }, { "epoch": 0.4161456417800662, "grad_norm": 1.2784266356904517, "learning_rate": 6.5736616398810436e-06, "loss": 0.7533, "step": 13578 }, { "epoch": 0.4161762903028074, "grad_norm": 1.3004059791632598, "learning_rate": 6.573190536930596e-06, "loss": 0.6472, "step": 13579 }, { "epoch": 0.4162069388255486, "grad_norm": 1.3760154814815528, "learning_rate": 6.5727194184790985e-06, "loss": 0.6651, "step": 13580 }, { "epoch": 0.4162375873482898, "grad_norm": 1.229655459922146, "learning_rate": 6.572248284531196e-06, "loss": 0.6869, "step": 13581 }, { "epoch": 0.416268235871031, "grad_norm": 1.2738368374011448, "learning_rate": 6.571777135091528e-06, "loss": 0.6365, "step": 13582 }, { "epoch": 0.4162988843937722, "grad_norm": 1.3120414719253695, "learning_rate": 6.571305970164737e-06, "loss": 0.5857, "step": 13583 }, { "epoch": 0.4163295329165134, "grad_norm": 1.1237329372586435, "learning_rate": 6.570834789755468e-06, "loss": 0.5896, "step": 13584 }, { "epoch": 0.41636018143925463, "grad_norm": 1.13310083515823, "learning_rate": 6.570363593868361e-06, "loss": 0.6657, "step": 13585 }, { "epoch": 0.41639082996199583, "grad_norm": 1.282742660461406, "learning_rate": 6.569892382508061e-06, "loss": 0.735, "step": 13586 }, { "epoch": 0.41642147848473704, "grad_norm": 1.492535162083994, "learning_rate": 6.569421155679207e-06, "loss": 0.7094, "step": 13587 }, { "epoch": 0.41645212700747825, "grad_norm": 1.1414730386422152, "learning_rate": 6.568949913386446e-06, "loss": 0.5694, "step": 13588 }, { "epoch": 0.41648277553021945, "grad_norm": 1.1852130023823286, "learning_rate": 6.56847865563442e-06, "loss": 0.6272, "step": 13589 }, { "epoch": 0.41651342405296066, "grad_norm": 1.243957549782723, "learning_rate": 6.568007382427773e-06, "loss": 0.7094, "step": 13590 }, { "epoch": 0.41654407257570186, "grad_norm": 1.4999399015462875, "learning_rate": 6.567536093771147e-06, "loss": 0.6033, "step": 13591 }, { "epoch": 0.41657472109844307, "grad_norm": 1.0864489316162558, "learning_rate": 6.5670647896691885e-06, "loss": 0.6444, "step": 13592 }, { "epoch": 0.4166053696211843, "grad_norm": 1.2211368352249299, "learning_rate": 6.5665934701265384e-06, "loss": 0.668, "step": 13593 }, { "epoch": 0.4166360181439255, "grad_norm": 1.307974536506657, "learning_rate": 6.566122135147843e-06, "loss": 0.7272, "step": 13594 }, { "epoch": 0.4166666666666667, "grad_norm": 1.392897670839643, "learning_rate": 6.565650784737745e-06, "loss": 0.5821, "step": 13595 }, { "epoch": 0.4166973151894079, "grad_norm": 1.4957882095830541, "learning_rate": 6.565179418900889e-06, "loss": 0.7243, "step": 13596 }, { "epoch": 0.4167279637121491, "grad_norm": 1.2747354936677144, "learning_rate": 6.56470803764192e-06, "loss": 0.6192, "step": 13597 }, { "epoch": 0.4167586122348903, "grad_norm": 1.202193795633667, "learning_rate": 6.5642366409654826e-06, "loss": 0.6136, "step": 13598 }, { "epoch": 0.4167892607576315, "grad_norm": 1.4040835018674664, "learning_rate": 6.56376522887622e-06, "loss": 0.6672, "step": 13599 }, { "epoch": 0.4168199092803727, "grad_norm": 1.324192225188658, "learning_rate": 6.563293801378781e-06, "loss": 0.7265, "step": 13600 }, { "epoch": 0.41685055780311386, "grad_norm": 0.7743397142261088, "learning_rate": 6.562822358477806e-06, "loss": 0.4175, "step": 13601 }, { "epoch": 0.41688120632585507, "grad_norm": 1.196943083724565, "learning_rate": 6.562350900177943e-06, "loss": 0.6344, "step": 13602 }, { "epoch": 0.4169118548485963, "grad_norm": 1.1680773689665025, "learning_rate": 6.5618794264838374e-06, "loss": 0.6107, "step": 13603 }, { "epoch": 0.4169425033713375, "grad_norm": 1.403906342239964, "learning_rate": 6.561407937400132e-06, "loss": 0.7529, "step": 13604 }, { "epoch": 0.4169731518940787, "grad_norm": 1.129350018045054, "learning_rate": 6.560936432931477e-06, "loss": 0.585, "step": 13605 }, { "epoch": 0.4170038004168199, "grad_norm": 1.1582335212603772, "learning_rate": 6.560464913082515e-06, "loss": 0.6086, "step": 13606 }, { "epoch": 0.4170344489395611, "grad_norm": 1.144827602649093, "learning_rate": 6.559993377857894e-06, "loss": 0.745, "step": 13607 }, { "epoch": 0.4170650974623023, "grad_norm": 1.3532981782045161, "learning_rate": 6.5595218272622585e-06, "loss": 0.7033, "step": 13608 }, { "epoch": 0.4170957459850435, "grad_norm": 1.2463159590554858, "learning_rate": 6.559050261300255e-06, "loss": 0.723, "step": 13609 }, { "epoch": 0.4171263945077847, "grad_norm": 0.5107646985327986, "learning_rate": 6.55857867997653e-06, "loss": 0.4211, "step": 13610 }, { "epoch": 0.4171570430305259, "grad_norm": 1.433079233764029, "learning_rate": 6.558107083295731e-06, "loss": 0.7384, "step": 13611 }, { "epoch": 0.4171876915532671, "grad_norm": 1.1662619051200593, "learning_rate": 6.557635471262506e-06, "loss": 0.686, "step": 13612 }, { "epoch": 0.41721834007600833, "grad_norm": 1.1998467021874943, "learning_rate": 6.557163843881498e-06, "loss": 0.6963, "step": 13613 }, { "epoch": 0.41724898859874954, "grad_norm": 1.4353974683854518, "learning_rate": 6.556692201157356e-06, "loss": 0.6309, "step": 13614 }, { "epoch": 0.41727963712149074, "grad_norm": 0.4884706550439389, "learning_rate": 6.556220543094728e-06, "loss": 0.4241, "step": 13615 }, { "epoch": 0.41731028564423195, "grad_norm": 1.1633622254656075, "learning_rate": 6.555748869698262e-06, "loss": 0.5878, "step": 13616 }, { "epoch": 0.41734093416697315, "grad_norm": 1.141356554414351, "learning_rate": 6.5552771809726034e-06, "loss": 0.6746, "step": 13617 }, { "epoch": 0.41737158268971436, "grad_norm": 1.2697993533583851, "learning_rate": 6.554805476922401e-06, "loss": 0.6553, "step": 13618 }, { "epoch": 0.41740223121245557, "grad_norm": 1.1743913110467916, "learning_rate": 6.554333757552302e-06, "loss": 0.7167, "step": 13619 }, { "epoch": 0.41743287973519677, "grad_norm": 1.1134973112962288, "learning_rate": 6.553862022866956e-06, "loss": 0.5983, "step": 13620 }, { "epoch": 0.417463528257938, "grad_norm": 1.3703271270244772, "learning_rate": 6.5533902728710075e-06, "loss": 0.604, "step": 13621 }, { "epoch": 0.4174941767806792, "grad_norm": 1.2870280473847167, "learning_rate": 6.5529185075691095e-06, "loss": 0.5634, "step": 13622 }, { "epoch": 0.4175248253034204, "grad_norm": 1.4892754929074608, "learning_rate": 6.552446726965907e-06, "loss": 0.7169, "step": 13623 }, { "epoch": 0.4175554738261616, "grad_norm": 1.1738977387209284, "learning_rate": 6.55197493106605e-06, "loss": 0.588, "step": 13624 }, { "epoch": 0.4175861223489028, "grad_norm": 1.3444131801620878, "learning_rate": 6.551503119874186e-06, "loss": 0.7405, "step": 13625 }, { "epoch": 0.417616770871644, "grad_norm": 1.2005660902376338, "learning_rate": 6.551031293394965e-06, "loss": 0.6706, "step": 13626 }, { "epoch": 0.4176474193943852, "grad_norm": 1.2525167363989456, "learning_rate": 6.5505594516330385e-06, "loss": 0.7545, "step": 13627 }, { "epoch": 0.4176780679171264, "grad_norm": 1.296952371427288, "learning_rate": 6.55008759459305e-06, "loss": 0.7355, "step": 13628 }, { "epoch": 0.4177087164398676, "grad_norm": 1.1918863669780517, "learning_rate": 6.549615722279652e-06, "loss": 0.6765, "step": 13629 }, { "epoch": 0.41773936496260883, "grad_norm": 1.4918589986292081, "learning_rate": 6.5491438346974945e-06, "loss": 0.6949, "step": 13630 }, { "epoch": 0.41777001348535003, "grad_norm": 0.49893324950149265, "learning_rate": 6.548671931851227e-06, "loss": 0.4095, "step": 13631 }, { "epoch": 0.4178006620080912, "grad_norm": 1.2593991980590666, "learning_rate": 6.5482000137454985e-06, "loss": 0.6218, "step": 13632 }, { "epoch": 0.4178313105308324, "grad_norm": 1.4442626270661092, "learning_rate": 6.547728080384959e-06, "loss": 0.6449, "step": 13633 }, { "epoch": 0.4178619590535736, "grad_norm": 1.130452417818528, "learning_rate": 6.547256131774258e-06, "loss": 0.6893, "step": 13634 }, { "epoch": 0.4178926075763148, "grad_norm": 1.2275860693452147, "learning_rate": 6.546784167918047e-06, "loss": 0.6179, "step": 13635 }, { "epoch": 0.417923256099056, "grad_norm": 1.1448412955772809, "learning_rate": 6.546312188820976e-06, "loss": 0.6702, "step": 13636 }, { "epoch": 0.4179539046217972, "grad_norm": 1.183979335343488, "learning_rate": 6.545840194487694e-06, "loss": 0.6598, "step": 13637 }, { "epoch": 0.4179845531445384, "grad_norm": 1.2929696068359384, "learning_rate": 6.545368184922855e-06, "loss": 0.7072, "step": 13638 }, { "epoch": 0.4180152016672796, "grad_norm": 0.4471122007534135, "learning_rate": 6.5448961601311066e-06, "loss": 0.414, "step": 13639 }, { "epoch": 0.41804585019002083, "grad_norm": 0.43800258511362244, "learning_rate": 6.544424120117103e-06, "loss": 0.4095, "step": 13640 }, { "epoch": 0.41807649871276203, "grad_norm": 1.457346140269064, "learning_rate": 6.54395206488549e-06, "loss": 0.7036, "step": 13641 }, { "epoch": 0.41810714723550324, "grad_norm": 1.2966557137191814, "learning_rate": 6.543479994440926e-06, "loss": 0.7344, "step": 13642 }, { "epoch": 0.41813779575824445, "grad_norm": 1.245481637138567, "learning_rate": 6.543007908788057e-06, "loss": 0.7111, "step": 13643 }, { "epoch": 0.41816844428098565, "grad_norm": 0.45260047190516894, "learning_rate": 6.542535807931536e-06, "loss": 0.4191, "step": 13644 }, { "epoch": 0.41819909280372686, "grad_norm": 0.45000112962351224, "learning_rate": 6.542063691876015e-06, "loss": 0.4466, "step": 13645 }, { "epoch": 0.41822974132646806, "grad_norm": 1.2419419732887758, "learning_rate": 6.541591560626147e-06, "loss": 0.6017, "step": 13646 }, { "epoch": 0.41826038984920927, "grad_norm": 1.1709733870435548, "learning_rate": 6.5411194141865804e-06, "loss": 0.7368, "step": 13647 }, { "epoch": 0.4182910383719505, "grad_norm": 1.320208584709851, "learning_rate": 6.540647252561972e-06, "loss": 0.6425, "step": 13648 }, { "epoch": 0.4183216868946917, "grad_norm": 1.3443720818820968, "learning_rate": 6.540175075756971e-06, "loss": 0.6776, "step": 13649 }, { "epoch": 0.4183523354174329, "grad_norm": 1.1364409198045209, "learning_rate": 6.539702883776232e-06, "loss": 0.6464, "step": 13650 }, { "epoch": 0.4183829839401741, "grad_norm": 1.2489499339260124, "learning_rate": 6.539230676624406e-06, "loss": 0.6459, "step": 13651 }, { "epoch": 0.4184136324629153, "grad_norm": 1.3506465047283691, "learning_rate": 6.538758454306147e-06, "loss": 0.6623, "step": 13652 }, { "epoch": 0.4184442809856565, "grad_norm": 1.2444654188789641, "learning_rate": 6.538286216826107e-06, "loss": 0.6883, "step": 13653 }, { "epoch": 0.4184749295083977, "grad_norm": 1.3690139478224403, "learning_rate": 6.537813964188938e-06, "loss": 0.7228, "step": 13654 }, { "epoch": 0.4185055780311389, "grad_norm": 1.2437043400583696, "learning_rate": 6.537341696399295e-06, "loss": 0.5912, "step": 13655 }, { "epoch": 0.4185362265538801, "grad_norm": 1.2614137948947763, "learning_rate": 6.536869413461832e-06, "loss": 0.6714, "step": 13656 }, { "epoch": 0.4185668750766213, "grad_norm": 1.344828544316894, "learning_rate": 6.5363971153812e-06, "loss": 0.7181, "step": 13657 }, { "epoch": 0.41859752359936253, "grad_norm": 1.121051002977855, "learning_rate": 6.5359248021620556e-06, "loss": 0.6391, "step": 13658 }, { "epoch": 0.41862817212210374, "grad_norm": 0.4831671625075062, "learning_rate": 6.5354524738090505e-06, "loss": 0.4139, "step": 13659 }, { "epoch": 0.41865882064484494, "grad_norm": 1.3909894510341831, "learning_rate": 6.534980130326839e-06, "loss": 0.8209, "step": 13660 }, { "epoch": 0.41868946916758615, "grad_norm": 1.2438253481293782, "learning_rate": 6.534507771720076e-06, "loss": 0.6793, "step": 13661 }, { "epoch": 0.41872011769032735, "grad_norm": 1.1809248038195195, "learning_rate": 6.534035397993415e-06, "loss": 0.6054, "step": 13662 }, { "epoch": 0.4187507662130685, "grad_norm": 1.2616441151132622, "learning_rate": 6.53356300915151e-06, "loss": 0.6858, "step": 13663 }, { "epoch": 0.4187814147358097, "grad_norm": 1.151411002824001, "learning_rate": 6.533090605199017e-06, "loss": 0.666, "step": 13664 }, { "epoch": 0.4188120632585509, "grad_norm": 1.239704361837584, "learning_rate": 6.532618186140591e-06, "loss": 0.6672, "step": 13665 }, { "epoch": 0.4188427117812921, "grad_norm": 1.2814535178692534, "learning_rate": 6.5321457519808855e-06, "loss": 0.6235, "step": 13666 }, { "epoch": 0.4188733603040333, "grad_norm": 1.2335564819147917, "learning_rate": 6.531673302724555e-06, "loss": 0.7459, "step": 13667 }, { "epoch": 0.41890400882677453, "grad_norm": 1.1564720447441372, "learning_rate": 6.531200838376255e-06, "loss": 0.6403, "step": 13668 }, { "epoch": 0.41893465734951574, "grad_norm": 1.314012911955685, "learning_rate": 6.5307283589406425e-06, "loss": 0.6758, "step": 13669 }, { "epoch": 0.41896530587225694, "grad_norm": 1.1889134660329614, "learning_rate": 6.530255864422372e-06, "loss": 0.64, "step": 13670 }, { "epoch": 0.41899595439499815, "grad_norm": 1.2945573970369022, "learning_rate": 6.529783354826098e-06, "loss": 0.6574, "step": 13671 }, { "epoch": 0.41902660291773935, "grad_norm": 1.1225805461763096, "learning_rate": 6.529310830156479e-06, "loss": 0.6315, "step": 13672 }, { "epoch": 0.41905725144048056, "grad_norm": 1.3262116407348494, "learning_rate": 6.5288382904181665e-06, "loss": 0.6135, "step": 13673 }, { "epoch": 0.41908789996322177, "grad_norm": 0.4569305662260225, "learning_rate": 6.528365735615822e-06, "loss": 0.4228, "step": 13674 }, { "epoch": 0.41911854848596297, "grad_norm": 1.1885290775764303, "learning_rate": 6.527893165754097e-06, "loss": 0.684, "step": 13675 }, { "epoch": 0.4191491970087042, "grad_norm": 1.2300468948284675, "learning_rate": 6.5274205808376504e-06, "loss": 0.692, "step": 13676 }, { "epoch": 0.4191798455314454, "grad_norm": 1.2453578977568576, "learning_rate": 6.526947980871137e-06, "loss": 0.6986, "step": 13677 }, { "epoch": 0.4192104940541866, "grad_norm": 1.2553321595274722, "learning_rate": 6.526475365859215e-06, "loss": 0.6732, "step": 13678 }, { "epoch": 0.4192411425769278, "grad_norm": 0.45539777589718644, "learning_rate": 6.526002735806541e-06, "loss": 0.4419, "step": 13679 }, { "epoch": 0.419271791099669, "grad_norm": 1.3716282699820053, "learning_rate": 6.525530090717771e-06, "loss": 0.6278, "step": 13680 }, { "epoch": 0.4193024396224102, "grad_norm": 1.260169317669216, "learning_rate": 6.5250574305975635e-06, "loss": 0.7114, "step": 13681 }, { "epoch": 0.4193330881451514, "grad_norm": 1.2429203155684323, "learning_rate": 6.524584755450573e-06, "loss": 0.6034, "step": 13682 }, { "epoch": 0.4193637366678926, "grad_norm": 0.47240460363892195, "learning_rate": 6.524112065281461e-06, "loss": 0.4562, "step": 13683 }, { "epoch": 0.4193943851906338, "grad_norm": 1.204829993237447, "learning_rate": 6.523639360094882e-06, "loss": 0.6705, "step": 13684 }, { "epoch": 0.41942503371337503, "grad_norm": 1.277856723508404, "learning_rate": 6.523166639895496e-06, "loss": 0.6138, "step": 13685 }, { "epoch": 0.41945568223611623, "grad_norm": 1.2446301942928877, "learning_rate": 6.522693904687958e-06, "loss": 0.7046, "step": 13686 }, { "epoch": 0.41948633075885744, "grad_norm": 0.4623951745855334, "learning_rate": 6.522221154476927e-06, "loss": 0.4512, "step": 13687 }, { "epoch": 0.41951697928159865, "grad_norm": 1.2517163384021661, "learning_rate": 6.521748389267062e-06, "loss": 0.6086, "step": 13688 }, { "epoch": 0.41954762780433985, "grad_norm": 1.3472296573533704, "learning_rate": 6.521275609063021e-06, "loss": 0.708, "step": 13689 }, { "epoch": 0.41957827632708106, "grad_norm": 1.3265698820901035, "learning_rate": 6.520802813869463e-06, "loss": 0.6176, "step": 13690 }, { "epoch": 0.41960892484982226, "grad_norm": 1.3940768581510445, "learning_rate": 6.520330003691045e-06, "loss": 0.6616, "step": 13691 }, { "epoch": 0.41963957337256347, "grad_norm": 1.3021005488484003, "learning_rate": 6.519857178532424e-06, "loss": 0.6138, "step": 13692 }, { "epoch": 0.4196702218953047, "grad_norm": 1.3699257062832704, "learning_rate": 6.519384338398263e-06, "loss": 0.6374, "step": 13693 }, { "epoch": 0.4197008704180458, "grad_norm": 1.1698119092894341, "learning_rate": 6.518911483293221e-06, "loss": 0.6542, "step": 13694 }, { "epoch": 0.41973151894078703, "grad_norm": 1.2094049379192957, "learning_rate": 6.5184386132219535e-06, "loss": 0.6403, "step": 13695 }, { "epoch": 0.41976216746352824, "grad_norm": 1.4797099950290007, "learning_rate": 6.517965728189124e-06, "loss": 0.7305, "step": 13696 }, { "epoch": 0.41979281598626944, "grad_norm": 1.4232955624449632, "learning_rate": 6.517492828199388e-06, "loss": 0.6264, "step": 13697 }, { "epoch": 0.41982346450901065, "grad_norm": 1.310350327986345, "learning_rate": 6.5170199132574075e-06, "loss": 0.6687, "step": 13698 }, { "epoch": 0.41985411303175185, "grad_norm": 1.1021944239005543, "learning_rate": 6.516546983367841e-06, "loss": 0.6563, "step": 13699 }, { "epoch": 0.41988476155449306, "grad_norm": 1.2805473556520117, "learning_rate": 6.516074038535351e-06, "loss": 0.6807, "step": 13700 }, { "epoch": 0.41991541007723426, "grad_norm": 1.1656700977776155, "learning_rate": 6.515601078764593e-06, "loss": 0.5922, "step": 13701 }, { "epoch": 0.41994605859997547, "grad_norm": 1.2293213648752934, "learning_rate": 6.5151281040602325e-06, "loss": 0.6441, "step": 13702 }, { "epoch": 0.4199767071227167, "grad_norm": 1.1588312003614618, "learning_rate": 6.514655114426924e-06, "loss": 0.689, "step": 13703 }, { "epoch": 0.4200073556454579, "grad_norm": 1.3574746714066024, "learning_rate": 6.514182109869333e-06, "loss": 0.674, "step": 13704 }, { "epoch": 0.4200380041681991, "grad_norm": 0.4889239548655642, "learning_rate": 6.513709090392118e-06, "loss": 0.4329, "step": 13705 }, { "epoch": 0.4200686526909403, "grad_norm": 1.3173400164961688, "learning_rate": 6.51323605599994e-06, "loss": 0.6351, "step": 13706 }, { "epoch": 0.4200993012136815, "grad_norm": 1.274920267430634, "learning_rate": 6.51276300669746e-06, "loss": 0.6185, "step": 13707 }, { "epoch": 0.4201299497364227, "grad_norm": 1.3462297774784562, "learning_rate": 6.512289942489339e-06, "loss": 0.7474, "step": 13708 }, { "epoch": 0.4201605982591639, "grad_norm": 1.1657475431214657, "learning_rate": 6.511816863380239e-06, "loss": 0.7049, "step": 13709 }, { "epoch": 0.4201912467819051, "grad_norm": 1.310932601557737, "learning_rate": 6.511343769374819e-06, "loss": 0.6635, "step": 13710 }, { "epoch": 0.4202218953046463, "grad_norm": 1.1813476577525357, "learning_rate": 6.510870660477744e-06, "loss": 0.7391, "step": 13711 }, { "epoch": 0.4202525438273875, "grad_norm": 1.1424994071525147, "learning_rate": 6.510397536693673e-06, "loss": 0.6288, "step": 13712 }, { "epoch": 0.42028319235012873, "grad_norm": 0.4725849692756547, "learning_rate": 6.5099243980272684e-06, "loss": 0.4345, "step": 13713 }, { "epoch": 0.42031384087286994, "grad_norm": 1.3936931409534317, "learning_rate": 6.5094512444831915e-06, "loss": 0.7027, "step": 13714 }, { "epoch": 0.42034448939561114, "grad_norm": 1.1856277507436113, "learning_rate": 6.508978076066107e-06, "loss": 0.6168, "step": 13715 }, { "epoch": 0.42037513791835235, "grad_norm": 0.45535957873361366, "learning_rate": 6.508504892780675e-06, "loss": 0.4139, "step": 13716 }, { "epoch": 0.42040578644109355, "grad_norm": 1.2105419919080858, "learning_rate": 6.508031694631558e-06, "loss": 0.6732, "step": 13717 }, { "epoch": 0.42043643496383476, "grad_norm": 1.1777236592502023, "learning_rate": 6.507558481623419e-06, "loss": 0.574, "step": 13718 }, { "epoch": 0.42046708348657597, "grad_norm": 1.1512115133190455, "learning_rate": 6.50708525376092e-06, "loss": 0.593, "step": 13719 }, { "epoch": 0.42049773200931717, "grad_norm": 1.203481410209491, "learning_rate": 6.506612011048725e-06, "loss": 0.765, "step": 13720 }, { "epoch": 0.4205283805320584, "grad_norm": 0.47285869892639953, "learning_rate": 6.506138753491496e-06, "loss": 0.4446, "step": 13721 }, { "epoch": 0.4205590290547996, "grad_norm": 1.172614310531181, "learning_rate": 6.505665481093897e-06, "loss": 0.6773, "step": 13722 }, { "epoch": 0.4205896775775408, "grad_norm": 1.120339757862198, "learning_rate": 6.50519219386059e-06, "loss": 0.5747, "step": 13723 }, { "epoch": 0.420620326100282, "grad_norm": 0.4744900288005147, "learning_rate": 6.50471889179624e-06, "loss": 0.4299, "step": 13724 }, { "epoch": 0.42065097462302314, "grad_norm": 1.2849019979280776, "learning_rate": 6.5042455749055086e-06, "loss": 0.7104, "step": 13725 }, { "epoch": 0.42068162314576435, "grad_norm": 0.45308197039033243, "learning_rate": 6.503772243193061e-06, "loss": 0.4372, "step": 13726 }, { "epoch": 0.42071227166850556, "grad_norm": 1.3199601795345517, "learning_rate": 6.5032988966635625e-06, "loss": 0.6489, "step": 13727 }, { "epoch": 0.42074292019124676, "grad_norm": 1.3736813737691307, "learning_rate": 6.502825535321674e-06, "loss": 0.6649, "step": 13728 }, { "epoch": 0.42077356871398797, "grad_norm": 1.1799046570153775, "learning_rate": 6.502352159172061e-06, "loss": 0.6539, "step": 13729 }, { "epoch": 0.42080421723672917, "grad_norm": 1.2746675269335244, "learning_rate": 6.501878768219387e-06, "loss": 0.6458, "step": 13730 }, { "epoch": 0.4208348657594704, "grad_norm": 1.172869462639001, "learning_rate": 6.501405362468319e-06, "loss": 0.5717, "step": 13731 }, { "epoch": 0.4208655142822116, "grad_norm": 1.2411829492852757, "learning_rate": 6.500931941923519e-06, "loss": 0.7258, "step": 13732 }, { "epoch": 0.4208961628049528, "grad_norm": 0.4844912645116016, "learning_rate": 6.500458506589652e-06, "loss": 0.4055, "step": 13733 }, { "epoch": 0.420926811327694, "grad_norm": 1.277457878181923, "learning_rate": 6.499985056471384e-06, "loss": 0.7146, "step": 13734 }, { "epoch": 0.4209574598504352, "grad_norm": 1.3094740939116718, "learning_rate": 6.49951159157338e-06, "loss": 0.6462, "step": 13735 }, { "epoch": 0.4209881083731764, "grad_norm": 1.2822545841434074, "learning_rate": 6.499038111900302e-06, "loss": 0.6226, "step": 13736 }, { "epoch": 0.4210187568959176, "grad_norm": 0.48080660257043456, "learning_rate": 6.498564617456821e-06, "loss": 0.423, "step": 13737 }, { "epoch": 0.4210494054186588, "grad_norm": 1.3797859766780303, "learning_rate": 6.498091108247597e-06, "loss": 0.6998, "step": 13738 }, { "epoch": 0.4210800539414, "grad_norm": 1.1226767767267762, "learning_rate": 6.4976175842773005e-06, "loss": 0.5843, "step": 13739 }, { "epoch": 0.42111070246414123, "grad_norm": 1.2849611788159092, "learning_rate": 6.497144045550593e-06, "loss": 0.6242, "step": 13740 }, { "epoch": 0.42114135098688243, "grad_norm": 1.2157876163258667, "learning_rate": 6.496670492072144e-06, "loss": 0.6317, "step": 13741 }, { "epoch": 0.42117199950962364, "grad_norm": 0.47323923133878804, "learning_rate": 6.496196923846615e-06, "loss": 0.4326, "step": 13742 }, { "epoch": 0.42120264803236485, "grad_norm": 1.3029246776580798, "learning_rate": 6.495723340878677e-06, "loss": 0.6591, "step": 13743 }, { "epoch": 0.42123329655510605, "grad_norm": 1.480464652355484, "learning_rate": 6.4952497431729936e-06, "loss": 0.6505, "step": 13744 }, { "epoch": 0.42126394507784726, "grad_norm": 1.4421967882243067, "learning_rate": 6.4947761307342315e-06, "loss": 0.7084, "step": 13745 }, { "epoch": 0.42129459360058846, "grad_norm": 1.2419745415823125, "learning_rate": 6.494302503567057e-06, "loss": 0.667, "step": 13746 }, { "epoch": 0.42132524212332967, "grad_norm": 1.3261648037557034, "learning_rate": 6.493828861676139e-06, "loss": 0.6907, "step": 13747 }, { "epoch": 0.4213558906460709, "grad_norm": 1.121270244488122, "learning_rate": 6.493355205066143e-06, "loss": 0.6722, "step": 13748 }, { "epoch": 0.4213865391688121, "grad_norm": 1.0119905934016349, "learning_rate": 6.492881533741735e-06, "loss": 0.5898, "step": 13749 }, { "epoch": 0.4214171876915533, "grad_norm": 1.2301161452643514, "learning_rate": 6.492407847707584e-06, "loss": 0.6524, "step": 13750 }, { "epoch": 0.4214478362142945, "grad_norm": 1.281213878804518, "learning_rate": 6.491934146968357e-06, "loss": 0.7287, "step": 13751 }, { "epoch": 0.4214784847370357, "grad_norm": 1.0880774543392557, "learning_rate": 6.491460431528721e-06, "loss": 0.5527, "step": 13752 }, { "epoch": 0.4215091332597769, "grad_norm": 1.1149673321517584, "learning_rate": 6.490986701393343e-06, "loss": 0.6758, "step": 13753 }, { "epoch": 0.4215397817825181, "grad_norm": 1.1261616866775053, "learning_rate": 6.490512956566894e-06, "loss": 0.6192, "step": 13754 }, { "epoch": 0.4215704303052593, "grad_norm": 1.200945536967678, "learning_rate": 6.490039197054037e-06, "loss": 0.7047, "step": 13755 }, { "epoch": 0.42160107882800046, "grad_norm": 1.1193741781036246, "learning_rate": 6.489565422859443e-06, "loss": 0.6222, "step": 13756 }, { "epoch": 0.42163172735074167, "grad_norm": 1.1005558073462993, "learning_rate": 6.489091633987778e-06, "loss": 0.6917, "step": 13757 }, { "epoch": 0.4216623758734829, "grad_norm": 1.4205898788568008, "learning_rate": 6.488617830443715e-06, "loss": 0.6287, "step": 13758 }, { "epoch": 0.4216930243962241, "grad_norm": 1.2717112946720135, "learning_rate": 6.488144012231918e-06, "loss": 0.626, "step": 13759 }, { "epoch": 0.4217236729189653, "grad_norm": 0.5100873042441104, "learning_rate": 6.487670179357058e-06, "loss": 0.433, "step": 13760 }, { "epoch": 0.4217543214417065, "grad_norm": 1.126941663374864, "learning_rate": 6.487196331823803e-06, "loss": 0.5055, "step": 13761 }, { "epoch": 0.4217849699644477, "grad_norm": 1.2957430137785075, "learning_rate": 6.486722469636822e-06, "loss": 0.6763, "step": 13762 }, { "epoch": 0.4218156184871889, "grad_norm": 0.445755130601517, "learning_rate": 6.486248592800785e-06, "loss": 0.4088, "step": 13763 }, { "epoch": 0.4218462670099301, "grad_norm": 1.298637720683187, "learning_rate": 6.485774701320358e-06, "loss": 0.6276, "step": 13764 }, { "epoch": 0.4218769155326713, "grad_norm": 1.3754559569824365, "learning_rate": 6.485300795200215e-06, "loss": 0.6733, "step": 13765 }, { "epoch": 0.4219075640554125, "grad_norm": 1.2876968573304695, "learning_rate": 6.484826874445023e-06, "loss": 0.6412, "step": 13766 }, { "epoch": 0.4219382125781537, "grad_norm": 1.237056923385971, "learning_rate": 6.4843529390594505e-06, "loss": 0.7046, "step": 13767 }, { "epoch": 0.42196886110089493, "grad_norm": 1.3057944141107456, "learning_rate": 6.483878989048169e-06, "loss": 0.7127, "step": 13768 }, { "epoch": 0.42199950962363614, "grad_norm": 1.2052871637187557, "learning_rate": 6.48340502441585e-06, "loss": 0.6403, "step": 13769 }, { "epoch": 0.42203015814637734, "grad_norm": 0.4707423982064199, "learning_rate": 6.48293104516716e-06, "loss": 0.4326, "step": 13770 }, { "epoch": 0.42206080666911855, "grad_norm": 1.1543684266390115, "learning_rate": 6.482457051306772e-06, "loss": 0.6136, "step": 13771 }, { "epoch": 0.42209145519185975, "grad_norm": 0.46141209553773677, "learning_rate": 6.481983042839354e-06, "loss": 0.4229, "step": 13772 }, { "epoch": 0.42212210371460096, "grad_norm": 1.3604097565537054, "learning_rate": 6.481509019769579e-06, "loss": 0.6743, "step": 13773 }, { "epoch": 0.42215275223734217, "grad_norm": 1.221539984427917, "learning_rate": 6.481034982102116e-06, "loss": 0.7074, "step": 13774 }, { "epoch": 0.42218340076008337, "grad_norm": 1.2590971840750027, "learning_rate": 6.480560929841636e-06, "loss": 0.6657, "step": 13775 }, { "epoch": 0.4222140492828246, "grad_norm": 0.44199786339421476, "learning_rate": 6.4800868629928116e-06, "loss": 0.4311, "step": 13776 }, { "epoch": 0.4222446978055658, "grad_norm": 1.3076584276952588, "learning_rate": 6.479612781560312e-06, "loss": 0.6676, "step": 13777 }, { "epoch": 0.422275346328307, "grad_norm": 1.3077748836861762, "learning_rate": 6.4791386855488096e-06, "loss": 0.5956, "step": 13778 }, { "epoch": 0.4223059948510482, "grad_norm": 1.2076703554368153, "learning_rate": 6.478664574962974e-06, "loss": 0.62, "step": 13779 }, { "epoch": 0.4223366433737894, "grad_norm": 1.401916242560098, "learning_rate": 6.478190449807479e-06, "loss": 0.6919, "step": 13780 }, { "epoch": 0.4223672918965306, "grad_norm": 1.2951873445843547, "learning_rate": 6.4777163100869944e-06, "loss": 0.6931, "step": 13781 }, { "epoch": 0.4223979404192718, "grad_norm": 1.2704735562466674, "learning_rate": 6.477242155806195e-06, "loss": 0.7577, "step": 13782 }, { "epoch": 0.422428588942013, "grad_norm": 1.3638186436284112, "learning_rate": 6.476767986969748e-06, "loss": 0.7065, "step": 13783 }, { "epoch": 0.4224592374647542, "grad_norm": 1.175533807865584, "learning_rate": 6.47629380358233e-06, "loss": 0.7297, "step": 13784 }, { "epoch": 0.42248988598749543, "grad_norm": 1.1180825342985516, "learning_rate": 6.475819605648611e-06, "loss": 0.5888, "step": 13785 }, { "epoch": 0.42252053451023663, "grad_norm": 1.4291864929808973, "learning_rate": 6.4753453931732634e-06, "loss": 0.7482, "step": 13786 }, { "epoch": 0.4225511830329778, "grad_norm": 1.3266013108991195, "learning_rate": 6.474871166160959e-06, "loss": 0.6099, "step": 13787 }, { "epoch": 0.422581831555719, "grad_norm": 0.4815038366375341, "learning_rate": 6.474396924616374e-06, "loss": 0.3959, "step": 13788 }, { "epoch": 0.4226124800784602, "grad_norm": 1.509708099082744, "learning_rate": 6.473922668544179e-06, "loss": 0.6269, "step": 13789 }, { "epoch": 0.4226431286012014, "grad_norm": 0.4737432822850048, "learning_rate": 6.473448397949045e-06, "loss": 0.4359, "step": 13790 }, { "epoch": 0.4226737771239426, "grad_norm": 1.2977627666579572, "learning_rate": 6.472974112835647e-06, "loss": 0.7066, "step": 13791 }, { "epoch": 0.4227044256466838, "grad_norm": 1.1626473778088848, "learning_rate": 6.472499813208659e-06, "loss": 0.6843, "step": 13792 }, { "epoch": 0.422735074169425, "grad_norm": 1.2682952514616694, "learning_rate": 6.472025499072754e-06, "loss": 0.7471, "step": 13793 }, { "epoch": 0.4227657226921662, "grad_norm": 1.3257440321296063, "learning_rate": 6.471551170432604e-06, "loss": 0.6656, "step": 13794 }, { "epoch": 0.42279637121490743, "grad_norm": 0.47815110643126907, "learning_rate": 6.471076827292885e-06, "loss": 0.444, "step": 13795 }, { "epoch": 0.42282701973764864, "grad_norm": 1.1208910588510523, "learning_rate": 6.470602469658268e-06, "loss": 0.55, "step": 13796 }, { "epoch": 0.42285766826038984, "grad_norm": 1.1909731444043747, "learning_rate": 6.4701280975334316e-06, "loss": 0.6321, "step": 13797 }, { "epoch": 0.42288831678313105, "grad_norm": 1.1965103017955914, "learning_rate": 6.469653710923044e-06, "loss": 0.5853, "step": 13798 }, { "epoch": 0.42291896530587225, "grad_norm": 1.3196006354639758, "learning_rate": 6.469179309831783e-06, "loss": 0.6624, "step": 13799 }, { "epoch": 0.42294961382861346, "grad_norm": 1.248167152423583, "learning_rate": 6.468704894264324e-06, "loss": 0.6326, "step": 13800 }, { "epoch": 0.42298026235135466, "grad_norm": 1.311167664376389, "learning_rate": 6.468230464225337e-06, "loss": 0.6656, "step": 13801 }, { "epoch": 0.42301091087409587, "grad_norm": 0.47854318261814544, "learning_rate": 6.467756019719501e-06, "loss": 0.4343, "step": 13802 }, { "epoch": 0.4230415593968371, "grad_norm": 1.1603815257788537, "learning_rate": 6.467281560751489e-06, "loss": 0.6128, "step": 13803 }, { "epoch": 0.4230722079195783, "grad_norm": 1.2167994449131991, "learning_rate": 6.466807087325978e-06, "loss": 0.7676, "step": 13804 }, { "epoch": 0.4231028564423195, "grad_norm": 1.4485466499303425, "learning_rate": 6.46633259944764e-06, "loss": 0.6924, "step": 13805 }, { "epoch": 0.4231335049650607, "grad_norm": 1.220621801582019, "learning_rate": 6.465858097121151e-06, "loss": 0.7337, "step": 13806 }, { "epoch": 0.4231641534878019, "grad_norm": 1.3547217718213285, "learning_rate": 6.4653835803511884e-06, "loss": 0.6559, "step": 13807 }, { "epoch": 0.4231948020105431, "grad_norm": 0.46104672225538507, "learning_rate": 6.464909049142427e-06, "loss": 0.4272, "step": 13808 }, { "epoch": 0.4232254505332843, "grad_norm": 1.2936169992662723, "learning_rate": 6.464434503499542e-06, "loss": 0.7208, "step": 13809 }, { "epoch": 0.4232560990560255, "grad_norm": 1.2171196318056907, "learning_rate": 6.463959943427207e-06, "loss": 0.7195, "step": 13810 }, { "epoch": 0.4232867475787667, "grad_norm": 0.49060401030454354, "learning_rate": 6.463485368930102e-06, "loss": 0.4288, "step": 13811 }, { "epoch": 0.4233173961015079, "grad_norm": 1.0343306884774728, "learning_rate": 6.4630107800129015e-06, "loss": 0.5579, "step": 13812 }, { "epoch": 0.42334804462424913, "grad_norm": 1.2390758143259963, "learning_rate": 6.46253617668028e-06, "loss": 0.6414, "step": 13813 }, { "epoch": 0.42337869314699034, "grad_norm": 1.271234293984596, "learning_rate": 6.462061558936916e-06, "loss": 0.6302, "step": 13814 }, { "epoch": 0.42340934166973154, "grad_norm": 0.43684405516937314, "learning_rate": 6.461586926787484e-06, "loss": 0.4188, "step": 13815 }, { "epoch": 0.42343999019247275, "grad_norm": 1.36583458200946, "learning_rate": 6.461112280236663e-06, "loss": 0.6479, "step": 13816 }, { "epoch": 0.42347063871521395, "grad_norm": 0.475805166432512, "learning_rate": 6.460637619289129e-06, "loss": 0.4139, "step": 13817 }, { "epoch": 0.4235012872379551, "grad_norm": 1.2564554078868222, "learning_rate": 6.46016294394956e-06, "loss": 0.5599, "step": 13818 }, { "epoch": 0.4235319357606963, "grad_norm": 1.21033622054947, "learning_rate": 6.45968825422263e-06, "loss": 0.6498, "step": 13819 }, { "epoch": 0.4235625842834375, "grad_norm": 1.2794590651542406, "learning_rate": 6.459213550113019e-06, "loss": 0.6792, "step": 13820 }, { "epoch": 0.4235932328061787, "grad_norm": 1.0980846852830102, "learning_rate": 6.4587388316254055e-06, "loss": 0.6792, "step": 13821 }, { "epoch": 0.4236238813289199, "grad_norm": 1.1557135203535627, "learning_rate": 6.458264098764462e-06, "loss": 0.6393, "step": 13822 }, { "epoch": 0.42365452985166113, "grad_norm": 1.2708861532781572, "learning_rate": 6.457789351534871e-06, "loss": 0.5912, "step": 13823 }, { "epoch": 0.42368517837440234, "grad_norm": 1.3711548547900019, "learning_rate": 6.457314589941308e-06, "loss": 0.6914, "step": 13824 }, { "epoch": 0.42371582689714354, "grad_norm": 1.1394688564422994, "learning_rate": 6.456839813988451e-06, "loss": 0.5443, "step": 13825 }, { "epoch": 0.42374647541988475, "grad_norm": 1.3411932030587552, "learning_rate": 6.4563650236809785e-06, "loss": 0.5748, "step": 13826 }, { "epoch": 0.42377712394262596, "grad_norm": 1.1638226357687902, "learning_rate": 6.455890219023569e-06, "loss": 0.5815, "step": 13827 }, { "epoch": 0.42380777246536716, "grad_norm": 1.120053554025672, "learning_rate": 6.455415400020901e-06, "loss": 0.6056, "step": 13828 }, { "epoch": 0.42383842098810837, "grad_norm": 1.1730964574465002, "learning_rate": 6.454940566677652e-06, "loss": 0.6721, "step": 13829 }, { "epoch": 0.42386906951084957, "grad_norm": 1.2024520559106369, "learning_rate": 6.454465718998503e-06, "loss": 0.6406, "step": 13830 }, { "epoch": 0.4238997180335908, "grad_norm": 1.2405817716768606, "learning_rate": 6.45399085698813e-06, "loss": 0.703, "step": 13831 }, { "epoch": 0.423930366556332, "grad_norm": 0.4852952276836119, "learning_rate": 6.453515980651213e-06, "loss": 0.4378, "step": 13832 }, { "epoch": 0.4239610150790732, "grad_norm": 1.3050226113726497, "learning_rate": 6.453041089992431e-06, "loss": 0.6614, "step": 13833 }, { "epoch": 0.4239916636018144, "grad_norm": 1.1080031736984586, "learning_rate": 6.452566185016464e-06, "loss": 0.6573, "step": 13834 }, { "epoch": 0.4240223121245556, "grad_norm": 1.2029982944718267, "learning_rate": 6.452091265727991e-06, "loss": 0.6086, "step": 13835 }, { "epoch": 0.4240529606472968, "grad_norm": 1.2443189500843472, "learning_rate": 6.4516163321316905e-06, "loss": 0.6153, "step": 13836 }, { "epoch": 0.424083609170038, "grad_norm": 0.5129159481897966, "learning_rate": 6.451141384232242e-06, "loss": 0.4436, "step": 13837 }, { "epoch": 0.4241142576927792, "grad_norm": 2.1468656089505735, "learning_rate": 6.450666422034327e-06, "loss": 0.6059, "step": 13838 }, { "epoch": 0.4241449062155204, "grad_norm": 1.102613485280249, "learning_rate": 6.450191445542625e-06, "loss": 0.5237, "step": 13839 }, { "epoch": 0.42417555473826163, "grad_norm": 1.224703063683997, "learning_rate": 6.449716454761816e-06, "loss": 0.6741, "step": 13840 }, { "epoch": 0.42420620326100283, "grad_norm": 1.1056701246017417, "learning_rate": 6.449241449696579e-06, "loss": 0.6834, "step": 13841 }, { "epoch": 0.42423685178374404, "grad_norm": 1.1591228109551248, "learning_rate": 6.448766430351595e-06, "loss": 0.6348, "step": 13842 }, { "epoch": 0.42426750030648525, "grad_norm": 0.48052323335864516, "learning_rate": 6.448291396731545e-06, "loss": 0.4365, "step": 13843 }, { "epoch": 0.42429814882922645, "grad_norm": 1.323702072967529, "learning_rate": 6.4478163488411096e-06, "loss": 0.674, "step": 13844 }, { "epoch": 0.42432879735196766, "grad_norm": 1.2563263906184066, "learning_rate": 6.447341286684969e-06, "loss": 0.5835, "step": 13845 }, { "epoch": 0.42435944587470886, "grad_norm": 1.1623989163515709, "learning_rate": 6.446866210267804e-06, "loss": 0.6243, "step": 13846 }, { "epoch": 0.42439009439745007, "grad_norm": 1.032254965940771, "learning_rate": 6.446391119594297e-06, "loss": 0.5606, "step": 13847 }, { "epoch": 0.4244207429201913, "grad_norm": 1.3287876748011302, "learning_rate": 6.445916014669127e-06, "loss": 0.6921, "step": 13848 }, { "epoch": 0.4244513914429324, "grad_norm": 1.2301381007110979, "learning_rate": 6.445440895496977e-06, "loss": 0.5691, "step": 13849 }, { "epoch": 0.42448203996567363, "grad_norm": 1.1769920406223051, "learning_rate": 6.4449657620825275e-06, "loss": 0.6673, "step": 13850 }, { "epoch": 0.42451268848841484, "grad_norm": 1.1793721021140982, "learning_rate": 6.444490614430463e-06, "loss": 0.6168, "step": 13851 }, { "epoch": 0.42454333701115604, "grad_norm": 1.229157774898062, "learning_rate": 6.44401545254546e-06, "loss": 0.6446, "step": 13852 }, { "epoch": 0.42457398553389725, "grad_norm": 1.2408416309025094, "learning_rate": 6.4435402764322056e-06, "loss": 0.6381, "step": 13853 }, { "epoch": 0.42460463405663845, "grad_norm": 1.1587122543742154, "learning_rate": 6.443065086095379e-06, "loss": 0.6838, "step": 13854 }, { "epoch": 0.42463528257937966, "grad_norm": 1.2127320990337898, "learning_rate": 6.442589881539662e-06, "loss": 0.7974, "step": 13855 }, { "epoch": 0.42466593110212086, "grad_norm": 1.2668074801983602, "learning_rate": 6.4421146627697375e-06, "loss": 0.6816, "step": 13856 }, { "epoch": 0.42469657962486207, "grad_norm": 0.4956009745755281, "learning_rate": 6.4416394297902894e-06, "loss": 0.4214, "step": 13857 }, { "epoch": 0.4247272281476033, "grad_norm": 1.2845601848400032, "learning_rate": 6.441164182605999e-06, "loss": 0.6453, "step": 13858 }, { "epoch": 0.4247578766703445, "grad_norm": 1.2636420992041568, "learning_rate": 6.440688921221547e-06, "loss": 0.6045, "step": 13859 }, { "epoch": 0.4247885251930857, "grad_norm": 1.2489815960310477, "learning_rate": 6.440213645641621e-06, "loss": 0.6669, "step": 13860 }, { "epoch": 0.4248191737158269, "grad_norm": 1.069797824832093, "learning_rate": 6.4397383558709005e-06, "loss": 0.607, "step": 13861 }, { "epoch": 0.4248498222385681, "grad_norm": 1.1436862351737582, "learning_rate": 6.439263051914071e-06, "loss": 0.6643, "step": 13862 }, { "epoch": 0.4248804707613093, "grad_norm": 0.4838916835738966, "learning_rate": 6.438787733775812e-06, "loss": 0.4191, "step": 13863 }, { "epoch": 0.4249111192840505, "grad_norm": 1.152809963672909, "learning_rate": 6.438312401460812e-06, "loss": 0.5593, "step": 13864 }, { "epoch": 0.4249417678067917, "grad_norm": 1.4493517182279703, "learning_rate": 6.437837054973748e-06, "loss": 0.6463, "step": 13865 }, { "epoch": 0.4249724163295329, "grad_norm": 1.1792331950100416, "learning_rate": 6.437361694319312e-06, "loss": 0.5916, "step": 13866 }, { "epoch": 0.4250030648522741, "grad_norm": 0.4295656808491389, "learning_rate": 6.436886319502181e-06, "loss": 0.4081, "step": 13867 }, { "epoch": 0.42503371337501533, "grad_norm": 1.4259594945118153, "learning_rate": 6.436410930527042e-06, "loss": 0.7802, "step": 13868 }, { "epoch": 0.42506436189775654, "grad_norm": 1.3472416344999827, "learning_rate": 6.435935527398578e-06, "loss": 0.6685, "step": 13869 }, { "epoch": 0.42509501042049774, "grad_norm": 1.2061408197336871, "learning_rate": 6.435460110121474e-06, "loss": 0.7661, "step": 13870 }, { "epoch": 0.42512565894323895, "grad_norm": 1.218512677198847, "learning_rate": 6.434984678700416e-06, "loss": 0.6855, "step": 13871 }, { "epoch": 0.42515630746598015, "grad_norm": 0.44243131661396623, "learning_rate": 6.434509233140084e-06, "loss": 0.4187, "step": 13872 }, { "epoch": 0.42518695598872136, "grad_norm": 1.0469329286819782, "learning_rate": 6.434033773445168e-06, "loss": 0.6198, "step": 13873 }, { "epoch": 0.42521760451146257, "grad_norm": 1.269998263471269, "learning_rate": 6.4335582996203484e-06, "loss": 0.7284, "step": 13874 }, { "epoch": 0.42524825303420377, "grad_norm": 1.2736355908116639, "learning_rate": 6.433082811670314e-06, "loss": 0.7182, "step": 13875 }, { "epoch": 0.425278901556945, "grad_norm": 1.2594744833059324, "learning_rate": 6.432607309599745e-06, "loss": 0.6641, "step": 13876 }, { "epoch": 0.4253095500796862, "grad_norm": 1.2128317510023001, "learning_rate": 6.432131793413333e-06, "loss": 0.638, "step": 13877 }, { "epoch": 0.4253401986024274, "grad_norm": 1.168341277128171, "learning_rate": 6.431656263115757e-06, "loss": 0.6194, "step": 13878 }, { "epoch": 0.4253708471251686, "grad_norm": 1.2958778735866496, "learning_rate": 6.4311807187117085e-06, "loss": 0.6503, "step": 13879 }, { "epoch": 0.42540149564790974, "grad_norm": 1.2667407854993373, "learning_rate": 6.430705160205868e-06, "loss": 0.7074, "step": 13880 }, { "epoch": 0.42543214417065095, "grad_norm": 1.2087298645273192, "learning_rate": 6.4302295876029245e-06, "loss": 0.6275, "step": 13881 }, { "epoch": 0.42546279269339216, "grad_norm": 0.4764681225492247, "learning_rate": 6.4297540009075634e-06, "loss": 0.4235, "step": 13882 }, { "epoch": 0.42549344121613336, "grad_norm": 1.283094827626004, "learning_rate": 6.429278400124469e-06, "loss": 0.6381, "step": 13883 }, { "epoch": 0.42552408973887457, "grad_norm": 1.255090959640242, "learning_rate": 6.428802785258329e-06, "loss": 0.6598, "step": 13884 }, { "epoch": 0.4255547382616158, "grad_norm": 1.1560670324476705, "learning_rate": 6.4283271563138305e-06, "loss": 0.6854, "step": 13885 }, { "epoch": 0.425585386784357, "grad_norm": 1.224619403632341, "learning_rate": 6.427851513295659e-06, "loss": 0.6866, "step": 13886 }, { "epoch": 0.4256160353070982, "grad_norm": 1.182502381447603, "learning_rate": 6.4273758562085e-06, "loss": 0.6887, "step": 13887 }, { "epoch": 0.4256466838298394, "grad_norm": 0.4761104748784991, "learning_rate": 6.426900185057042e-06, "loss": 0.4287, "step": 13888 }, { "epoch": 0.4256773323525806, "grad_norm": 1.2881820007552236, "learning_rate": 6.4264244998459725e-06, "loss": 0.6503, "step": 13889 }, { "epoch": 0.4257079808753218, "grad_norm": 0.43830574808631534, "learning_rate": 6.425948800579977e-06, "loss": 0.4129, "step": 13890 }, { "epoch": 0.425738629398063, "grad_norm": 1.2995934998918897, "learning_rate": 6.4254730872637415e-06, "loss": 0.6918, "step": 13891 }, { "epoch": 0.4257692779208042, "grad_norm": 1.1959374824053244, "learning_rate": 6.424997359901957e-06, "loss": 0.5933, "step": 13892 }, { "epoch": 0.4257999264435454, "grad_norm": 1.112652395179053, "learning_rate": 6.4245216184993085e-06, "loss": 0.6264, "step": 13893 }, { "epoch": 0.4258305749662866, "grad_norm": 0.4667513670589578, "learning_rate": 6.424045863060484e-06, "loss": 0.4472, "step": 13894 }, { "epoch": 0.42586122348902783, "grad_norm": 1.3500411939807921, "learning_rate": 6.42357009359017e-06, "loss": 0.6773, "step": 13895 }, { "epoch": 0.42589187201176903, "grad_norm": 1.3862180152035999, "learning_rate": 6.423094310093056e-06, "loss": 0.5793, "step": 13896 }, { "epoch": 0.42592252053451024, "grad_norm": 1.4966033142151185, "learning_rate": 6.4226185125738305e-06, "loss": 0.7236, "step": 13897 }, { "epoch": 0.42595316905725145, "grad_norm": 1.1017057180281153, "learning_rate": 6.422142701037179e-06, "loss": 0.6525, "step": 13898 }, { "epoch": 0.42598381757999265, "grad_norm": 1.051638323649829, "learning_rate": 6.4216668754877945e-06, "loss": 0.5749, "step": 13899 }, { "epoch": 0.42601446610273386, "grad_norm": 1.3512539540324044, "learning_rate": 6.42119103593036e-06, "loss": 0.6948, "step": 13900 }, { "epoch": 0.42604511462547506, "grad_norm": 0.44383381376925496, "learning_rate": 6.420715182369569e-06, "loss": 0.4101, "step": 13901 }, { "epoch": 0.42607576314821627, "grad_norm": 1.2779237115736128, "learning_rate": 6.420239314810106e-06, "loss": 0.7404, "step": 13902 }, { "epoch": 0.4261064116709575, "grad_norm": 1.2261216254392846, "learning_rate": 6.419763433256663e-06, "loss": 0.6204, "step": 13903 }, { "epoch": 0.4261370601936987, "grad_norm": 1.0572729678790787, "learning_rate": 6.4192875377139265e-06, "loss": 0.5641, "step": 13904 }, { "epoch": 0.4261677087164399, "grad_norm": 1.1343837033754047, "learning_rate": 6.4188116281865875e-06, "loss": 0.588, "step": 13905 }, { "epoch": 0.4261983572391811, "grad_norm": 1.0967712314888676, "learning_rate": 6.418335704679332e-06, "loss": 0.6451, "step": 13906 }, { "epoch": 0.4262290057619223, "grad_norm": 1.2966651453328482, "learning_rate": 6.417859767196855e-06, "loss": 0.6814, "step": 13907 }, { "epoch": 0.4262596542846635, "grad_norm": 0.45222516151442205, "learning_rate": 6.4173838157438415e-06, "loss": 0.4126, "step": 13908 }, { "epoch": 0.4262903028074047, "grad_norm": 1.2804951425419535, "learning_rate": 6.4169078503249835e-06, "loss": 0.5547, "step": 13909 }, { "epoch": 0.4263209513301459, "grad_norm": 1.2917165107164217, "learning_rate": 6.416431870944969e-06, "loss": 0.6169, "step": 13910 }, { "epoch": 0.42635159985288706, "grad_norm": 1.2691013901178076, "learning_rate": 6.41595587760849e-06, "loss": 0.7109, "step": 13911 }, { "epoch": 0.42638224837562827, "grad_norm": 0.42829797933633057, "learning_rate": 6.415479870320233e-06, "loss": 0.422, "step": 13912 }, { "epoch": 0.4264128968983695, "grad_norm": 1.3931218863625192, "learning_rate": 6.415003849084893e-06, "loss": 0.655, "step": 13913 }, { "epoch": 0.4264435454211107, "grad_norm": 1.1958193271838775, "learning_rate": 6.414527813907158e-06, "loss": 0.5653, "step": 13914 }, { "epoch": 0.4264741939438519, "grad_norm": 1.228426536271321, "learning_rate": 6.414051764791717e-06, "loss": 0.6935, "step": 13915 }, { "epoch": 0.4265048424665931, "grad_norm": 1.2494897685947954, "learning_rate": 6.413575701743264e-06, "loss": 0.7085, "step": 13916 }, { "epoch": 0.4265354909893343, "grad_norm": 1.2474608636113365, "learning_rate": 6.413099624766487e-06, "loss": 0.6747, "step": 13917 }, { "epoch": 0.4265661395120755, "grad_norm": 1.2425329793097453, "learning_rate": 6.4126235338660784e-06, "loss": 0.6557, "step": 13918 }, { "epoch": 0.4265967880348167, "grad_norm": 1.3111899733405394, "learning_rate": 6.4121474290467266e-06, "loss": 0.6696, "step": 13919 }, { "epoch": 0.4266274365575579, "grad_norm": 1.2938258347765041, "learning_rate": 6.411671310313128e-06, "loss": 0.6942, "step": 13920 }, { "epoch": 0.4266580850802991, "grad_norm": 1.1659319832732526, "learning_rate": 6.411195177669968e-06, "loss": 0.6094, "step": 13921 }, { "epoch": 0.4266887336030403, "grad_norm": 1.1714827411354265, "learning_rate": 6.410719031121943e-06, "loss": 0.5853, "step": 13922 }, { "epoch": 0.42671938212578153, "grad_norm": 1.2408192202896973, "learning_rate": 6.410242870673739e-06, "loss": 0.6064, "step": 13923 }, { "epoch": 0.42675003064852274, "grad_norm": 1.3875139693429808, "learning_rate": 6.409766696330055e-06, "loss": 0.7022, "step": 13924 }, { "epoch": 0.42678067917126394, "grad_norm": 1.2885035328715497, "learning_rate": 6.409290508095578e-06, "loss": 0.6217, "step": 13925 }, { "epoch": 0.42681132769400515, "grad_norm": 1.1055090454814709, "learning_rate": 6.408814305974999e-06, "loss": 0.6176, "step": 13926 }, { "epoch": 0.42684197621674635, "grad_norm": 1.2531984776526968, "learning_rate": 6.408338089973015e-06, "loss": 0.6303, "step": 13927 }, { "epoch": 0.42687262473948756, "grad_norm": 1.300642958083829, "learning_rate": 6.407861860094314e-06, "loss": 0.6129, "step": 13928 }, { "epoch": 0.42690327326222877, "grad_norm": 1.1604599924949965, "learning_rate": 6.407385616343591e-06, "loss": 0.6504, "step": 13929 }, { "epoch": 0.42693392178496997, "grad_norm": 1.3782305276740354, "learning_rate": 6.406909358725536e-06, "loss": 0.6188, "step": 13930 }, { "epoch": 0.4269645703077112, "grad_norm": 1.2916789314572554, "learning_rate": 6.4064330872448455e-06, "loss": 0.6936, "step": 13931 }, { "epoch": 0.4269952188304524, "grad_norm": 1.127494007613491, "learning_rate": 6.405956801906207e-06, "loss": 0.7082, "step": 13932 }, { "epoch": 0.4270258673531936, "grad_norm": 1.199045553463047, "learning_rate": 6.405480502714319e-06, "loss": 0.6593, "step": 13933 }, { "epoch": 0.4270565158759348, "grad_norm": 1.1483229830126915, "learning_rate": 6.405004189673869e-06, "loss": 0.6366, "step": 13934 }, { "epoch": 0.427087164398676, "grad_norm": 1.351645243154628, "learning_rate": 6.404527862789556e-06, "loss": 0.675, "step": 13935 }, { "epoch": 0.4271178129214172, "grad_norm": 1.252045798373491, "learning_rate": 6.404051522066068e-06, "loss": 0.636, "step": 13936 }, { "epoch": 0.4271484614441584, "grad_norm": 1.367476573739259, "learning_rate": 6.403575167508104e-06, "loss": 0.6315, "step": 13937 }, { "epoch": 0.4271791099668996, "grad_norm": 1.3629592364651009, "learning_rate": 6.403098799120352e-06, "loss": 0.7233, "step": 13938 }, { "epoch": 0.4272097584896408, "grad_norm": 1.4861971142863608, "learning_rate": 6.402622416907511e-06, "loss": 0.665, "step": 13939 }, { "epoch": 0.42724040701238203, "grad_norm": 0.5092385320465509, "learning_rate": 6.4021460208742716e-06, "loss": 0.4132, "step": 13940 }, { "epoch": 0.42727105553512323, "grad_norm": 1.4854240431614874, "learning_rate": 6.401669611025327e-06, "loss": 0.6329, "step": 13941 }, { "epoch": 0.4273017040578644, "grad_norm": 1.3630467016770622, "learning_rate": 6.401193187365375e-06, "loss": 0.6921, "step": 13942 }, { "epoch": 0.4273323525806056, "grad_norm": 1.3207425834972741, "learning_rate": 6.400716749899108e-06, "loss": 0.7785, "step": 13943 }, { "epoch": 0.4273630011033468, "grad_norm": 1.3014063141960586, "learning_rate": 6.4002402986312195e-06, "loss": 0.652, "step": 13944 }, { "epoch": 0.427393649626088, "grad_norm": 1.5678092474203578, "learning_rate": 6.3997638335664055e-06, "loss": 0.8157, "step": 13945 }, { "epoch": 0.4274242981488292, "grad_norm": 1.3047107335347585, "learning_rate": 6.39928735470936e-06, "loss": 0.681, "step": 13946 }, { "epoch": 0.4274549466715704, "grad_norm": 1.2606916132072346, "learning_rate": 6.39881086206478e-06, "loss": 0.6195, "step": 13947 }, { "epoch": 0.4274855951943116, "grad_norm": 1.3429327199852104, "learning_rate": 6.398334355637356e-06, "loss": 0.5847, "step": 13948 }, { "epoch": 0.4275162437170528, "grad_norm": 1.200767423104218, "learning_rate": 6.397857835431787e-06, "loss": 0.6219, "step": 13949 }, { "epoch": 0.42754689223979403, "grad_norm": 1.237199440841526, "learning_rate": 6.397381301452768e-06, "loss": 0.6238, "step": 13950 }, { "epoch": 0.42757754076253524, "grad_norm": 1.4138741740770515, "learning_rate": 6.396904753704993e-06, "loss": 0.7198, "step": 13951 }, { "epoch": 0.42760818928527644, "grad_norm": 1.276503314179697, "learning_rate": 6.396428192193156e-06, "loss": 0.7145, "step": 13952 }, { "epoch": 0.42763883780801765, "grad_norm": 1.2117015124374058, "learning_rate": 6.395951616921957e-06, "loss": 0.6409, "step": 13953 }, { "epoch": 0.42766948633075885, "grad_norm": 1.234568422833959, "learning_rate": 6.395475027896089e-06, "loss": 0.6598, "step": 13954 }, { "epoch": 0.42770013485350006, "grad_norm": 1.2439275969190315, "learning_rate": 6.394998425120249e-06, "loss": 0.7002, "step": 13955 }, { "epoch": 0.42773078337624126, "grad_norm": 1.6132813755860969, "learning_rate": 6.394521808599131e-06, "loss": 0.6171, "step": 13956 }, { "epoch": 0.42776143189898247, "grad_norm": 1.353223331562532, "learning_rate": 6.394045178337434e-06, "loss": 0.7341, "step": 13957 }, { "epoch": 0.4277920804217237, "grad_norm": 1.2728724789312305, "learning_rate": 6.393568534339854e-06, "loss": 0.663, "step": 13958 }, { "epoch": 0.4278227289444649, "grad_norm": 1.1168038250140773, "learning_rate": 6.393091876611086e-06, "loss": 0.5794, "step": 13959 }, { "epoch": 0.4278533774672061, "grad_norm": 0.4878313250545582, "learning_rate": 6.392615205155826e-06, "loss": 0.4259, "step": 13960 }, { "epoch": 0.4278840259899473, "grad_norm": 1.187636973484703, "learning_rate": 6.3921385199787735e-06, "loss": 0.6529, "step": 13961 }, { "epoch": 0.4279146745126885, "grad_norm": 1.2351949128168729, "learning_rate": 6.391661821084624e-06, "loss": 0.5932, "step": 13962 }, { "epoch": 0.4279453230354297, "grad_norm": 1.337473839876998, "learning_rate": 6.391185108478074e-06, "loss": 0.654, "step": 13963 }, { "epoch": 0.4279759715581709, "grad_norm": 1.2671345832895706, "learning_rate": 6.39070838216382e-06, "loss": 0.6144, "step": 13964 }, { "epoch": 0.4280066200809121, "grad_norm": 1.2967449424247253, "learning_rate": 6.3902316421465626e-06, "loss": 0.6654, "step": 13965 }, { "epoch": 0.4280372686036533, "grad_norm": 1.3185467264895192, "learning_rate": 6.389754888430996e-06, "loss": 0.646, "step": 13966 }, { "epoch": 0.4280679171263945, "grad_norm": 1.2283407007341418, "learning_rate": 6.389278121021818e-06, "loss": 0.6018, "step": 13967 }, { "epoch": 0.42809856564913573, "grad_norm": 1.2934049041503235, "learning_rate": 6.388801339923729e-06, "loss": 0.6459, "step": 13968 }, { "epoch": 0.42812921417187694, "grad_norm": 1.1838820881162129, "learning_rate": 6.388324545141423e-06, "loss": 0.7176, "step": 13969 }, { "epoch": 0.42815986269461814, "grad_norm": 1.4082751064395924, "learning_rate": 6.387847736679603e-06, "loss": 0.7339, "step": 13970 }, { "epoch": 0.42819051121735935, "grad_norm": 1.2366109159532048, "learning_rate": 6.387370914542962e-06, "loss": 0.7413, "step": 13971 }, { "epoch": 0.42822115974010055, "grad_norm": 1.2169919097838011, "learning_rate": 6.386894078736201e-06, "loss": 0.698, "step": 13972 }, { "epoch": 0.4282518082628417, "grad_norm": 1.2711041735670967, "learning_rate": 6.386417229264017e-06, "loss": 0.7322, "step": 13973 }, { "epoch": 0.4282824567855829, "grad_norm": 0.4480111230397843, "learning_rate": 6.385940366131112e-06, "loss": 0.4007, "step": 13974 }, { "epoch": 0.4283131053083241, "grad_norm": 1.3864020519406808, "learning_rate": 6.385463489342179e-06, "loss": 0.681, "step": 13975 }, { "epoch": 0.4283437538310653, "grad_norm": 1.1861354158156727, "learning_rate": 6.384986598901921e-06, "loss": 0.745, "step": 13976 }, { "epoch": 0.4283744023538065, "grad_norm": 1.3291101847855162, "learning_rate": 6.384509694815036e-06, "loss": 0.7244, "step": 13977 }, { "epoch": 0.42840505087654773, "grad_norm": 1.151647015810263, "learning_rate": 6.384032777086222e-06, "loss": 0.5749, "step": 13978 }, { "epoch": 0.42843569939928894, "grad_norm": 1.3900938133530394, "learning_rate": 6.38355584572018e-06, "loss": 0.6979, "step": 13979 }, { "epoch": 0.42846634792203014, "grad_norm": 1.2211617867015772, "learning_rate": 6.383078900721607e-06, "loss": 0.6802, "step": 13980 }, { "epoch": 0.42849699644477135, "grad_norm": 1.211060932529233, "learning_rate": 6.382601942095203e-06, "loss": 0.6797, "step": 13981 }, { "epoch": 0.42852764496751256, "grad_norm": 1.2536707950020214, "learning_rate": 6.38212496984567e-06, "loss": 0.6302, "step": 13982 }, { "epoch": 0.42855829349025376, "grad_norm": 0.4652217723924618, "learning_rate": 6.381647983977706e-06, "loss": 0.4323, "step": 13983 }, { "epoch": 0.42858894201299497, "grad_norm": 1.1800876336583763, "learning_rate": 6.381170984496009e-06, "loss": 0.6144, "step": 13984 }, { "epoch": 0.4286195905357362, "grad_norm": 1.1478262719980057, "learning_rate": 6.380693971405284e-06, "loss": 0.5885, "step": 13985 }, { "epoch": 0.4286502390584774, "grad_norm": 1.2751937159213593, "learning_rate": 6.380216944710224e-06, "loss": 0.7056, "step": 13986 }, { "epoch": 0.4286808875812186, "grad_norm": 1.2218547791509293, "learning_rate": 6.379739904415537e-06, "loss": 0.6767, "step": 13987 }, { "epoch": 0.4287115361039598, "grad_norm": 0.45113719472890224, "learning_rate": 6.379262850525918e-06, "loss": 0.4329, "step": 13988 }, { "epoch": 0.428742184626701, "grad_norm": 1.2153175219359418, "learning_rate": 6.3787857830460706e-06, "loss": 0.6074, "step": 13989 }, { "epoch": 0.4287728331494422, "grad_norm": 1.1103320246865989, "learning_rate": 6.378308701980692e-06, "loss": 0.6717, "step": 13990 }, { "epoch": 0.4288034816721834, "grad_norm": 0.44000101508900097, "learning_rate": 6.377831607334487e-06, "loss": 0.4194, "step": 13991 }, { "epoch": 0.4288341301949246, "grad_norm": 1.3375719138581872, "learning_rate": 6.377354499112153e-06, "loss": 0.6376, "step": 13992 }, { "epoch": 0.4288647787176658, "grad_norm": 1.2544507708570702, "learning_rate": 6.376877377318393e-06, "loss": 0.6979, "step": 13993 }, { "epoch": 0.428895427240407, "grad_norm": 1.231700611861299, "learning_rate": 6.3764002419579095e-06, "loss": 0.7121, "step": 13994 }, { "epoch": 0.42892607576314823, "grad_norm": 1.1920757417244185, "learning_rate": 6.3759230930354e-06, "loss": 0.5965, "step": 13995 }, { "epoch": 0.42895672428588943, "grad_norm": 1.1529547130200923, "learning_rate": 6.375445930555569e-06, "loss": 0.6086, "step": 13996 }, { "epoch": 0.42898737280863064, "grad_norm": 1.1512879676522818, "learning_rate": 6.374968754523119e-06, "loss": 0.618, "step": 13997 }, { "epoch": 0.42901802133137185, "grad_norm": 1.3484256985166978, "learning_rate": 6.3744915649427485e-06, "loss": 0.6754, "step": 13998 }, { "epoch": 0.42904866985411305, "grad_norm": 1.2520838751404402, "learning_rate": 6.374014361819161e-06, "loss": 0.6397, "step": 13999 }, { "epoch": 0.42907931837685426, "grad_norm": 1.1958254381855735, "learning_rate": 6.373537145157058e-06, "loss": 0.7383, "step": 14000 }, { "epoch": 0.42910996689959546, "grad_norm": 1.1146715184640459, "learning_rate": 6.373059914961144e-06, "loss": 0.6363, "step": 14001 }, { "epoch": 0.42914061542233667, "grad_norm": 1.2346930066189494, "learning_rate": 6.372582671236118e-06, "loss": 0.7454, "step": 14002 }, { "epoch": 0.4291712639450779, "grad_norm": 0.4772130721132358, "learning_rate": 6.372105413986684e-06, "loss": 0.4434, "step": 14003 }, { "epoch": 0.429201912467819, "grad_norm": 1.3078088347439558, "learning_rate": 6.371628143217543e-06, "loss": 0.7709, "step": 14004 }, { "epoch": 0.42923256099056023, "grad_norm": 1.2154241574988613, "learning_rate": 6.3711508589334e-06, "loss": 0.6696, "step": 14005 }, { "epoch": 0.42926320951330144, "grad_norm": 1.284333548971215, "learning_rate": 6.370673561138958e-06, "loss": 0.7041, "step": 14006 }, { "epoch": 0.42929385803604264, "grad_norm": 1.2138720570540966, "learning_rate": 6.3701962498389165e-06, "loss": 0.6643, "step": 14007 }, { "epoch": 0.42932450655878385, "grad_norm": 1.3293989200189207, "learning_rate": 6.369718925037982e-06, "loss": 0.7379, "step": 14008 }, { "epoch": 0.42935515508152505, "grad_norm": 1.3690187484510419, "learning_rate": 6.369241586740856e-06, "loss": 0.6789, "step": 14009 }, { "epoch": 0.42938580360426626, "grad_norm": 1.4247655844755849, "learning_rate": 6.3687642349522425e-06, "loss": 0.6446, "step": 14010 }, { "epoch": 0.42941645212700746, "grad_norm": 1.1195996937800907, "learning_rate": 6.368286869676846e-06, "loss": 0.6536, "step": 14011 }, { "epoch": 0.42944710064974867, "grad_norm": 1.2666451790242248, "learning_rate": 6.367809490919368e-06, "loss": 0.6549, "step": 14012 }, { "epoch": 0.4294777491724899, "grad_norm": 1.2417412209323522, "learning_rate": 6.367332098684512e-06, "loss": 0.6167, "step": 14013 }, { "epoch": 0.4295083976952311, "grad_norm": 1.3837457413780172, "learning_rate": 6.366854692976983e-06, "loss": 0.742, "step": 14014 }, { "epoch": 0.4295390462179723, "grad_norm": 1.2094589626465757, "learning_rate": 6.366377273801486e-06, "loss": 0.687, "step": 14015 }, { "epoch": 0.4295696947407135, "grad_norm": 1.3762430879640772, "learning_rate": 6.365899841162725e-06, "loss": 0.7084, "step": 14016 }, { "epoch": 0.4296003432634547, "grad_norm": 1.1650685027093428, "learning_rate": 6.365422395065403e-06, "loss": 0.6198, "step": 14017 }, { "epoch": 0.4296309917861959, "grad_norm": 1.2481993452989473, "learning_rate": 6.3649449355142226e-06, "loss": 0.625, "step": 14018 }, { "epoch": 0.4296616403089371, "grad_norm": 1.2145103508693307, "learning_rate": 6.364467462513892e-06, "loss": 0.5951, "step": 14019 }, { "epoch": 0.4296922888316783, "grad_norm": 1.2180292074732475, "learning_rate": 6.363989976069115e-06, "loss": 0.6007, "step": 14020 }, { "epoch": 0.4297229373544195, "grad_norm": 1.2561279455269174, "learning_rate": 6.363512476184595e-06, "loss": 0.5812, "step": 14021 }, { "epoch": 0.4297535858771607, "grad_norm": 1.2005312882488346, "learning_rate": 6.363034962865038e-06, "loss": 0.6236, "step": 14022 }, { "epoch": 0.42978423439990193, "grad_norm": 1.1620699972805357, "learning_rate": 6.362557436115149e-06, "loss": 0.6499, "step": 14023 }, { "epoch": 0.42981488292264314, "grad_norm": 1.2909415761863396, "learning_rate": 6.362079895939632e-06, "loss": 0.662, "step": 14024 }, { "epoch": 0.42984553144538434, "grad_norm": 1.3479761820969083, "learning_rate": 6.361602342343194e-06, "loss": 0.8106, "step": 14025 }, { "epoch": 0.42987617996812555, "grad_norm": 1.4073503178637874, "learning_rate": 6.361124775330539e-06, "loss": 0.6527, "step": 14026 }, { "epoch": 0.42990682849086675, "grad_norm": 1.126797162796145, "learning_rate": 6.360647194906373e-06, "loss": 0.6932, "step": 14027 }, { "epoch": 0.42993747701360796, "grad_norm": 0.47117271403329697, "learning_rate": 6.360169601075404e-06, "loss": 0.4259, "step": 14028 }, { "epoch": 0.42996812553634917, "grad_norm": 1.3756365490484606, "learning_rate": 6.359691993842335e-06, "loss": 0.645, "step": 14029 }, { "epoch": 0.42999877405909037, "grad_norm": 1.2018904275783096, "learning_rate": 6.359214373211873e-06, "loss": 0.6676, "step": 14030 }, { "epoch": 0.4300294225818316, "grad_norm": 1.052646891211699, "learning_rate": 6.358736739188724e-06, "loss": 0.5578, "step": 14031 }, { "epoch": 0.4300600711045728, "grad_norm": 1.2167542171481311, "learning_rate": 6.3582590917775946e-06, "loss": 0.6793, "step": 14032 }, { "epoch": 0.430090719627314, "grad_norm": 1.2706409337240208, "learning_rate": 6.357781430983189e-06, "loss": 0.608, "step": 14033 }, { "epoch": 0.4301213681500552, "grad_norm": 1.2378893208621797, "learning_rate": 6.357303756810218e-06, "loss": 0.7048, "step": 14034 }, { "epoch": 0.43015201667279634, "grad_norm": 1.0546937358640052, "learning_rate": 6.356826069263384e-06, "loss": 0.6283, "step": 14035 }, { "epoch": 0.43018266519553755, "grad_norm": 1.2745027055984626, "learning_rate": 6.356348368347396e-06, "loss": 0.672, "step": 14036 }, { "epoch": 0.43021331371827876, "grad_norm": 1.2247587542038312, "learning_rate": 6.355870654066961e-06, "loss": 0.6536, "step": 14037 }, { "epoch": 0.43024396224101996, "grad_norm": 1.2178118815315042, "learning_rate": 6.3553929264267845e-06, "loss": 0.6553, "step": 14038 }, { "epoch": 0.43027461076376117, "grad_norm": 1.223264512002806, "learning_rate": 6.354915185431576e-06, "loss": 0.6584, "step": 14039 }, { "epoch": 0.4303052592865024, "grad_norm": 1.8855241428772762, "learning_rate": 6.354437431086041e-06, "loss": 0.6983, "step": 14040 }, { "epoch": 0.4303359078092436, "grad_norm": 1.2309585027457997, "learning_rate": 6.353959663394887e-06, "loss": 0.6472, "step": 14041 }, { "epoch": 0.4303665563319848, "grad_norm": 1.3382401124935825, "learning_rate": 6.353481882362822e-06, "loss": 0.6207, "step": 14042 }, { "epoch": 0.430397204854726, "grad_norm": 0.4617271359831584, "learning_rate": 6.3530040879945565e-06, "loss": 0.4376, "step": 14043 }, { "epoch": 0.4304278533774672, "grad_norm": 1.1968659664575876, "learning_rate": 6.352526280294791e-06, "loss": 0.6128, "step": 14044 }, { "epoch": 0.4304585019002084, "grad_norm": 1.3646752006378033, "learning_rate": 6.352048459268241e-06, "loss": 0.6201, "step": 14045 }, { "epoch": 0.4304891504229496, "grad_norm": 1.4596349511166238, "learning_rate": 6.35157062491961e-06, "loss": 0.7128, "step": 14046 }, { "epoch": 0.4305197989456908, "grad_norm": 1.2069378677877824, "learning_rate": 6.351092777253609e-06, "loss": 0.5285, "step": 14047 }, { "epoch": 0.430550447468432, "grad_norm": 1.15945928929687, "learning_rate": 6.350614916274945e-06, "loss": 0.6418, "step": 14048 }, { "epoch": 0.4305810959911732, "grad_norm": 1.3987291343961064, "learning_rate": 6.350137041988327e-06, "loss": 0.6737, "step": 14049 }, { "epoch": 0.43061174451391443, "grad_norm": 1.2407078478351876, "learning_rate": 6.349659154398462e-06, "loss": 0.7175, "step": 14050 }, { "epoch": 0.43064239303665564, "grad_norm": 1.1847984335397337, "learning_rate": 6.34918125351006e-06, "loss": 0.6894, "step": 14051 }, { "epoch": 0.43067304155939684, "grad_norm": 1.2309251386909834, "learning_rate": 6.348703339327832e-06, "loss": 0.6249, "step": 14052 }, { "epoch": 0.43070369008213805, "grad_norm": 1.2410147102719942, "learning_rate": 6.348225411856482e-06, "loss": 0.693, "step": 14053 }, { "epoch": 0.43073433860487925, "grad_norm": 1.2567416925966621, "learning_rate": 6.347747471100725e-06, "loss": 0.6814, "step": 14054 }, { "epoch": 0.43076498712762046, "grad_norm": 1.1962190459921471, "learning_rate": 6.347269517065265e-06, "loss": 0.7156, "step": 14055 }, { "epoch": 0.43079563565036166, "grad_norm": 1.2802578384053722, "learning_rate": 6.346791549754816e-06, "loss": 0.6613, "step": 14056 }, { "epoch": 0.43082628417310287, "grad_norm": 1.2565635999793427, "learning_rate": 6.346313569174083e-06, "loss": 0.6466, "step": 14057 }, { "epoch": 0.4308569326958441, "grad_norm": 0.4750733527204118, "learning_rate": 6.34583557532778e-06, "loss": 0.4311, "step": 14058 }, { "epoch": 0.4308875812185853, "grad_norm": 0.4474575539318171, "learning_rate": 6.345357568220613e-06, "loss": 0.4077, "step": 14059 }, { "epoch": 0.4309182297413265, "grad_norm": 1.2914647358073976, "learning_rate": 6.344879547857294e-06, "loss": 0.6229, "step": 14060 }, { "epoch": 0.4309488782640677, "grad_norm": 1.3315018473730384, "learning_rate": 6.3444015142425335e-06, "loss": 0.5755, "step": 14061 }, { "epoch": 0.4309795267868089, "grad_norm": 1.3790402598005447, "learning_rate": 6.34392346738104e-06, "loss": 0.659, "step": 14062 }, { "epoch": 0.4310101753095501, "grad_norm": 1.2241102004047095, "learning_rate": 6.3434454072775255e-06, "loss": 0.6441, "step": 14063 }, { "epoch": 0.4310408238322913, "grad_norm": 1.5898170733078019, "learning_rate": 6.342967333936698e-06, "loss": 0.6637, "step": 14064 }, { "epoch": 0.4310714723550325, "grad_norm": 0.4886581676252717, "learning_rate": 6.342489247363272e-06, "loss": 0.4348, "step": 14065 }, { "epoch": 0.43110212087777366, "grad_norm": 0.46231579206863693, "learning_rate": 6.342011147561955e-06, "loss": 0.4116, "step": 14066 }, { "epoch": 0.43113276940051487, "grad_norm": 1.1589872826324157, "learning_rate": 6.341533034537459e-06, "loss": 0.6559, "step": 14067 }, { "epoch": 0.4311634179232561, "grad_norm": 1.1580017689711093, "learning_rate": 6.3410549082944935e-06, "loss": 0.7073, "step": 14068 }, { "epoch": 0.4311940664459973, "grad_norm": 1.2116755781899693, "learning_rate": 6.340576768837772e-06, "loss": 0.673, "step": 14069 }, { "epoch": 0.4312247149687385, "grad_norm": 0.46846858061255164, "learning_rate": 6.340098616172006e-06, "loss": 0.4079, "step": 14070 }, { "epoch": 0.4312553634914797, "grad_norm": 1.256586695695742, "learning_rate": 6.339620450301903e-06, "loss": 0.7008, "step": 14071 }, { "epoch": 0.4312860120142209, "grad_norm": 1.3744739699484, "learning_rate": 6.339142271232177e-06, "loss": 0.6223, "step": 14072 }, { "epoch": 0.4313166605369621, "grad_norm": 1.2986767504382646, "learning_rate": 6.3386640789675415e-06, "loss": 0.675, "step": 14073 }, { "epoch": 0.4313473090597033, "grad_norm": 1.1302412448962793, "learning_rate": 6.338185873512705e-06, "loss": 0.6673, "step": 14074 }, { "epoch": 0.4313779575824445, "grad_norm": 1.1244790477990012, "learning_rate": 6.337707654872382e-06, "loss": 0.5694, "step": 14075 }, { "epoch": 0.4314086061051857, "grad_norm": 1.1733307222772054, "learning_rate": 6.337229423051281e-06, "loss": 0.5934, "step": 14076 }, { "epoch": 0.4314392546279269, "grad_norm": 1.4111886125444144, "learning_rate": 6.336751178054118e-06, "loss": 0.723, "step": 14077 }, { "epoch": 0.43146990315066813, "grad_norm": 1.3351156619869806, "learning_rate": 6.336272919885603e-06, "loss": 0.6282, "step": 14078 }, { "epoch": 0.43150055167340934, "grad_norm": 0.46988553015416334, "learning_rate": 6.335794648550448e-06, "loss": 0.4038, "step": 14079 }, { "epoch": 0.43153120019615054, "grad_norm": 1.0616125790096296, "learning_rate": 6.335316364053369e-06, "loss": 0.4924, "step": 14080 }, { "epoch": 0.43156184871889175, "grad_norm": 1.3292286012434615, "learning_rate": 6.334838066399074e-06, "loss": 0.7008, "step": 14081 }, { "epoch": 0.43159249724163296, "grad_norm": 0.4587199618478132, "learning_rate": 6.33435975559228e-06, "loss": 0.3957, "step": 14082 }, { "epoch": 0.43162314576437416, "grad_norm": 1.2895193290792815, "learning_rate": 6.333881431637696e-06, "loss": 0.6575, "step": 14083 }, { "epoch": 0.43165379428711537, "grad_norm": 1.2542987213876378, "learning_rate": 6.333403094540038e-06, "loss": 0.6915, "step": 14084 }, { "epoch": 0.43168444280985657, "grad_norm": 1.1926940893790097, "learning_rate": 6.332924744304019e-06, "loss": 0.6843, "step": 14085 }, { "epoch": 0.4317150913325978, "grad_norm": 1.336608647845694, "learning_rate": 6.332446380934349e-06, "loss": 0.7072, "step": 14086 }, { "epoch": 0.431745739855339, "grad_norm": 0.43270525381630875, "learning_rate": 6.331968004435746e-06, "loss": 0.4246, "step": 14087 }, { "epoch": 0.4317763883780802, "grad_norm": 1.082417032945579, "learning_rate": 6.3314896148129205e-06, "loss": 0.605, "step": 14088 }, { "epoch": 0.4318070369008214, "grad_norm": 1.2737853683535545, "learning_rate": 6.331011212070588e-06, "loss": 0.667, "step": 14089 }, { "epoch": 0.4318376854235626, "grad_norm": 1.1004661869866308, "learning_rate": 6.33053279621346e-06, "loss": 0.6232, "step": 14090 }, { "epoch": 0.4318683339463038, "grad_norm": 1.3549508223258981, "learning_rate": 6.3300543672462536e-06, "loss": 0.6103, "step": 14091 }, { "epoch": 0.431898982469045, "grad_norm": 1.0788991931346805, "learning_rate": 6.329575925173679e-06, "loss": 0.4271, "step": 14092 }, { "epoch": 0.4319296309917862, "grad_norm": 1.2391596804261962, "learning_rate": 6.329097470000456e-06, "loss": 0.6271, "step": 14093 }, { "epoch": 0.4319602795145274, "grad_norm": 1.2843844526630472, "learning_rate": 6.328619001731292e-06, "loss": 0.672, "step": 14094 }, { "epoch": 0.43199092803726863, "grad_norm": 1.3277908711947264, "learning_rate": 6.3281405203709065e-06, "loss": 0.6532, "step": 14095 }, { "epoch": 0.43202157656000983, "grad_norm": 1.29770439567761, "learning_rate": 6.327662025924013e-06, "loss": 0.6566, "step": 14096 }, { "epoch": 0.432052225082751, "grad_norm": 1.3270813344586374, "learning_rate": 6.327183518395327e-06, "loss": 0.6528, "step": 14097 }, { "epoch": 0.4320828736054922, "grad_norm": 1.2102832293294374, "learning_rate": 6.32670499778956e-06, "loss": 0.5716, "step": 14098 }, { "epoch": 0.4321135221282334, "grad_norm": 1.2751667857359155, "learning_rate": 6.3262264641114305e-06, "loss": 0.6116, "step": 14099 }, { "epoch": 0.4321441706509746, "grad_norm": 1.308473577291824, "learning_rate": 6.325747917365651e-06, "loss": 0.6501, "step": 14100 }, { "epoch": 0.4321748191737158, "grad_norm": 1.2060946079742418, "learning_rate": 6.32526935755694e-06, "loss": 0.6913, "step": 14101 }, { "epoch": 0.432205467696457, "grad_norm": 0.53120152609669, "learning_rate": 6.3247907846900096e-06, "loss": 0.4326, "step": 14102 }, { "epoch": 0.4322361162191982, "grad_norm": 1.3949128929088896, "learning_rate": 6.324312198769576e-06, "loss": 0.6394, "step": 14103 }, { "epoch": 0.4322667647419394, "grad_norm": 0.4636402304536075, "learning_rate": 6.323833599800356e-06, "loss": 0.4303, "step": 14104 }, { "epoch": 0.43229741326468063, "grad_norm": 1.0716917272983428, "learning_rate": 6.323354987787066e-06, "loss": 0.5725, "step": 14105 }, { "epoch": 0.43232806178742184, "grad_norm": 1.2122994190007248, "learning_rate": 6.32287636273442e-06, "loss": 0.6205, "step": 14106 }, { "epoch": 0.43235871031016304, "grad_norm": 1.2172131963945327, "learning_rate": 6.322397724647134e-06, "loss": 0.6744, "step": 14107 }, { "epoch": 0.43238935883290425, "grad_norm": 1.1806159097556959, "learning_rate": 6.3219190735299254e-06, "loss": 0.6424, "step": 14108 }, { "epoch": 0.43242000735564545, "grad_norm": 0.4913801194446142, "learning_rate": 6.3214404093875105e-06, "loss": 0.4096, "step": 14109 }, { "epoch": 0.43245065587838666, "grad_norm": 1.2255251621582313, "learning_rate": 6.320961732224605e-06, "loss": 0.65, "step": 14110 }, { "epoch": 0.43248130440112786, "grad_norm": 1.3400890512294117, "learning_rate": 6.320483042045924e-06, "loss": 0.609, "step": 14111 }, { "epoch": 0.43251195292386907, "grad_norm": 1.2704135746953806, "learning_rate": 6.320004338856189e-06, "loss": 0.6707, "step": 14112 }, { "epoch": 0.4325426014466103, "grad_norm": 1.3900421363879496, "learning_rate": 6.319525622660111e-06, "loss": 0.7865, "step": 14113 }, { "epoch": 0.4325732499693515, "grad_norm": 1.457053990215834, "learning_rate": 6.31904689346241e-06, "loss": 0.6857, "step": 14114 }, { "epoch": 0.4326038984920927, "grad_norm": 1.0374225036577789, "learning_rate": 6.318568151267801e-06, "loss": 0.6235, "step": 14115 }, { "epoch": 0.4326345470148339, "grad_norm": 1.1896353055088986, "learning_rate": 6.318089396081004e-06, "loss": 0.6608, "step": 14116 }, { "epoch": 0.4326651955375751, "grad_norm": 1.1454939347403263, "learning_rate": 6.317610627906736e-06, "loss": 0.6444, "step": 14117 }, { "epoch": 0.4326958440603163, "grad_norm": 1.1119710859022207, "learning_rate": 6.317131846749711e-06, "loss": 0.589, "step": 14118 }, { "epoch": 0.4327264925830575, "grad_norm": 1.3158916564962007, "learning_rate": 6.316653052614651e-06, "loss": 0.6257, "step": 14119 }, { "epoch": 0.4327571411057987, "grad_norm": 1.223588802664943, "learning_rate": 6.316174245506271e-06, "loss": 0.6765, "step": 14120 }, { "epoch": 0.4327877896285399, "grad_norm": 1.3946987751943052, "learning_rate": 6.315695425429289e-06, "loss": 0.5316, "step": 14121 }, { "epoch": 0.4328184381512811, "grad_norm": 1.2538378522232818, "learning_rate": 6.315216592388423e-06, "loss": 0.6052, "step": 14122 }, { "epoch": 0.43284908667402233, "grad_norm": 1.255441179204581, "learning_rate": 6.314737746388393e-06, "loss": 0.6336, "step": 14123 }, { "epoch": 0.43287973519676354, "grad_norm": 0.5142184970129795, "learning_rate": 6.314258887433915e-06, "loss": 0.421, "step": 14124 }, { "epoch": 0.43291038371950474, "grad_norm": 0.4604630501252689, "learning_rate": 6.313780015529707e-06, "loss": 0.4116, "step": 14125 }, { "epoch": 0.43294103224224595, "grad_norm": 1.3924643989593333, "learning_rate": 6.313301130680488e-06, "loss": 0.6409, "step": 14126 }, { "epoch": 0.43297168076498715, "grad_norm": 1.2621724068181495, "learning_rate": 6.312822232890978e-06, "loss": 0.6207, "step": 14127 }, { "epoch": 0.4330023292877283, "grad_norm": 1.2343401085639394, "learning_rate": 6.312343322165895e-06, "loss": 0.6906, "step": 14128 }, { "epoch": 0.4330329778104695, "grad_norm": 1.2781042480149991, "learning_rate": 6.311864398509957e-06, "loss": 0.6912, "step": 14129 }, { "epoch": 0.4330636263332107, "grad_norm": 1.2022227033826374, "learning_rate": 6.311385461927882e-06, "loss": 0.6876, "step": 14130 }, { "epoch": 0.4330942748559519, "grad_norm": 1.188119455246267, "learning_rate": 6.310906512424393e-06, "loss": 0.642, "step": 14131 }, { "epoch": 0.4331249233786931, "grad_norm": 1.2524816906036174, "learning_rate": 6.3104275500042055e-06, "loss": 0.6774, "step": 14132 }, { "epoch": 0.43315557190143433, "grad_norm": 1.41586983324527, "learning_rate": 6.30994857467204e-06, "loss": 0.6753, "step": 14133 }, { "epoch": 0.43318622042417554, "grad_norm": 1.1789884053800175, "learning_rate": 6.309469586432616e-06, "loss": 0.6753, "step": 14134 }, { "epoch": 0.43321686894691674, "grad_norm": 1.1746202706573259, "learning_rate": 6.308990585290653e-06, "loss": 0.6005, "step": 14135 }, { "epoch": 0.43324751746965795, "grad_norm": 1.1501984090459287, "learning_rate": 6.308511571250871e-06, "loss": 0.6826, "step": 14136 }, { "epoch": 0.43327816599239916, "grad_norm": 1.232776890030177, "learning_rate": 6.3080325443179905e-06, "loss": 0.6634, "step": 14137 }, { "epoch": 0.43330881451514036, "grad_norm": 1.156087922520042, "learning_rate": 6.307553504496729e-06, "loss": 0.6822, "step": 14138 }, { "epoch": 0.43333946303788157, "grad_norm": 0.6226321086339837, "learning_rate": 6.3070744517918105e-06, "loss": 0.4072, "step": 14139 }, { "epoch": 0.4333701115606228, "grad_norm": 1.1693439613195038, "learning_rate": 6.306595386207952e-06, "loss": 0.6327, "step": 14140 }, { "epoch": 0.433400760083364, "grad_norm": 0.5529150407519449, "learning_rate": 6.306116307749874e-06, "loss": 0.4062, "step": 14141 }, { "epoch": 0.4334314086061052, "grad_norm": 1.3715524198801414, "learning_rate": 6.305637216422298e-06, "loss": 0.7188, "step": 14142 }, { "epoch": 0.4334620571288464, "grad_norm": 1.4155469062945008, "learning_rate": 6.305158112229946e-06, "loss": 0.6516, "step": 14143 }, { "epoch": 0.4334927056515876, "grad_norm": 1.2600131574036164, "learning_rate": 6.304678995177535e-06, "loss": 0.6983, "step": 14144 }, { "epoch": 0.4335233541743288, "grad_norm": 1.238425369440883, "learning_rate": 6.304199865269789e-06, "loss": 0.7168, "step": 14145 }, { "epoch": 0.43355400269707, "grad_norm": 1.227167248084143, "learning_rate": 6.303720722511428e-06, "loss": 0.6525, "step": 14146 }, { "epoch": 0.4335846512198112, "grad_norm": 1.1487014365999157, "learning_rate": 6.303241566907173e-06, "loss": 0.6065, "step": 14147 }, { "epoch": 0.4336152997425524, "grad_norm": 1.3532844072978731, "learning_rate": 6.302762398461746e-06, "loss": 0.7118, "step": 14148 }, { "epoch": 0.4336459482652936, "grad_norm": 1.0815374809055875, "learning_rate": 6.302283217179868e-06, "loss": 0.5964, "step": 14149 }, { "epoch": 0.43367659678803483, "grad_norm": 0.6220592691873159, "learning_rate": 6.301804023066258e-06, "loss": 0.4269, "step": 14150 }, { "epoch": 0.43370724531077604, "grad_norm": 1.1236330092810907, "learning_rate": 6.3013248161256425e-06, "loss": 0.6213, "step": 14151 }, { "epoch": 0.43373789383351724, "grad_norm": 1.195207697892197, "learning_rate": 6.300845596362739e-06, "loss": 0.6441, "step": 14152 }, { "epoch": 0.43376854235625845, "grad_norm": 1.3431634381104909, "learning_rate": 6.300366363782272e-06, "loss": 0.7393, "step": 14153 }, { "epoch": 0.43379919087899965, "grad_norm": 2.169961362346114, "learning_rate": 6.299887118388962e-06, "loss": 0.6159, "step": 14154 }, { "epoch": 0.43382983940174086, "grad_norm": 1.188376022315372, "learning_rate": 6.2994078601875334e-06, "loss": 0.6569, "step": 14155 }, { "epoch": 0.43386048792448206, "grad_norm": 1.1767753126327218, "learning_rate": 6.298928589182704e-06, "loss": 0.6774, "step": 14156 }, { "epoch": 0.43389113644722327, "grad_norm": 1.37047330022683, "learning_rate": 6.2984493053792e-06, "loss": 0.5909, "step": 14157 }, { "epoch": 0.4339217849699645, "grad_norm": 0.4569071464109169, "learning_rate": 6.297970008781742e-06, "loss": 0.4048, "step": 14158 }, { "epoch": 0.4339524334927056, "grad_norm": 1.2814473771400046, "learning_rate": 6.297490699395055e-06, "loss": 0.6928, "step": 14159 }, { "epoch": 0.43398308201544683, "grad_norm": 1.2777837866165564, "learning_rate": 6.297011377223859e-06, "loss": 0.6958, "step": 14160 }, { "epoch": 0.43401373053818804, "grad_norm": 1.482171510796517, "learning_rate": 6.296532042272878e-06, "loss": 0.5845, "step": 14161 }, { "epoch": 0.43404437906092924, "grad_norm": 0.4513614155865656, "learning_rate": 6.296052694546837e-06, "loss": 0.4208, "step": 14162 }, { "epoch": 0.43407502758367045, "grad_norm": 1.2825751130506338, "learning_rate": 6.295573334050455e-06, "loss": 0.6214, "step": 14163 }, { "epoch": 0.43410567610641165, "grad_norm": 1.264161363397146, "learning_rate": 6.2950939607884574e-06, "loss": 0.7345, "step": 14164 }, { "epoch": 0.43413632462915286, "grad_norm": 0.4630113690536447, "learning_rate": 6.294614574765567e-06, "loss": 0.4361, "step": 14165 }, { "epoch": 0.43416697315189406, "grad_norm": 1.3814929067791561, "learning_rate": 6.294135175986511e-06, "loss": 0.6868, "step": 14166 }, { "epoch": 0.43419762167463527, "grad_norm": 1.2518515124526248, "learning_rate": 6.293655764456008e-06, "loss": 0.6663, "step": 14167 }, { "epoch": 0.4342282701973765, "grad_norm": 1.1692049058167495, "learning_rate": 6.2931763401787835e-06, "loss": 0.6412, "step": 14168 }, { "epoch": 0.4342589187201177, "grad_norm": 1.2196554398710289, "learning_rate": 6.292696903159562e-06, "loss": 0.6325, "step": 14169 }, { "epoch": 0.4342895672428589, "grad_norm": 1.4846907077862854, "learning_rate": 6.292217453403068e-06, "loss": 0.6923, "step": 14170 }, { "epoch": 0.4343202157656001, "grad_norm": 1.1069306223293194, "learning_rate": 6.291737990914024e-06, "loss": 0.5964, "step": 14171 }, { "epoch": 0.4343508642883413, "grad_norm": 1.278415251505555, "learning_rate": 6.291258515697155e-06, "loss": 0.5992, "step": 14172 }, { "epoch": 0.4343815128110825, "grad_norm": 1.197606766209503, "learning_rate": 6.290779027757186e-06, "loss": 0.715, "step": 14173 }, { "epoch": 0.4344121613338237, "grad_norm": 1.459997522024309, "learning_rate": 6.29029952709884e-06, "loss": 0.6832, "step": 14174 }, { "epoch": 0.4344428098565649, "grad_norm": 1.3392405698429002, "learning_rate": 6.289820013726844e-06, "loss": 0.6512, "step": 14175 }, { "epoch": 0.4344734583793061, "grad_norm": 1.2402020805155674, "learning_rate": 6.2893404876459195e-06, "loss": 0.5873, "step": 14176 }, { "epoch": 0.4345041069020473, "grad_norm": 1.3888583123101406, "learning_rate": 6.288860948860794e-06, "loss": 0.5909, "step": 14177 }, { "epoch": 0.43453475542478853, "grad_norm": 0.5176240450833698, "learning_rate": 6.288381397376193e-06, "loss": 0.4053, "step": 14178 }, { "epoch": 0.43456540394752974, "grad_norm": 1.1829445036711639, "learning_rate": 6.28790183319684e-06, "loss": 0.5811, "step": 14179 }, { "epoch": 0.43459605247027094, "grad_norm": 1.1284487942691612, "learning_rate": 6.28742225632746e-06, "loss": 0.5997, "step": 14180 }, { "epoch": 0.43462670099301215, "grad_norm": 1.3215698041708033, "learning_rate": 6.28694266677278e-06, "loss": 0.7687, "step": 14181 }, { "epoch": 0.43465734951575336, "grad_norm": 1.1570477015747107, "learning_rate": 6.286463064537524e-06, "loss": 0.6993, "step": 14182 }, { "epoch": 0.43468799803849456, "grad_norm": 1.284061602780935, "learning_rate": 6.285983449626418e-06, "loss": 0.6401, "step": 14183 }, { "epoch": 0.43471864656123577, "grad_norm": 1.2151158078671034, "learning_rate": 6.285503822044188e-06, "loss": 0.7026, "step": 14184 }, { "epoch": 0.43474929508397697, "grad_norm": 1.4503874124802452, "learning_rate": 6.285024181795561e-06, "loss": 0.6151, "step": 14185 }, { "epoch": 0.4347799436067182, "grad_norm": 1.3610144192679674, "learning_rate": 6.2845445288852615e-06, "loss": 0.6876, "step": 14186 }, { "epoch": 0.4348105921294594, "grad_norm": 1.3779596698210128, "learning_rate": 6.284064863318016e-06, "loss": 0.6923, "step": 14187 }, { "epoch": 0.4348412406522006, "grad_norm": 1.2025111299504854, "learning_rate": 6.283585185098551e-06, "loss": 0.6573, "step": 14188 }, { "epoch": 0.4348718891749418, "grad_norm": 1.339025295788914, "learning_rate": 6.283105494231591e-06, "loss": 0.7754, "step": 14189 }, { "epoch": 0.43490253769768294, "grad_norm": 1.1717688401824338, "learning_rate": 6.282625790721867e-06, "loss": 0.6735, "step": 14190 }, { "epoch": 0.43493318622042415, "grad_norm": 1.1645935901385784, "learning_rate": 6.2821460745741e-06, "loss": 0.6372, "step": 14191 }, { "epoch": 0.43496383474316536, "grad_norm": 1.2554516120429742, "learning_rate": 6.2816663457930225e-06, "loss": 0.582, "step": 14192 }, { "epoch": 0.43499448326590656, "grad_norm": 1.323295200270225, "learning_rate": 6.281186604383358e-06, "loss": 0.6137, "step": 14193 }, { "epoch": 0.43502513178864777, "grad_norm": 1.1191372032187892, "learning_rate": 6.280706850349834e-06, "loss": 0.587, "step": 14194 }, { "epoch": 0.435055780311389, "grad_norm": 1.289707076200878, "learning_rate": 6.2802270836971756e-06, "loss": 0.5989, "step": 14195 }, { "epoch": 0.4350864288341302, "grad_norm": 1.2917227499407917, "learning_rate": 6.279747304430115e-06, "loss": 0.6698, "step": 14196 }, { "epoch": 0.4351170773568714, "grad_norm": 0.459886078320618, "learning_rate": 6.279267512553375e-06, "loss": 0.4279, "step": 14197 }, { "epoch": 0.4351477258796126, "grad_norm": 1.1071227570373168, "learning_rate": 6.278787708071687e-06, "loss": 0.6542, "step": 14198 }, { "epoch": 0.4351783744023538, "grad_norm": 0.4677034399825137, "learning_rate": 6.278307890989773e-06, "loss": 0.4079, "step": 14199 }, { "epoch": 0.435209022925095, "grad_norm": 1.0865966551158324, "learning_rate": 6.277828061312367e-06, "loss": 0.6557, "step": 14200 }, { "epoch": 0.4352396714478362, "grad_norm": 1.229078467953417, "learning_rate": 6.277348219044194e-06, "loss": 0.6846, "step": 14201 }, { "epoch": 0.4352703199705774, "grad_norm": 1.2278462725756267, "learning_rate": 6.276868364189981e-06, "loss": 0.6524, "step": 14202 }, { "epoch": 0.4353009684933186, "grad_norm": 1.0880648983991783, "learning_rate": 6.276388496754458e-06, "loss": 0.6256, "step": 14203 }, { "epoch": 0.4353316170160598, "grad_norm": 1.179667912027476, "learning_rate": 6.275908616742351e-06, "loss": 0.7234, "step": 14204 }, { "epoch": 0.43536226553880103, "grad_norm": 1.187824875788391, "learning_rate": 6.275428724158393e-06, "loss": 0.5752, "step": 14205 }, { "epoch": 0.43539291406154224, "grad_norm": 1.2397636743009564, "learning_rate": 6.274948819007307e-06, "loss": 0.6388, "step": 14206 }, { "epoch": 0.43542356258428344, "grad_norm": 1.382022146619099, "learning_rate": 6.274468901293825e-06, "loss": 0.7112, "step": 14207 }, { "epoch": 0.43545421110702465, "grad_norm": 0.45614157782638237, "learning_rate": 6.2739889710226745e-06, "loss": 0.44, "step": 14208 }, { "epoch": 0.43548485962976585, "grad_norm": 1.2738995866978886, "learning_rate": 6.2735090281985855e-06, "loss": 0.6948, "step": 14209 }, { "epoch": 0.43551550815250706, "grad_norm": 1.1514204624541795, "learning_rate": 6.273029072826285e-06, "loss": 0.6911, "step": 14210 }, { "epoch": 0.43554615667524826, "grad_norm": 1.1147326399866364, "learning_rate": 6.272549104910504e-06, "loss": 0.5936, "step": 14211 }, { "epoch": 0.43557680519798947, "grad_norm": 0.434757814875904, "learning_rate": 6.272069124455973e-06, "loss": 0.4346, "step": 14212 }, { "epoch": 0.4356074537207307, "grad_norm": 1.1063666519380677, "learning_rate": 6.271589131467416e-06, "loss": 0.5794, "step": 14213 }, { "epoch": 0.4356381022434719, "grad_norm": 1.1578371351643308, "learning_rate": 6.271109125949568e-06, "loss": 0.6826, "step": 14214 }, { "epoch": 0.4356687507662131, "grad_norm": 1.1450486652839467, "learning_rate": 6.270629107907155e-06, "loss": 0.5933, "step": 14215 }, { "epoch": 0.4356993992889543, "grad_norm": 0.4427575971853833, "learning_rate": 6.2701490773449105e-06, "loss": 0.4121, "step": 14216 }, { "epoch": 0.4357300478116955, "grad_norm": 1.3509511239217726, "learning_rate": 6.26966903426756e-06, "loss": 0.6993, "step": 14217 }, { "epoch": 0.4357606963344367, "grad_norm": 1.1238052722006207, "learning_rate": 6.269188978679837e-06, "loss": 0.6789, "step": 14218 }, { "epoch": 0.4357913448571779, "grad_norm": 1.3249698778733834, "learning_rate": 6.26870891058647e-06, "loss": 0.6224, "step": 14219 }, { "epoch": 0.4358219933799191, "grad_norm": 1.1506132705250784, "learning_rate": 6.26822882999219e-06, "loss": 0.5257, "step": 14220 }, { "epoch": 0.43585264190266026, "grad_norm": 1.2939216952228865, "learning_rate": 6.267748736901726e-06, "loss": 0.6518, "step": 14221 }, { "epoch": 0.43588329042540147, "grad_norm": 1.4217701286304798, "learning_rate": 6.2672686313198095e-06, "loss": 0.7396, "step": 14222 }, { "epoch": 0.4359139389481427, "grad_norm": 0.43774752379132675, "learning_rate": 6.26678851325117e-06, "loss": 0.4271, "step": 14223 }, { "epoch": 0.4359445874708839, "grad_norm": 1.1963836362972204, "learning_rate": 6.266308382700541e-06, "loss": 0.6835, "step": 14224 }, { "epoch": 0.4359752359936251, "grad_norm": 0.45736675917201147, "learning_rate": 6.26582823967265e-06, "loss": 0.4128, "step": 14225 }, { "epoch": 0.4360058845163663, "grad_norm": 1.3738888925569814, "learning_rate": 6.26534808417223e-06, "loss": 0.6839, "step": 14226 }, { "epoch": 0.4360365330391075, "grad_norm": 1.1124597291566747, "learning_rate": 6.264867916204011e-06, "loss": 0.5763, "step": 14227 }, { "epoch": 0.4360671815618487, "grad_norm": 1.3078639408368338, "learning_rate": 6.264387735772727e-06, "loss": 0.611, "step": 14228 }, { "epoch": 0.4360978300845899, "grad_norm": 1.1060757477521825, "learning_rate": 6.2639075428831054e-06, "loss": 0.6368, "step": 14229 }, { "epoch": 0.4361284786073311, "grad_norm": 1.4313005335313522, "learning_rate": 6.263427337539878e-06, "loss": 0.7036, "step": 14230 }, { "epoch": 0.4361591271300723, "grad_norm": 1.2601533703746026, "learning_rate": 6.2629471197477795e-06, "loss": 0.6142, "step": 14231 }, { "epoch": 0.4361897756528135, "grad_norm": 1.2963556936275906, "learning_rate": 6.26246688951154e-06, "loss": 0.6525, "step": 14232 }, { "epoch": 0.43622042417555473, "grad_norm": 1.4318915187021881, "learning_rate": 6.261986646835892e-06, "loss": 0.6275, "step": 14233 }, { "epoch": 0.43625107269829594, "grad_norm": 1.2437947602527926, "learning_rate": 6.261506391725565e-06, "loss": 0.6853, "step": 14234 }, { "epoch": 0.43628172122103714, "grad_norm": 1.1050148871623082, "learning_rate": 6.2610261241852946e-06, "loss": 0.6236, "step": 14235 }, { "epoch": 0.43631236974377835, "grad_norm": 1.1366767885286455, "learning_rate": 6.26054584421981e-06, "loss": 0.5989, "step": 14236 }, { "epoch": 0.43634301826651956, "grad_norm": 1.3427957338067626, "learning_rate": 6.260065551833845e-06, "loss": 0.6396, "step": 14237 }, { "epoch": 0.43637366678926076, "grad_norm": 1.2083626502368607, "learning_rate": 6.259585247032129e-06, "loss": 0.5898, "step": 14238 }, { "epoch": 0.43640431531200197, "grad_norm": 1.1380565323665413, "learning_rate": 6.2591049298194005e-06, "loss": 0.6883, "step": 14239 }, { "epoch": 0.4364349638347432, "grad_norm": 0.5169724799589172, "learning_rate": 6.258624600200389e-06, "loss": 0.4348, "step": 14240 }, { "epoch": 0.4364656123574844, "grad_norm": 1.310700012820816, "learning_rate": 6.258144258179826e-06, "loss": 0.6002, "step": 14241 }, { "epoch": 0.4364962608802256, "grad_norm": 1.1923114542816624, "learning_rate": 6.257663903762445e-06, "loss": 0.7036, "step": 14242 }, { "epoch": 0.4365269094029668, "grad_norm": 1.2760581368477923, "learning_rate": 6.257183536952982e-06, "loss": 0.6917, "step": 14243 }, { "epoch": 0.436557557925708, "grad_norm": 1.2564119328487766, "learning_rate": 6.2567031577561676e-06, "loss": 0.6331, "step": 14244 }, { "epoch": 0.4365882064484492, "grad_norm": 1.1340890391007152, "learning_rate": 6.2562227661767336e-06, "loss": 0.6005, "step": 14245 }, { "epoch": 0.4366188549711904, "grad_norm": 1.1913887268766141, "learning_rate": 6.2557423622194165e-06, "loss": 0.6937, "step": 14246 }, { "epoch": 0.4366495034939316, "grad_norm": 1.2205445762133624, "learning_rate": 6.255261945888949e-06, "loss": 0.6433, "step": 14247 }, { "epoch": 0.4366801520166728, "grad_norm": 1.0839409242090086, "learning_rate": 6.254781517190064e-06, "loss": 0.5916, "step": 14248 }, { "epoch": 0.436710800539414, "grad_norm": 1.2094444143613612, "learning_rate": 6.254301076127495e-06, "loss": 0.593, "step": 14249 }, { "epoch": 0.43674144906215523, "grad_norm": 1.024877187081034, "learning_rate": 6.253820622705977e-06, "loss": 0.6509, "step": 14250 }, { "epoch": 0.43677209758489643, "grad_norm": 1.173632645844109, "learning_rate": 6.253340156930243e-06, "loss": 0.61, "step": 14251 }, { "epoch": 0.4368027461076376, "grad_norm": 1.184158233272911, "learning_rate": 6.25285967880503e-06, "loss": 0.7121, "step": 14252 }, { "epoch": 0.4368333946303788, "grad_norm": 1.1525972696511522, "learning_rate": 6.252379188335067e-06, "loss": 0.6477, "step": 14253 }, { "epoch": 0.43686404315312, "grad_norm": 1.283281071146275, "learning_rate": 6.251898685525093e-06, "loss": 0.6879, "step": 14254 }, { "epoch": 0.4368946916758612, "grad_norm": 1.1670699676386924, "learning_rate": 6.251418170379841e-06, "loss": 0.6411, "step": 14255 }, { "epoch": 0.4369253401986024, "grad_norm": 1.2435356167628282, "learning_rate": 6.250937642904045e-06, "loss": 0.6937, "step": 14256 }, { "epoch": 0.4369559887213436, "grad_norm": 1.1915279043758118, "learning_rate": 6.250457103102441e-06, "loss": 0.5827, "step": 14257 }, { "epoch": 0.4369866372440848, "grad_norm": 0.49973150563204066, "learning_rate": 6.2499765509797615e-06, "loss": 0.4337, "step": 14258 }, { "epoch": 0.437017285766826, "grad_norm": 1.3820817218612438, "learning_rate": 6.249495986540746e-06, "loss": 0.6383, "step": 14259 }, { "epoch": 0.43704793428956723, "grad_norm": 1.235911605379701, "learning_rate": 6.249015409790126e-06, "loss": 0.6678, "step": 14260 }, { "epoch": 0.43707858281230844, "grad_norm": 0.4556829721471037, "learning_rate": 6.248534820732637e-06, "loss": 0.4318, "step": 14261 }, { "epoch": 0.43710923133504964, "grad_norm": 1.0601057089080597, "learning_rate": 6.248054219373014e-06, "loss": 0.5773, "step": 14262 }, { "epoch": 0.43713987985779085, "grad_norm": 1.3395818458528526, "learning_rate": 6.247573605715996e-06, "loss": 0.6228, "step": 14263 }, { "epoch": 0.43717052838053205, "grad_norm": 1.231812805353093, "learning_rate": 6.247092979766314e-06, "loss": 0.7034, "step": 14264 }, { "epoch": 0.43720117690327326, "grad_norm": 1.3481435387527247, "learning_rate": 6.246612341528706e-06, "loss": 0.5883, "step": 14265 }, { "epoch": 0.43723182542601446, "grad_norm": 1.2367448636110643, "learning_rate": 6.246131691007908e-06, "loss": 0.6706, "step": 14266 }, { "epoch": 0.43726247394875567, "grad_norm": 1.2495303337128838, "learning_rate": 6.2456510282086556e-06, "loss": 0.6461, "step": 14267 }, { "epoch": 0.4372931224714969, "grad_norm": 1.2424273899022071, "learning_rate": 6.245170353135686e-06, "loss": 0.6474, "step": 14268 }, { "epoch": 0.4373237709942381, "grad_norm": 1.0706679911515005, "learning_rate": 6.244689665793733e-06, "loss": 0.5767, "step": 14269 }, { "epoch": 0.4373544195169793, "grad_norm": 1.2659581764289811, "learning_rate": 6.244208966187534e-06, "loss": 0.7051, "step": 14270 }, { "epoch": 0.4373850680397205, "grad_norm": 1.1137157846080519, "learning_rate": 6.243728254321826e-06, "loss": 0.6374, "step": 14271 }, { "epoch": 0.4374157165624617, "grad_norm": 1.3863309187606265, "learning_rate": 6.243247530201345e-06, "loss": 0.6554, "step": 14272 }, { "epoch": 0.4374463650852029, "grad_norm": 1.2210316454413832, "learning_rate": 6.242766793830828e-06, "loss": 0.5912, "step": 14273 }, { "epoch": 0.4374770136079441, "grad_norm": 1.1857562619888182, "learning_rate": 6.242286045215014e-06, "loss": 0.7491, "step": 14274 }, { "epoch": 0.4375076621306853, "grad_norm": 1.3424448845092551, "learning_rate": 6.241805284358635e-06, "loss": 0.7177, "step": 14275 }, { "epoch": 0.4375383106534265, "grad_norm": 1.2419529992069465, "learning_rate": 6.241324511266432e-06, "loss": 0.7155, "step": 14276 }, { "epoch": 0.4375689591761677, "grad_norm": 1.2217738756499303, "learning_rate": 6.2408437259431396e-06, "loss": 0.6188, "step": 14277 }, { "epoch": 0.43759960769890893, "grad_norm": 1.0712151959906973, "learning_rate": 6.2403629283935e-06, "loss": 0.6368, "step": 14278 }, { "epoch": 0.43763025622165014, "grad_norm": 1.2981832299407987, "learning_rate": 6.239882118622244e-06, "loss": 0.5634, "step": 14279 }, { "epoch": 0.43766090474439134, "grad_norm": 1.2620565131471655, "learning_rate": 6.239401296634113e-06, "loss": 0.757, "step": 14280 }, { "epoch": 0.43769155326713255, "grad_norm": 1.3793718232370835, "learning_rate": 6.238920462433843e-06, "loss": 0.7036, "step": 14281 }, { "epoch": 0.43772220178987375, "grad_norm": 1.2386896144878425, "learning_rate": 6.238439616026174e-06, "loss": 0.6458, "step": 14282 }, { "epoch": 0.4377528503126149, "grad_norm": 1.1604091751231944, "learning_rate": 6.237958757415843e-06, "loss": 0.5689, "step": 14283 }, { "epoch": 0.4377834988353561, "grad_norm": 1.2471644875836148, "learning_rate": 6.237477886607586e-06, "loss": 0.6466, "step": 14284 }, { "epoch": 0.4378141473580973, "grad_norm": 1.2624670743591655, "learning_rate": 6.2369970036061435e-06, "loss": 0.6415, "step": 14285 }, { "epoch": 0.4378447958808385, "grad_norm": 1.4565308474377736, "learning_rate": 6.236516108416254e-06, "loss": 0.7113, "step": 14286 }, { "epoch": 0.4378754444035797, "grad_norm": 1.2944904904948542, "learning_rate": 6.236035201042654e-06, "loss": 0.6864, "step": 14287 }, { "epoch": 0.43790609292632093, "grad_norm": 1.2421989483591938, "learning_rate": 6.235554281490082e-06, "loss": 0.6636, "step": 14288 }, { "epoch": 0.43793674144906214, "grad_norm": 1.7371019669612937, "learning_rate": 6.23507334976328e-06, "loss": 0.6934, "step": 14289 }, { "epoch": 0.43796738997180334, "grad_norm": 1.1543428012562602, "learning_rate": 6.234592405866981e-06, "loss": 0.5866, "step": 14290 }, { "epoch": 0.43799803849454455, "grad_norm": 1.2872180407583629, "learning_rate": 6.2341114498059295e-06, "loss": 0.6703, "step": 14291 }, { "epoch": 0.43802868701728576, "grad_norm": 0.4746631776708723, "learning_rate": 6.233630481584862e-06, "loss": 0.4279, "step": 14292 }, { "epoch": 0.43805933554002696, "grad_norm": 1.3039311086883818, "learning_rate": 6.233149501208518e-06, "loss": 0.5818, "step": 14293 }, { "epoch": 0.43808998406276817, "grad_norm": 1.476236426594558, "learning_rate": 6.2326685086816355e-06, "loss": 0.7064, "step": 14294 }, { "epoch": 0.4381206325855094, "grad_norm": 1.29676956423134, "learning_rate": 6.2321875040089555e-06, "loss": 0.5861, "step": 14295 }, { "epoch": 0.4381512811082506, "grad_norm": 1.3420185675690686, "learning_rate": 6.231706487195215e-06, "loss": 0.6497, "step": 14296 }, { "epoch": 0.4381819296309918, "grad_norm": 1.2473134205106, "learning_rate": 6.231225458245157e-06, "loss": 0.7714, "step": 14297 }, { "epoch": 0.438212578153733, "grad_norm": 1.186414317840169, "learning_rate": 6.230744417163519e-06, "loss": 0.5724, "step": 14298 }, { "epoch": 0.4382432266764742, "grad_norm": 1.3200665270913383, "learning_rate": 6.23026336395504e-06, "loss": 0.7144, "step": 14299 }, { "epoch": 0.4382738751992154, "grad_norm": 1.269131908001413, "learning_rate": 6.229782298624464e-06, "loss": 0.6306, "step": 14300 }, { "epoch": 0.4383045237219566, "grad_norm": 1.235955252510775, "learning_rate": 6.229301221176527e-06, "loss": 0.6887, "step": 14301 }, { "epoch": 0.4383351722446978, "grad_norm": 0.43890142288120026, "learning_rate": 6.2288201316159715e-06, "loss": 0.405, "step": 14302 }, { "epoch": 0.438365820767439, "grad_norm": 0.45539405617205725, "learning_rate": 6.228339029947534e-06, "loss": 0.4022, "step": 14303 }, { "epoch": 0.4383964692901802, "grad_norm": 0.4443245765210493, "learning_rate": 6.227857916175961e-06, "loss": 0.4213, "step": 14304 }, { "epoch": 0.43842711781292143, "grad_norm": 0.43945435861045906, "learning_rate": 6.227376790305989e-06, "loss": 0.4111, "step": 14305 }, { "epoch": 0.43845776633566264, "grad_norm": 1.3080412562342931, "learning_rate": 6.226895652342359e-06, "loss": 0.7357, "step": 14306 }, { "epoch": 0.43848841485840384, "grad_norm": 1.3492896442593951, "learning_rate": 6.226414502289811e-06, "loss": 0.7248, "step": 14307 }, { "epoch": 0.43851906338114505, "grad_norm": 1.0581189330305154, "learning_rate": 6.2259333401530896e-06, "loss": 0.6488, "step": 14308 }, { "epoch": 0.43854971190388625, "grad_norm": 0.4559263265857786, "learning_rate": 6.225452165936932e-06, "loss": 0.4284, "step": 14309 }, { "epoch": 0.43858036042662746, "grad_norm": 1.1566459906473143, "learning_rate": 6.22497097964608e-06, "loss": 0.5635, "step": 14310 }, { "epoch": 0.43861100894936866, "grad_norm": 1.3281973333489332, "learning_rate": 6.224489781285277e-06, "loss": 0.6031, "step": 14311 }, { "epoch": 0.43864165747210987, "grad_norm": 1.3000534023489574, "learning_rate": 6.224008570859262e-06, "loss": 0.6914, "step": 14312 }, { "epoch": 0.4386723059948511, "grad_norm": 1.2598984329995515, "learning_rate": 6.223527348372778e-06, "loss": 0.6592, "step": 14313 }, { "epoch": 0.4387029545175922, "grad_norm": 1.1588466093689938, "learning_rate": 6.223046113830564e-06, "loss": 0.6058, "step": 14314 }, { "epoch": 0.43873360304033343, "grad_norm": 1.3505843675378257, "learning_rate": 6.222564867237366e-06, "loss": 0.7109, "step": 14315 }, { "epoch": 0.43876425156307464, "grad_norm": 1.2660091636879078, "learning_rate": 6.222083608597923e-06, "loss": 0.6767, "step": 14316 }, { "epoch": 0.43879490008581584, "grad_norm": 1.277547876305703, "learning_rate": 6.221602337916978e-06, "loss": 0.6431, "step": 14317 }, { "epoch": 0.43882554860855705, "grad_norm": 1.2804555070620693, "learning_rate": 6.221121055199271e-06, "loss": 0.5761, "step": 14318 }, { "epoch": 0.43885619713129825, "grad_norm": 1.2545897213754122, "learning_rate": 6.220639760449547e-06, "loss": 0.7196, "step": 14319 }, { "epoch": 0.43888684565403946, "grad_norm": 1.7258173054107087, "learning_rate": 6.220158453672547e-06, "loss": 0.7447, "step": 14320 }, { "epoch": 0.43891749417678066, "grad_norm": 1.2548087416964122, "learning_rate": 6.219677134873013e-06, "loss": 0.6532, "step": 14321 }, { "epoch": 0.43894814269952187, "grad_norm": 1.352966631196009, "learning_rate": 6.219195804055689e-06, "loss": 0.7087, "step": 14322 }, { "epoch": 0.4389787912222631, "grad_norm": 1.186828135694419, "learning_rate": 6.218714461225316e-06, "loss": 0.6879, "step": 14323 }, { "epoch": 0.4390094397450043, "grad_norm": 1.4915736868150855, "learning_rate": 6.218233106386639e-06, "loss": 0.6796, "step": 14324 }, { "epoch": 0.4390400882677455, "grad_norm": 1.2408739153458017, "learning_rate": 6.217751739544396e-06, "loss": 0.673, "step": 14325 }, { "epoch": 0.4390707367904867, "grad_norm": 1.1349688082519693, "learning_rate": 6.217270360703337e-06, "loss": 0.614, "step": 14326 }, { "epoch": 0.4391013853132279, "grad_norm": 1.237142937136523, "learning_rate": 6.216788969868199e-06, "loss": 0.6815, "step": 14327 }, { "epoch": 0.4391320338359691, "grad_norm": 1.3968892807392135, "learning_rate": 6.2163075670437324e-06, "loss": 0.7285, "step": 14328 }, { "epoch": 0.4391626823587103, "grad_norm": 0.5529085067712645, "learning_rate": 6.215826152234672e-06, "loss": 0.4456, "step": 14329 }, { "epoch": 0.4391933308814515, "grad_norm": 1.1596749108207032, "learning_rate": 6.215344725445766e-06, "loss": 0.6569, "step": 14330 }, { "epoch": 0.4392239794041927, "grad_norm": 1.3913431369691733, "learning_rate": 6.214863286681759e-06, "loss": 0.6978, "step": 14331 }, { "epoch": 0.4392546279269339, "grad_norm": 1.1345264715264958, "learning_rate": 6.214381835947393e-06, "loss": 0.6047, "step": 14332 }, { "epoch": 0.43928527644967513, "grad_norm": 1.3967754514852453, "learning_rate": 6.213900373247411e-06, "loss": 0.7944, "step": 14333 }, { "epoch": 0.43931592497241634, "grad_norm": 3.6411673329103356, "learning_rate": 6.213418898586559e-06, "loss": 0.5705, "step": 14334 }, { "epoch": 0.43934657349515754, "grad_norm": 1.2572539168255883, "learning_rate": 6.212937411969579e-06, "loss": 0.5965, "step": 14335 }, { "epoch": 0.43937722201789875, "grad_norm": 1.2607031180899728, "learning_rate": 6.2124559134012165e-06, "loss": 0.6353, "step": 14336 }, { "epoch": 0.43940787054063996, "grad_norm": 0.5119474517376172, "learning_rate": 6.211974402886218e-06, "loss": 0.4437, "step": 14337 }, { "epoch": 0.43943851906338116, "grad_norm": 1.2010898318851446, "learning_rate": 6.211492880429323e-06, "loss": 0.6812, "step": 14338 }, { "epoch": 0.43946916758612237, "grad_norm": 1.3305127891225461, "learning_rate": 6.211011346035279e-06, "loss": 0.7071, "step": 14339 }, { "epoch": 0.4394998161088636, "grad_norm": 1.2367901500804643, "learning_rate": 6.210529799708831e-06, "loss": 0.6961, "step": 14340 }, { "epoch": 0.4395304646316048, "grad_norm": 1.3242032984608234, "learning_rate": 6.210048241454723e-06, "loss": 0.6439, "step": 14341 }, { "epoch": 0.439561113154346, "grad_norm": 0.4629903198892217, "learning_rate": 6.2095666712776995e-06, "loss": 0.4257, "step": 14342 }, { "epoch": 0.4395917616770872, "grad_norm": 1.1358317631828294, "learning_rate": 6.209085089182507e-06, "loss": 0.601, "step": 14343 }, { "epoch": 0.4396224101998284, "grad_norm": 0.4730364902990589, "learning_rate": 6.20860349517389e-06, "loss": 0.4058, "step": 14344 }, { "epoch": 0.43965305872256955, "grad_norm": 1.2106024741176804, "learning_rate": 6.208121889256592e-06, "loss": 0.7623, "step": 14345 }, { "epoch": 0.43968370724531075, "grad_norm": 1.1802801927375874, "learning_rate": 6.20764027143536e-06, "loss": 0.5865, "step": 14346 }, { "epoch": 0.43971435576805196, "grad_norm": 1.1805896170584604, "learning_rate": 6.207158641714942e-06, "loss": 0.6203, "step": 14347 }, { "epoch": 0.43974500429079316, "grad_norm": 1.0701809505027047, "learning_rate": 6.2066770001000784e-06, "loss": 0.5883, "step": 14348 }, { "epoch": 0.43977565281353437, "grad_norm": 1.4434385758312707, "learning_rate": 6.206195346595518e-06, "loss": 0.7062, "step": 14349 }, { "epoch": 0.4398063013362756, "grad_norm": 1.1534994228266413, "learning_rate": 6.2057136812060074e-06, "loss": 0.6964, "step": 14350 }, { "epoch": 0.4398369498590168, "grad_norm": 1.1720061204014902, "learning_rate": 6.20523200393629e-06, "loss": 0.5654, "step": 14351 }, { "epoch": 0.439867598381758, "grad_norm": 1.2132409350444664, "learning_rate": 6.204750314791115e-06, "loss": 0.6321, "step": 14352 }, { "epoch": 0.4398982469044992, "grad_norm": 1.3112379140028798, "learning_rate": 6.204268613775225e-06, "loss": 0.7001, "step": 14353 }, { "epoch": 0.4399288954272404, "grad_norm": 1.4288191991918036, "learning_rate": 6.203786900893369e-06, "loss": 0.6181, "step": 14354 }, { "epoch": 0.4399595439499816, "grad_norm": 1.2568247311624945, "learning_rate": 6.203305176150293e-06, "loss": 0.6373, "step": 14355 }, { "epoch": 0.4399901924727228, "grad_norm": 1.1986795430789186, "learning_rate": 6.2028234395507435e-06, "loss": 0.6439, "step": 14356 }, { "epoch": 0.440020840995464, "grad_norm": 1.2462181517715523, "learning_rate": 6.202341691099465e-06, "loss": 0.639, "step": 14357 }, { "epoch": 0.4400514895182052, "grad_norm": 1.2402626199730566, "learning_rate": 6.2018599308012085e-06, "loss": 0.6438, "step": 14358 }, { "epoch": 0.4400821380409464, "grad_norm": 1.200040374081941, "learning_rate": 6.201378158660718e-06, "loss": 0.6393, "step": 14359 }, { "epoch": 0.44011278656368763, "grad_norm": 1.3047352018932379, "learning_rate": 6.200896374682741e-06, "loss": 0.6276, "step": 14360 }, { "epoch": 0.44014343508642884, "grad_norm": 0.5440699562956359, "learning_rate": 6.200414578872024e-06, "loss": 0.4275, "step": 14361 }, { "epoch": 0.44017408360917004, "grad_norm": 1.1754420339261182, "learning_rate": 6.199932771233315e-06, "loss": 0.5909, "step": 14362 }, { "epoch": 0.44020473213191125, "grad_norm": 1.1739749558327675, "learning_rate": 6.199450951771363e-06, "loss": 0.665, "step": 14363 }, { "epoch": 0.44023538065465245, "grad_norm": 1.3243540622333485, "learning_rate": 6.198969120490913e-06, "loss": 0.638, "step": 14364 }, { "epoch": 0.44026602917739366, "grad_norm": 1.429737251068635, "learning_rate": 6.198487277396712e-06, "loss": 0.7145, "step": 14365 }, { "epoch": 0.44029667770013486, "grad_norm": 1.2737434802730037, "learning_rate": 6.198005422493511e-06, "loss": 0.6022, "step": 14366 }, { "epoch": 0.44032732622287607, "grad_norm": 1.214037595730034, "learning_rate": 6.1975235557860554e-06, "loss": 0.6681, "step": 14367 }, { "epoch": 0.4403579747456173, "grad_norm": 1.2568774038169195, "learning_rate": 6.197041677279094e-06, "loss": 0.6614, "step": 14368 }, { "epoch": 0.4403886232683585, "grad_norm": 1.198408465953729, "learning_rate": 6.196559786977374e-06, "loss": 0.6039, "step": 14369 }, { "epoch": 0.4404192717910997, "grad_norm": 1.1473298533570904, "learning_rate": 6.196077884885646e-06, "loss": 0.6153, "step": 14370 }, { "epoch": 0.4404499203138409, "grad_norm": 1.2979489131918502, "learning_rate": 6.195595971008655e-06, "loss": 0.6712, "step": 14371 }, { "epoch": 0.4404805688365821, "grad_norm": 0.4451964504354041, "learning_rate": 6.195114045351151e-06, "loss": 0.3824, "step": 14372 }, { "epoch": 0.4405112173593233, "grad_norm": 1.1996425433444147, "learning_rate": 6.194632107917884e-06, "loss": 0.6259, "step": 14373 }, { "epoch": 0.4405418658820645, "grad_norm": 1.2547026161047805, "learning_rate": 6.1941501587136e-06, "loss": 0.5562, "step": 14374 }, { "epoch": 0.4405725144048057, "grad_norm": 1.4038335805119824, "learning_rate": 6.193668197743051e-06, "loss": 0.6304, "step": 14375 }, { "epoch": 0.44060316292754687, "grad_norm": 1.2236475799489426, "learning_rate": 6.19318622501098e-06, "loss": 0.8044, "step": 14376 }, { "epoch": 0.44063381145028807, "grad_norm": 1.4439734573708631, "learning_rate": 6.192704240522142e-06, "loss": 0.6803, "step": 14377 }, { "epoch": 0.4406644599730293, "grad_norm": 0.46450941093867787, "learning_rate": 6.192222244281284e-06, "loss": 0.4065, "step": 14378 }, { "epoch": 0.4406951084957705, "grad_norm": 1.2942991970426119, "learning_rate": 6.191740236293154e-06, "loss": 0.7504, "step": 14379 }, { "epoch": 0.4407257570185117, "grad_norm": 1.1290888345312788, "learning_rate": 6.191258216562503e-06, "loss": 0.6257, "step": 14380 }, { "epoch": 0.4407564055412529, "grad_norm": 1.162718421742014, "learning_rate": 6.19077618509408e-06, "loss": 0.6539, "step": 14381 }, { "epoch": 0.4407870540639941, "grad_norm": 1.3697419635738934, "learning_rate": 6.190294141892637e-06, "loss": 0.5047, "step": 14382 }, { "epoch": 0.4408177025867353, "grad_norm": 1.3125724794999134, "learning_rate": 6.1898120869629185e-06, "loss": 0.6122, "step": 14383 }, { "epoch": 0.4408483511094765, "grad_norm": 1.2174065721588951, "learning_rate": 6.189330020309678e-06, "loss": 0.7378, "step": 14384 }, { "epoch": 0.4408789996322177, "grad_norm": 0.47516831960528555, "learning_rate": 6.188847941937664e-06, "loss": 0.4074, "step": 14385 }, { "epoch": 0.4409096481549589, "grad_norm": 1.169541322262783, "learning_rate": 6.188365851851629e-06, "loss": 0.6354, "step": 14386 }, { "epoch": 0.4409402966777001, "grad_norm": 1.2595305295097108, "learning_rate": 6.187883750056319e-06, "loss": 0.697, "step": 14387 }, { "epoch": 0.44097094520044133, "grad_norm": 1.2383151761824884, "learning_rate": 6.187401636556487e-06, "loss": 0.5607, "step": 14388 }, { "epoch": 0.44100159372318254, "grad_norm": 1.2817157311318534, "learning_rate": 6.186919511356882e-06, "loss": 0.6595, "step": 14389 }, { "epoch": 0.44103224224592374, "grad_norm": 1.4232093978069218, "learning_rate": 6.186437374462257e-06, "loss": 0.7868, "step": 14390 }, { "epoch": 0.44106289076866495, "grad_norm": 1.4049284360831018, "learning_rate": 6.18595522587736e-06, "loss": 0.6344, "step": 14391 }, { "epoch": 0.44109353929140616, "grad_norm": 1.3025815031908345, "learning_rate": 6.185473065606944e-06, "loss": 0.6415, "step": 14392 }, { "epoch": 0.44112418781414736, "grad_norm": 1.24868752259213, "learning_rate": 6.184990893655758e-06, "loss": 0.6282, "step": 14393 }, { "epoch": 0.44115483633688857, "grad_norm": 1.351495461286786, "learning_rate": 6.184508710028552e-06, "loss": 0.7456, "step": 14394 }, { "epoch": 0.4411854848596298, "grad_norm": 1.2764253243783954, "learning_rate": 6.18402651473008e-06, "loss": 0.6552, "step": 14395 }, { "epoch": 0.441216133382371, "grad_norm": 1.3383248017427933, "learning_rate": 6.18354430776509e-06, "loss": 0.7332, "step": 14396 }, { "epoch": 0.4412467819051122, "grad_norm": 1.2252667554750805, "learning_rate": 6.1830620891383384e-06, "loss": 0.6637, "step": 14397 }, { "epoch": 0.4412774304278534, "grad_norm": 1.3857907267682834, "learning_rate": 6.182579858854572e-06, "loss": 0.732, "step": 14398 }, { "epoch": 0.4413080789505946, "grad_norm": 1.2548533667015975, "learning_rate": 6.182097616918543e-06, "loss": 0.5406, "step": 14399 }, { "epoch": 0.4413387274733358, "grad_norm": 1.2550819533185167, "learning_rate": 6.1816153633350026e-06, "loss": 0.6495, "step": 14400 }, { "epoch": 0.441369375996077, "grad_norm": 1.248915353995441, "learning_rate": 6.181133098108707e-06, "loss": 0.5554, "step": 14401 }, { "epoch": 0.4414000245188182, "grad_norm": 1.1744074846450623, "learning_rate": 6.180650821244403e-06, "loss": 0.6718, "step": 14402 }, { "epoch": 0.4414306730415594, "grad_norm": 1.1850880568549906, "learning_rate": 6.1801685327468445e-06, "loss": 0.6568, "step": 14403 }, { "epoch": 0.4414613215643006, "grad_norm": 1.3645150304127587, "learning_rate": 6.1796862326207815e-06, "loss": 0.7348, "step": 14404 }, { "epoch": 0.44149197008704183, "grad_norm": 1.2995488225138363, "learning_rate": 6.179203920870971e-06, "loss": 0.6895, "step": 14405 }, { "epoch": 0.44152261860978304, "grad_norm": 1.1285454689970371, "learning_rate": 6.178721597502162e-06, "loss": 0.6265, "step": 14406 }, { "epoch": 0.4415532671325242, "grad_norm": 1.2249046750598989, "learning_rate": 6.178239262519106e-06, "loss": 0.7539, "step": 14407 }, { "epoch": 0.4415839156552654, "grad_norm": 1.4012578728993943, "learning_rate": 6.177756915926558e-06, "loss": 0.6905, "step": 14408 }, { "epoch": 0.4416145641780066, "grad_norm": 1.3172490079211712, "learning_rate": 6.17727455772927e-06, "loss": 0.6431, "step": 14409 }, { "epoch": 0.4416452127007478, "grad_norm": 1.250297593565408, "learning_rate": 6.176792187931995e-06, "loss": 0.5969, "step": 14410 }, { "epoch": 0.441675861223489, "grad_norm": 0.48035504778652216, "learning_rate": 6.1763098065394844e-06, "loss": 0.442, "step": 14411 }, { "epoch": 0.4417065097462302, "grad_norm": 1.2231340592173199, "learning_rate": 6.1758274135564935e-06, "loss": 0.6561, "step": 14412 }, { "epoch": 0.4417371582689714, "grad_norm": 0.45747172778308, "learning_rate": 6.175345008987773e-06, "loss": 0.4337, "step": 14413 }, { "epoch": 0.4417678067917126, "grad_norm": 1.1686133204172702, "learning_rate": 6.174862592838079e-06, "loss": 0.6833, "step": 14414 }, { "epoch": 0.44179845531445383, "grad_norm": 1.3891201008476635, "learning_rate": 6.174380165112161e-06, "loss": 0.6236, "step": 14415 }, { "epoch": 0.44182910383719504, "grad_norm": 1.3266010748553032, "learning_rate": 6.173897725814777e-06, "loss": 0.7003, "step": 14416 }, { "epoch": 0.44185975235993624, "grad_norm": 1.2417044272246847, "learning_rate": 6.173415274950677e-06, "loss": 0.6751, "step": 14417 }, { "epoch": 0.44189040088267745, "grad_norm": 1.2390961321582947, "learning_rate": 6.1729328125246165e-06, "loss": 0.6845, "step": 14418 }, { "epoch": 0.44192104940541865, "grad_norm": 1.281866098482046, "learning_rate": 6.172450338541348e-06, "loss": 0.7127, "step": 14419 }, { "epoch": 0.44195169792815986, "grad_norm": 0.4586547161087212, "learning_rate": 6.171967853005628e-06, "loss": 0.4155, "step": 14420 }, { "epoch": 0.44198234645090106, "grad_norm": 1.3317716846381211, "learning_rate": 6.171485355922208e-06, "loss": 0.569, "step": 14421 }, { "epoch": 0.44201299497364227, "grad_norm": 0.45836454785772923, "learning_rate": 6.171002847295843e-06, "loss": 0.4311, "step": 14422 }, { "epoch": 0.4420436434963835, "grad_norm": 1.5295615613465585, "learning_rate": 6.170520327131288e-06, "loss": 0.747, "step": 14423 }, { "epoch": 0.4420742920191247, "grad_norm": 1.3305206225474728, "learning_rate": 6.170037795433296e-06, "loss": 0.6092, "step": 14424 }, { "epoch": 0.4421049405418659, "grad_norm": 1.236141064161943, "learning_rate": 6.169555252206623e-06, "loss": 0.5995, "step": 14425 }, { "epoch": 0.4421355890646071, "grad_norm": 1.2239880790696642, "learning_rate": 6.169072697456021e-06, "loss": 0.6961, "step": 14426 }, { "epoch": 0.4421662375873483, "grad_norm": 1.2196375581353984, "learning_rate": 6.168590131186247e-06, "loss": 0.5956, "step": 14427 }, { "epoch": 0.4421968861100895, "grad_norm": 1.2548342526765894, "learning_rate": 6.168107553402057e-06, "loss": 0.781, "step": 14428 }, { "epoch": 0.4422275346328307, "grad_norm": 0.48395427893936827, "learning_rate": 6.167624964108205e-06, "loss": 0.4284, "step": 14429 }, { "epoch": 0.4422581831555719, "grad_norm": 1.2707711277572065, "learning_rate": 6.1671423633094426e-06, "loss": 0.5739, "step": 14430 }, { "epoch": 0.4422888316783131, "grad_norm": 1.053208876943281, "learning_rate": 6.1666597510105294e-06, "loss": 0.5874, "step": 14431 }, { "epoch": 0.4423194802010543, "grad_norm": 1.2513281082840506, "learning_rate": 6.16617712721622e-06, "loss": 0.6546, "step": 14432 }, { "epoch": 0.44235012872379553, "grad_norm": 1.2751031447756167, "learning_rate": 6.1656944919312675e-06, "loss": 0.5624, "step": 14433 }, { "epoch": 0.44238077724653674, "grad_norm": 0.46311140453317434, "learning_rate": 6.165211845160429e-06, "loss": 0.4015, "step": 14434 }, { "epoch": 0.44241142576927794, "grad_norm": 1.232715610300016, "learning_rate": 6.164729186908462e-06, "loss": 0.7023, "step": 14435 }, { "epoch": 0.44244207429201915, "grad_norm": 1.1668559353045618, "learning_rate": 6.164246517180119e-06, "loss": 0.6426, "step": 14436 }, { "epoch": 0.44247272281476036, "grad_norm": 0.483132926983443, "learning_rate": 6.163763835980156e-06, "loss": 0.4348, "step": 14437 }, { "epoch": 0.4425033713375015, "grad_norm": 1.168373330062958, "learning_rate": 6.1632811433133325e-06, "loss": 0.7083, "step": 14438 }, { "epoch": 0.4425340198602427, "grad_norm": 1.2319617682716375, "learning_rate": 6.1627984391843995e-06, "loss": 0.6986, "step": 14439 }, { "epoch": 0.4425646683829839, "grad_norm": 1.2628406387962352, "learning_rate": 6.1623157235981194e-06, "loss": 0.6478, "step": 14440 }, { "epoch": 0.4425953169057251, "grad_norm": 1.4147917910599856, "learning_rate": 6.1618329965592415e-06, "loss": 0.6464, "step": 14441 }, { "epoch": 0.44262596542846633, "grad_norm": 0.4751628418387346, "learning_rate": 6.161350258072528e-06, "loss": 0.4175, "step": 14442 }, { "epoch": 0.44265661395120753, "grad_norm": 1.1951963357520679, "learning_rate": 6.160867508142733e-06, "loss": 0.6404, "step": 14443 }, { "epoch": 0.44268726247394874, "grad_norm": 1.1835086422117043, "learning_rate": 6.160384746774614e-06, "loss": 0.6609, "step": 14444 }, { "epoch": 0.44271791099668995, "grad_norm": 1.2932748926943578, "learning_rate": 6.159901973972926e-06, "loss": 0.6482, "step": 14445 }, { "epoch": 0.44274855951943115, "grad_norm": 0.44171231499203834, "learning_rate": 6.159419189742427e-06, "loss": 0.411, "step": 14446 }, { "epoch": 0.44277920804217236, "grad_norm": 1.1605937280547924, "learning_rate": 6.1589363940878755e-06, "loss": 0.6696, "step": 14447 }, { "epoch": 0.44280985656491356, "grad_norm": 1.2214802314658064, "learning_rate": 6.158453587014025e-06, "loss": 0.6652, "step": 14448 }, { "epoch": 0.44284050508765477, "grad_norm": 1.3478536886576657, "learning_rate": 6.1579707685256365e-06, "loss": 0.7445, "step": 14449 }, { "epoch": 0.442871153610396, "grad_norm": 1.4009730804210627, "learning_rate": 6.157487938627464e-06, "loss": 0.6996, "step": 14450 }, { "epoch": 0.4429018021331372, "grad_norm": 1.2060757735485668, "learning_rate": 6.15700509732427e-06, "loss": 0.6247, "step": 14451 }, { "epoch": 0.4429324506558784, "grad_norm": 1.2646052438452646, "learning_rate": 6.156522244620806e-06, "loss": 0.7368, "step": 14452 }, { "epoch": 0.4429630991786196, "grad_norm": 1.1323265841296706, "learning_rate": 6.156039380521833e-06, "loss": 0.6979, "step": 14453 }, { "epoch": 0.4429937477013608, "grad_norm": 1.3546872390529823, "learning_rate": 6.1555565050321085e-06, "loss": 0.6687, "step": 14454 }, { "epoch": 0.443024396224102, "grad_norm": 1.4489783533765632, "learning_rate": 6.155073618156391e-06, "loss": 0.6636, "step": 14455 }, { "epoch": 0.4430550447468432, "grad_norm": 0.4844259980589579, "learning_rate": 6.154590719899436e-06, "loss": 0.4445, "step": 14456 }, { "epoch": 0.4430856932695844, "grad_norm": 0.4555612789486488, "learning_rate": 6.154107810266004e-06, "loss": 0.4332, "step": 14457 }, { "epoch": 0.4431163417923256, "grad_norm": 1.3088708523162662, "learning_rate": 6.153624889260852e-06, "loss": 0.6074, "step": 14458 }, { "epoch": 0.4431469903150668, "grad_norm": 1.1019458885355342, "learning_rate": 6.15314195688874e-06, "loss": 0.7158, "step": 14459 }, { "epoch": 0.44317763883780803, "grad_norm": 1.1935334983914183, "learning_rate": 6.152659013154424e-06, "loss": 0.6892, "step": 14460 }, { "epoch": 0.44320828736054924, "grad_norm": 1.1398400808473486, "learning_rate": 6.152176058062665e-06, "loss": 0.6566, "step": 14461 }, { "epoch": 0.44323893588329044, "grad_norm": 0.9870025573605926, "learning_rate": 6.151693091618218e-06, "loss": 0.5101, "step": 14462 }, { "epoch": 0.44326958440603165, "grad_norm": 1.2635531973956793, "learning_rate": 6.151210113825846e-06, "loss": 0.5614, "step": 14463 }, { "epoch": 0.44330023292877285, "grad_norm": 1.1386826228151432, "learning_rate": 6.150727124690306e-06, "loss": 0.548, "step": 14464 }, { "epoch": 0.44333088145151406, "grad_norm": 1.1960877348490822, "learning_rate": 6.150244124216358e-06, "loss": 0.5506, "step": 14465 }, { "epoch": 0.44336152997425526, "grad_norm": 0.5199042648428029, "learning_rate": 6.14976111240876e-06, "loss": 0.4228, "step": 14466 }, { "epoch": 0.44339217849699647, "grad_norm": 1.2768872160912554, "learning_rate": 6.149278089272271e-06, "loss": 0.712, "step": 14467 }, { "epoch": 0.4434228270197377, "grad_norm": 1.2974509040557456, "learning_rate": 6.148795054811652e-06, "loss": 0.651, "step": 14468 }, { "epoch": 0.4434534755424788, "grad_norm": 1.4584124244634245, "learning_rate": 6.1483120090316595e-06, "loss": 0.7266, "step": 14469 }, { "epoch": 0.44348412406522003, "grad_norm": 0.4573440853271699, "learning_rate": 6.147828951937057e-06, "loss": 0.4302, "step": 14470 }, { "epoch": 0.44351477258796124, "grad_norm": 1.2255721333136431, "learning_rate": 6.147345883532601e-06, "loss": 0.7711, "step": 14471 }, { "epoch": 0.44354542111070244, "grad_norm": 1.404788267252821, "learning_rate": 6.146862803823053e-06, "loss": 0.6853, "step": 14472 }, { "epoch": 0.44357606963344365, "grad_norm": 1.0944095857850522, "learning_rate": 6.1463797128131705e-06, "loss": 0.556, "step": 14473 }, { "epoch": 0.44360671815618485, "grad_norm": 1.251067757874085, "learning_rate": 6.1458966105077176e-06, "loss": 0.7047, "step": 14474 }, { "epoch": 0.44363736667892606, "grad_norm": 1.1689682277515008, "learning_rate": 6.145413496911452e-06, "loss": 0.5762, "step": 14475 }, { "epoch": 0.44366801520166727, "grad_norm": 1.1591854245636666, "learning_rate": 6.144930372029133e-06, "loss": 0.5986, "step": 14476 }, { "epoch": 0.44369866372440847, "grad_norm": 1.3112673583531362, "learning_rate": 6.144447235865522e-06, "loss": 0.7235, "step": 14477 }, { "epoch": 0.4437293122471497, "grad_norm": 1.185440739998024, "learning_rate": 6.143964088425382e-06, "loss": 0.6101, "step": 14478 }, { "epoch": 0.4437599607698909, "grad_norm": 1.2355187928104459, "learning_rate": 6.143480929713469e-06, "loss": 0.6431, "step": 14479 }, { "epoch": 0.4437906092926321, "grad_norm": 0.48194563716862354, "learning_rate": 6.142997759734546e-06, "loss": 0.4112, "step": 14480 }, { "epoch": 0.4438212578153733, "grad_norm": 1.494534416350338, "learning_rate": 6.142514578493374e-06, "loss": 0.7371, "step": 14481 }, { "epoch": 0.4438519063381145, "grad_norm": 1.7704201959349395, "learning_rate": 6.142031385994714e-06, "loss": 0.564, "step": 14482 }, { "epoch": 0.4438825548608557, "grad_norm": 1.259623516626558, "learning_rate": 6.141548182243326e-06, "loss": 0.5995, "step": 14483 }, { "epoch": 0.4439132033835969, "grad_norm": 1.16700139782187, "learning_rate": 6.141064967243972e-06, "loss": 0.652, "step": 14484 }, { "epoch": 0.4439438519063381, "grad_norm": 1.0448831167328536, "learning_rate": 6.140581741001413e-06, "loss": 0.6082, "step": 14485 }, { "epoch": 0.4439745004290793, "grad_norm": 1.1583201781865609, "learning_rate": 6.1400985035204095e-06, "loss": 0.6939, "step": 14486 }, { "epoch": 0.4440051489518205, "grad_norm": 1.239679909843758, "learning_rate": 6.139615254805724e-06, "loss": 0.651, "step": 14487 }, { "epoch": 0.44403579747456173, "grad_norm": 1.4393376397488629, "learning_rate": 6.139131994862118e-06, "loss": 0.6949, "step": 14488 }, { "epoch": 0.44406644599730294, "grad_norm": 1.3134430804275865, "learning_rate": 6.1386487236943525e-06, "loss": 0.7132, "step": 14489 }, { "epoch": 0.44409709452004414, "grad_norm": 1.1684518035744185, "learning_rate": 6.138165441307191e-06, "loss": 0.6481, "step": 14490 }, { "epoch": 0.44412774304278535, "grad_norm": 1.0872898168724043, "learning_rate": 6.137682147705392e-06, "loss": 0.5919, "step": 14491 }, { "epoch": 0.44415839156552656, "grad_norm": 0.456410663777784, "learning_rate": 6.1371988428937215e-06, "loss": 0.4158, "step": 14492 }, { "epoch": 0.44418904008826776, "grad_norm": 1.2111861410634837, "learning_rate": 6.136715526876938e-06, "loss": 0.6367, "step": 14493 }, { "epoch": 0.44421968861100897, "grad_norm": 1.136965999906421, "learning_rate": 6.136232199659809e-06, "loss": 0.6109, "step": 14494 }, { "epoch": 0.4442503371337502, "grad_norm": 1.4602956492451695, "learning_rate": 6.13574886124709e-06, "loss": 0.648, "step": 14495 }, { "epoch": 0.4442809856564914, "grad_norm": 1.2147919135781469, "learning_rate": 6.13526551164355e-06, "loss": 0.6272, "step": 14496 }, { "epoch": 0.4443116341792326, "grad_norm": 1.3310741157432113, "learning_rate": 6.134782150853946e-06, "loss": 0.6952, "step": 14497 }, { "epoch": 0.4443422827019738, "grad_norm": 1.2274549528500402, "learning_rate": 6.134298778883046e-06, "loss": 0.6455, "step": 14498 }, { "epoch": 0.444372931224715, "grad_norm": 1.1141352188776599, "learning_rate": 6.133815395735606e-06, "loss": 0.696, "step": 14499 }, { "epoch": 0.44440357974745615, "grad_norm": 1.3350661950999216, "learning_rate": 6.133332001416394e-06, "loss": 0.6901, "step": 14500 }, { "epoch": 0.44443422827019735, "grad_norm": 0.447623410111787, "learning_rate": 6.1328485959301745e-06, "loss": 0.411, "step": 14501 }, { "epoch": 0.44446487679293856, "grad_norm": 1.2109733087657124, "learning_rate": 6.1323651792817045e-06, "loss": 0.6265, "step": 14502 }, { "epoch": 0.44449552531567976, "grad_norm": 1.3760163070436977, "learning_rate": 6.131881751475752e-06, "loss": 0.6696, "step": 14503 }, { "epoch": 0.44452617383842097, "grad_norm": 1.2414864354743125, "learning_rate": 6.131398312517078e-06, "loss": 0.6933, "step": 14504 }, { "epoch": 0.4445568223611622, "grad_norm": 1.1679406674395756, "learning_rate": 6.13091486241045e-06, "loss": 0.6903, "step": 14505 }, { "epoch": 0.4445874708839034, "grad_norm": 1.2037926900326266, "learning_rate": 6.130431401160626e-06, "loss": 0.6291, "step": 14506 }, { "epoch": 0.4446181194066446, "grad_norm": 1.3651855744741976, "learning_rate": 6.129947928772373e-06, "loss": 0.6897, "step": 14507 }, { "epoch": 0.4446487679293858, "grad_norm": 1.1897391737155452, "learning_rate": 6.129464445250452e-06, "loss": 0.5427, "step": 14508 }, { "epoch": 0.444679416452127, "grad_norm": 1.1860085905940831, "learning_rate": 6.128980950599632e-06, "loss": 0.5715, "step": 14509 }, { "epoch": 0.4447100649748682, "grad_norm": 1.3007014363594325, "learning_rate": 6.128497444824672e-06, "loss": 0.6025, "step": 14510 }, { "epoch": 0.4447407134976094, "grad_norm": 1.2004562946565767, "learning_rate": 6.1280139279303385e-06, "loss": 0.7039, "step": 14511 }, { "epoch": 0.4447713620203506, "grad_norm": 1.240591192812784, "learning_rate": 6.127530399921393e-06, "loss": 0.6538, "step": 14512 }, { "epoch": 0.4448020105430918, "grad_norm": 1.4724768538130457, "learning_rate": 6.127046860802605e-06, "loss": 0.6946, "step": 14513 }, { "epoch": 0.444832659065833, "grad_norm": 1.3228887912720757, "learning_rate": 6.1265633105787344e-06, "loss": 0.6928, "step": 14514 }, { "epoch": 0.44486330758857423, "grad_norm": 1.1766905275295938, "learning_rate": 6.1260797492545484e-06, "loss": 0.6379, "step": 14515 }, { "epoch": 0.44489395611131544, "grad_norm": 1.3403406288489808, "learning_rate": 6.125596176834809e-06, "loss": 0.7067, "step": 14516 }, { "epoch": 0.44492460463405664, "grad_norm": 1.1424636125051835, "learning_rate": 6.125112593324283e-06, "loss": 0.5753, "step": 14517 }, { "epoch": 0.44495525315679785, "grad_norm": 1.1624350899181894, "learning_rate": 6.124628998727735e-06, "loss": 0.6032, "step": 14518 }, { "epoch": 0.44498590167953905, "grad_norm": 1.2171999745876985, "learning_rate": 6.124145393049929e-06, "loss": 0.6639, "step": 14519 }, { "epoch": 0.44501655020228026, "grad_norm": 1.2429426082677348, "learning_rate": 6.123661776295632e-06, "loss": 0.652, "step": 14520 }, { "epoch": 0.44504719872502146, "grad_norm": 1.2647541971240144, "learning_rate": 6.123178148469609e-06, "loss": 0.6934, "step": 14521 }, { "epoch": 0.44507784724776267, "grad_norm": 1.122841529900023, "learning_rate": 6.122694509576622e-06, "loss": 0.6172, "step": 14522 }, { "epoch": 0.4451084957705039, "grad_norm": 1.2027104750491253, "learning_rate": 6.122210859621439e-06, "loss": 0.6161, "step": 14523 }, { "epoch": 0.4451391442932451, "grad_norm": 1.201424152606859, "learning_rate": 6.121727198608827e-06, "loss": 0.6909, "step": 14524 }, { "epoch": 0.4451697928159863, "grad_norm": 1.1214483513985536, "learning_rate": 6.1212435265435475e-06, "loss": 0.5639, "step": 14525 }, { "epoch": 0.4452004413387275, "grad_norm": 1.279725655561174, "learning_rate": 6.120759843430371e-06, "loss": 0.7013, "step": 14526 }, { "epoch": 0.4452310898614687, "grad_norm": 1.180972063674772, "learning_rate": 6.1202761492740595e-06, "loss": 0.6532, "step": 14527 }, { "epoch": 0.4452617383842099, "grad_norm": 1.1551550464588702, "learning_rate": 6.119792444079381e-06, "loss": 0.5723, "step": 14528 }, { "epoch": 0.4452923869069511, "grad_norm": 0.4958053262755221, "learning_rate": 6.119308727851101e-06, "loss": 0.4175, "step": 14529 }, { "epoch": 0.4453230354296923, "grad_norm": 1.3313041560287702, "learning_rate": 6.118825000593984e-06, "loss": 0.6845, "step": 14530 }, { "epoch": 0.44535368395243347, "grad_norm": 1.2294283573114977, "learning_rate": 6.1183412623128e-06, "loss": 0.6269, "step": 14531 }, { "epoch": 0.44538433247517467, "grad_norm": 1.107441212064029, "learning_rate": 6.117857513012314e-06, "loss": 0.6307, "step": 14532 }, { "epoch": 0.4454149809979159, "grad_norm": 1.3385088195603496, "learning_rate": 6.117373752697291e-06, "loss": 0.7661, "step": 14533 }, { "epoch": 0.4454456295206571, "grad_norm": 1.140436347938561, "learning_rate": 6.116889981372498e-06, "loss": 0.7236, "step": 14534 }, { "epoch": 0.4454762780433983, "grad_norm": 1.2424341203378508, "learning_rate": 6.116406199042703e-06, "loss": 0.6898, "step": 14535 }, { "epoch": 0.4455069265661395, "grad_norm": 1.1847003838851737, "learning_rate": 6.115922405712672e-06, "loss": 0.6304, "step": 14536 }, { "epoch": 0.4455375750888807, "grad_norm": 0.46305204493806396, "learning_rate": 6.115438601387172e-06, "loss": 0.427, "step": 14537 }, { "epoch": 0.4455682236116219, "grad_norm": 1.2876472787759807, "learning_rate": 6.114954786070969e-06, "loss": 0.6564, "step": 14538 }, { "epoch": 0.4455988721343631, "grad_norm": 1.2372122294174166, "learning_rate": 6.114470959768832e-06, "loss": 0.635, "step": 14539 }, { "epoch": 0.4456295206571043, "grad_norm": 1.243403901683546, "learning_rate": 6.1139871224855285e-06, "loss": 0.6209, "step": 14540 }, { "epoch": 0.4456601691798455, "grad_norm": 0.43568598293352284, "learning_rate": 6.113503274225824e-06, "loss": 0.4256, "step": 14541 }, { "epoch": 0.44569081770258673, "grad_norm": 1.4110738123373585, "learning_rate": 6.113019414994485e-06, "loss": 0.6883, "step": 14542 }, { "epoch": 0.44572146622532793, "grad_norm": 1.1746596278343555, "learning_rate": 6.112535544796284e-06, "loss": 0.6765, "step": 14543 }, { "epoch": 0.44575211474806914, "grad_norm": 0.4373883866865127, "learning_rate": 6.112051663635985e-06, "loss": 0.4181, "step": 14544 }, { "epoch": 0.44578276327081034, "grad_norm": 1.3720254349041783, "learning_rate": 6.111567771518354e-06, "loss": 0.7219, "step": 14545 }, { "epoch": 0.44581341179355155, "grad_norm": 1.3625588384045813, "learning_rate": 6.1110838684481645e-06, "loss": 0.6842, "step": 14546 }, { "epoch": 0.44584406031629276, "grad_norm": 1.3205324492015607, "learning_rate": 6.11059995443018e-06, "loss": 0.6983, "step": 14547 }, { "epoch": 0.44587470883903396, "grad_norm": 1.2222067639635443, "learning_rate": 6.11011602946917e-06, "loss": 0.6869, "step": 14548 }, { "epoch": 0.44590535736177517, "grad_norm": 1.2227822972453675, "learning_rate": 6.109632093569902e-06, "loss": 0.6394, "step": 14549 }, { "epoch": 0.4459360058845164, "grad_norm": 1.2033274458393552, "learning_rate": 6.109148146737146e-06, "loss": 0.7176, "step": 14550 }, { "epoch": 0.4459666544072576, "grad_norm": 1.2422652147667423, "learning_rate": 6.108664188975669e-06, "loss": 0.686, "step": 14551 }, { "epoch": 0.4459973029299988, "grad_norm": 1.3970874812669627, "learning_rate": 6.108180220290241e-06, "loss": 0.6686, "step": 14552 }, { "epoch": 0.44602795145274, "grad_norm": 1.1799441451356543, "learning_rate": 6.107696240685627e-06, "loss": 0.665, "step": 14553 }, { "epoch": 0.4460585999754812, "grad_norm": 1.196560033109216, "learning_rate": 6.107212250166602e-06, "loss": 0.5308, "step": 14554 }, { "epoch": 0.4460892484982224, "grad_norm": 1.0536103642988326, "learning_rate": 6.1067282487379295e-06, "loss": 0.6933, "step": 14555 }, { "epoch": 0.4461198970209636, "grad_norm": 1.307914728912787, "learning_rate": 6.10624423640438e-06, "loss": 0.7481, "step": 14556 }, { "epoch": 0.4461505455437048, "grad_norm": 1.3610052304301286, "learning_rate": 6.105760213170725e-06, "loss": 0.76, "step": 14557 }, { "epoch": 0.446181194066446, "grad_norm": 1.2306395017071687, "learning_rate": 6.1052761790417315e-06, "loss": 0.6316, "step": 14558 }, { "epoch": 0.4462118425891872, "grad_norm": 1.2210363095901975, "learning_rate": 6.104792134022169e-06, "loss": 0.7466, "step": 14559 }, { "epoch": 0.44624249111192843, "grad_norm": 1.2724911212847174, "learning_rate": 6.104308078116804e-06, "loss": 0.6101, "step": 14560 }, { "epoch": 0.44627313963466964, "grad_norm": 1.2121039855605347, "learning_rate": 6.103824011330411e-06, "loss": 0.6444, "step": 14561 }, { "epoch": 0.4463037881574108, "grad_norm": 0.4841578956479419, "learning_rate": 6.103339933667757e-06, "loss": 0.3695, "step": 14562 }, { "epoch": 0.446334436680152, "grad_norm": 1.2747991448220601, "learning_rate": 6.102855845133615e-06, "loss": 0.763, "step": 14563 }, { "epoch": 0.4463650852028932, "grad_norm": 0.45008597932412325, "learning_rate": 6.102371745732749e-06, "loss": 0.3976, "step": 14564 }, { "epoch": 0.4463957337256344, "grad_norm": 1.2710882309252964, "learning_rate": 6.101887635469933e-06, "loss": 0.5948, "step": 14565 }, { "epoch": 0.4464263822483756, "grad_norm": 1.354043452071903, "learning_rate": 6.101403514349936e-06, "loss": 0.6352, "step": 14566 }, { "epoch": 0.4464570307711168, "grad_norm": 1.5585810397755848, "learning_rate": 6.100919382377531e-06, "loss": 0.7128, "step": 14567 }, { "epoch": 0.446487679293858, "grad_norm": 1.239074987132325, "learning_rate": 6.100435239557482e-06, "loss": 0.7041, "step": 14568 }, { "epoch": 0.4465183278165992, "grad_norm": 1.2232053182982885, "learning_rate": 6.0999510858945646e-06, "loss": 0.7306, "step": 14569 }, { "epoch": 0.44654897633934043, "grad_norm": 0.5307984867959871, "learning_rate": 6.099466921393546e-06, "loss": 0.4422, "step": 14570 }, { "epoch": 0.44657962486208164, "grad_norm": 1.500626726220204, "learning_rate": 6.098982746059201e-06, "loss": 0.7829, "step": 14571 }, { "epoch": 0.44661027338482284, "grad_norm": 1.1183337859393776, "learning_rate": 6.098498559896298e-06, "loss": 0.6093, "step": 14572 }, { "epoch": 0.44664092190756405, "grad_norm": 1.2340684973442058, "learning_rate": 6.098014362909606e-06, "loss": 0.6834, "step": 14573 }, { "epoch": 0.44667157043030525, "grad_norm": 0.45521622719364474, "learning_rate": 6.097530155103899e-06, "loss": 0.4154, "step": 14574 }, { "epoch": 0.44670221895304646, "grad_norm": 1.12160564513177, "learning_rate": 6.097045936483944e-06, "loss": 0.6672, "step": 14575 }, { "epoch": 0.44673286747578766, "grad_norm": 1.2590385898906378, "learning_rate": 6.096561707054517e-06, "loss": 0.6969, "step": 14576 }, { "epoch": 0.44676351599852887, "grad_norm": 1.3240456906646223, "learning_rate": 6.096077466820386e-06, "loss": 0.7366, "step": 14577 }, { "epoch": 0.4467941645212701, "grad_norm": 1.1476557108719891, "learning_rate": 6.095593215786324e-06, "loss": 0.6142, "step": 14578 }, { "epoch": 0.4468248130440113, "grad_norm": 1.093978691334487, "learning_rate": 6.095108953957101e-06, "loss": 0.6056, "step": 14579 }, { "epoch": 0.4468554615667525, "grad_norm": 1.3248682898769084, "learning_rate": 6.09462468133749e-06, "loss": 0.5798, "step": 14580 }, { "epoch": 0.4468861100894937, "grad_norm": 1.226357392273885, "learning_rate": 6.09414039793226e-06, "loss": 0.5867, "step": 14581 }, { "epoch": 0.4469167586122349, "grad_norm": 1.29496322877173, "learning_rate": 6.093656103746187e-06, "loss": 0.7372, "step": 14582 }, { "epoch": 0.4469474071349761, "grad_norm": 1.2361025857742824, "learning_rate": 6.09317179878404e-06, "loss": 0.6489, "step": 14583 }, { "epoch": 0.4469780556577173, "grad_norm": 1.1486717587124577, "learning_rate": 6.092687483050592e-06, "loss": 0.6035, "step": 14584 }, { "epoch": 0.4470087041804585, "grad_norm": 1.2084338155653833, "learning_rate": 6.092203156550614e-06, "loss": 0.7123, "step": 14585 }, { "epoch": 0.4470393527031997, "grad_norm": 1.2859082743897394, "learning_rate": 6.091718819288879e-06, "loss": 0.6478, "step": 14586 }, { "epoch": 0.4470700012259409, "grad_norm": 1.3210788927710524, "learning_rate": 6.091234471270159e-06, "loss": 0.626, "step": 14587 }, { "epoch": 0.44710064974868213, "grad_norm": 1.2285518734402945, "learning_rate": 6.090750112499226e-06, "loss": 0.6311, "step": 14588 }, { "epoch": 0.44713129827142334, "grad_norm": 1.1905574566219574, "learning_rate": 6.0902657429808535e-06, "loss": 0.5954, "step": 14589 }, { "epoch": 0.44716194679416454, "grad_norm": 1.3331393486751202, "learning_rate": 6.089781362719813e-06, "loss": 0.5545, "step": 14590 }, { "epoch": 0.44719259531690575, "grad_norm": 1.2270931354875518, "learning_rate": 6.08929697172088e-06, "loss": 0.6231, "step": 14591 }, { "epoch": 0.44722324383964696, "grad_norm": 0.5052630531940776, "learning_rate": 6.088812569988822e-06, "loss": 0.4201, "step": 14592 }, { "epoch": 0.4472538923623881, "grad_norm": 1.1747476462169708, "learning_rate": 6.088328157528418e-06, "loss": 0.6449, "step": 14593 }, { "epoch": 0.4472845408851293, "grad_norm": 1.2106309161448006, "learning_rate": 6.0878437343444375e-06, "loss": 0.6834, "step": 14594 }, { "epoch": 0.4473151894078705, "grad_norm": 1.2354105668361854, "learning_rate": 6.087359300441655e-06, "loss": 0.6138, "step": 14595 }, { "epoch": 0.4473458379306117, "grad_norm": 0.4367627030324328, "learning_rate": 6.086874855824842e-06, "loss": 0.4325, "step": 14596 }, { "epoch": 0.44737648645335293, "grad_norm": 1.2223080595730107, "learning_rate": 6.086390400498773e-06, "loss": 0.6908, "step": 14597 }, { "epoch": 0.44740713497609413, "grad_norm": 1.2141290772586475, "learning_rate": 6.085905934468221e-06, "loss": 0.5727, "step": 14598 }, { "epoch": 0.44743778349883534, "grad_norm": 1.1763626511782805, "learning_rate": 6.085421457737961e-06, "loss": 0.6765, "step": 14599 }, { "epoch": 0.44746843202157655, "grad_norm": 1.3107226274957986, "learning_rate": 6.084936970312764e-06, "loss": 0.7066, "step": 14600 }, { "epoch": 0.44749908054431775, "grad_norm": 1.193362239654733, "learning_rate": 6.084452472197408e-06, "loss": 0.6834, "step": 14601 }, { "epoch": 0.44752972906705896, "grad_norm": 1.3979736370036713, "learning_rate": 6.0839679633966635e-06, "loss": 0.7636, "step": 14602 }, { "epoch": 0.44756037758980016, "grad_norm": 0.4656546313253843, "learning_rate": 6.0834834439153034e-06, "loss": 0.4122, "step": 14603 }, { "epoch": 0.44759102611254137, "grad_norm": 1.2828182038379865, "learning_rate": 6.082998913758106e-06, "loss": 0.5898, "step": 14604 }, { "epoch": 0.4476216746352826, "grad_norm": 0.46728668534506096, "learning_rate": 6.082514372929843e-06, "loss": 0.4322, "step": 14605 }, { "epoch": 0.4476523231580238, "grad_norm": 1.3312694084380672, "learning_rate": 6.082029821435288e-06, "loss": 0.7381, "step": 14606 }, { "epoch": 0.447682971680765, "grad_norm": 1.1632362766875264, "learning_rate": 6.081545259279216e-06, "loss": 0.651, "step": 14607 }, { "epoch": 0.4477136202035062, "grad_norm": 1.422227406896925, "learning_rate": 6.081060686466403e-06, "loss": 0.6002, "step": 14608 }, { "epoch": 0.4477442687262474, "grad_norm": 1.2543100737301809, "learning_rate": 6.080576103001622e-06, "loss": 0.7218, "step": 14609 }, { "epoch": 0.4477749172489886, "grad_norm": 1.2987602593258833, "learning_rate": 6.080091508889649e-06, "loss": 0.6977, "step": 14610 }, { "epoch": 0.4478055657717298, "grad_norm": 1.6431738552925514, "learning_rate": 6.079606904135256e-06, "loss": 0.5653, "step": 14611 }, { "epoch": 0.447836214294471, "grad_norm": 1.3579909651851294, "learning_rate": 6.079122288743221e-06, "loss": 0.7765, "step": 14612 }, { "epoch": 0.4478668628172122, "grad_norm": 1.2692180499428676, "learning_rate": 6.078637662718319e-06, "loss": 0.6754, "step": 14613 }, { "epoch": 0.4478975113399534, "grad_norm": 0.4876701578447915, "learning_rate": 6.078153026065321e-06, "loss": 0.4202, "step": 14614 }, { "epoch": 0.44792815986269463, "grad_norm": 1.636267093461523, "learning_rate": 6.0776683787890075e-06, "loss": 0.5867, "step": 14615 }, { "epoch": 0.44795880838543584, "grad_norm": 1.3276583360371124, "learning_rate": 6.077183720894152e-06, "loss": 0.6148, "step": 14616 }, { "epoch": 0.44798945690817704, "grad_norm": 1.4401789775268985, "learning_rate": 6.076699052385531e-06, "loss": 0.7082, "step": 14617 }, { "epoch": 0.44802010543091825, "grad_norm": 1.3549612903054618, "learning_rate": 6.0762143732679156e-06, "loss": 0.6483, "step": 14618 }, { "epoch": 0.44805075395365945, "grad_norm": 1.2355800641207053, "learning_rate": 6.075729683546087e-06, "loss": 0.5604, "step": 14619 }, { "epoch": 0.44808140247640066, "grad_norm": 1.283195897656851, "learning_rate": 6.075244983224816e-06, "loss": 0.6441, "step": 14620 }, { "epoch": 0.44811205099914186, "grad_norm": 0.4695430384384152, "learning_rate": 6.074760272308885e-06, "loss": 0.4393, "step": 14621 }, { "epoch": 0.44814269952188307, "grad_norm": 1.290925130622757, "learning_rate": 6.074275550803063e-06, "loss": 0.7424, "step": 14622 }, { "epoch": 0.4481733480446243, "grad_norm": 1.3028559233114607, "learning_rate": 6.073790818712131e-06, "loss": 0.6646, "step": 14623 }, { "epoch": 0.4482039965673654, "grad_norm": 1.4177683056829387, "learning_rate": 6.073306076040861e-06, "loss": 0.7042, "step": 14624 }, { "epoch": 0.44823464509010663, "grad_norm": 1.2616667871530807, "learning_rate": 6.072821322794034e-06, "loss": 0.6349, "step": 14625 }, { "epoch": 0.44826529361284784, "grad_norm": 1.1657217122114871, "learning_rate": 6.0723365589764224e-06, "loss": 0.5971, "step": 14626 }, { "epoch": 0.44829594213558904, "grad_norm": 0.4395499393825275, "learning_rate": 6.0718517845928035e-06, "loss": 0.4049, "step": 14627 }, { "epoch": 0.44832659065833025, "grad_norm": 1.2108217310075096, "learning_rate": 6.0713669996479584e-06, "loss": 0.6855, "step": 14628 }, { "epoch": 0.44835723918107145, "grad_norm": 1.2209515477625064, "learning_rate": 6.070882204146656e-06, "loss": 0.6851, "step": 14629 }, { "epoch": 0.44838788770381266, "grad_norm": 1.1471562966765316, "learning_rate": 6.070397398093681e-06, "loss": 0.6136, "step": 14630 }, { "epoch": 0.44841853622655387, "grad_norm": 1.2737251809211285, "learning_rate": 6.069912581493803e-06, "loss": 0.6104, "step": 14631 }, { "epoch": 0.44844918474929507, "grad_norm": 1.2590583657748464, "learning_rate": 6.069427754351805e-06, "loss": 0.5773, "step": 14632 }, { "epoch": 0.4484798332720363, "grad_norm": 1.6775053015973707, "learning_rate": 6.068942916672461e-06, "loss": 0.7776, "step": 14633 }, { "epoch": 0.4485104817947775, "grad_norm": 1.1448379284494041, "learning_rate": 6.068458068460549e-06, "loss": 0.6409, "step": 14634 }, { "epoch": 0.4485411303175187, "grad_norm": 1.2700131139785322, "learning_rate": 6.067973209720845e-06, "loss": 0.6213, "step": 14635 }, { "epoch": 0.4485717788402599, "grad_norm": 1.1526487356677924, "learning_rate": 6.067488340458131e-06, "loss": 0.6618, "step": 14636 }, { "epoch": 0.4486024273630011, "grad_norm": 1.312199568761452, "learning_rate": 6.067003460677177e-06, "loss": 0.7063, "step": 14637 }, { "epoch": 0.4486330758857423, "grad_norm": 1.340661268525222, "learning_rate": 6.066518570382768e-06, "loss": 0.7047, "step": 14638 }, { "epoch": 0.4486637244084835, "grad_norm": 0.6081119213238334, "learning_rate": 6.066033669579677e-06, "loss": 0.4248, "step": 14639 }, { "epoch": 0.4486943729312247, "grad_norm": 1.2327337088917292, "learning_rate": 6.065548758272684e-06, "loss": 0.5742, "step": 14640 }, { "epoch": 0.4487250214539659, "grad_norm": 1.1088340428923036, "learning_rate": 6.065063836466567e-06, "loss": 0.626, "step": 14641 }, { "epoch": 0.44875566997670713, "grad_norm": 1.3352456673509445, "learning_rate": 6.064578904166103e-06, "loss": 0.6248, "step": 14642 }, { "epoch": 0.44878631849944833, "grad_norm": 1.433088964671522, "learning_rate": 6.0640939613760705e-06, "loss": 0.6811, "step": 14643 }, { "epoch": 0.44881696702218954, "grad_norm": 1.1854893586484052, "learning_rate": 6.063609008101249e-06, "loss": 0.6605, "step": 14644 }, { "epoch": 0.44884761554493074, "grad_norm": 1.1476086800795158, "learning_rate": 6.063124044346415e-06, "loss": 0.7011, "step": 14645 }, { "epoch": 0.44887826406767195, "grad_norm": 1.1860975457442189, "learning_rate": 6.0626390701163474e-06, "loss": 0.5867, "step": 14646 }, { "epoch": 0.44890891259041316, "grad_norm": 1.0746634225814098, "learning_rate": 6.062154085415826e-06, "loss": 0.6209, "step": 14647 }, { "epoch": 0.44893956111315436, "grad_norm": 0.46975059250366585, "learning_rate": 6.061669090249628e-06, "loss": 0.4175, "step": 14648 }, { "epoch": 0.44897020963589557, "grad_norm": 1.376749188013212, "learning_rate": 6.061184084622534e-06, "loss": 0.6447, "step": 14649 }, { "epoch": 0.4490008581586368, "grad_norm": 1.2380077052198726, "learning_rate": 6.060699068539319e-06, "loss": 0.6302, "step": 14650 }, { "epoch": 0.449031506681378, "grad_norm": 1.3314007785274842, "learning_rate": 6.060214042004767e-06, "loss": 0.6182, "step": 14651 }, { "epoch": 0.4490621552041192, "grad_norm": 1.1744349609760774, "learning_rate": 6.059729005023655e-06, "loss": 0.6047, "step": 14652 }, { "epoch": 0.4490928037268604, "grad_norm": 1.2133992473205177, "learning_rate": 6.05924395760076e-06, "loss": 0.6006, "step": 14653 }, { "epoch": 0.4491234522496016, "grad_norm": 1.1250244892316186, "learning_rate": 6.0587588997408646e-06, "loss": 0.5867, "step": 14654 }, { "epoch": 0.44915410077234275, "grad_norm": 1.3222741703070877, "learning_rate": 6.058273831448747e-06, "loss": 0.6662, "step": 14655 }, { "epoch": 0.44918474929508395, "grad_norm": 1.319053393608901, "learning_rate": 6.057788752729187e-06, "loss": 0.6236, "step": 14656 }, { "epoch": 0.44921539781782516, "grad_norm": 1.1857701269508458, "learning_rate": 6.057303663586962e-06, "loss": 0.5548, "step": 14657 }, { "epoch": 0.44924604634056636, "grad_norm": 1.214481470253447, "learning_rate": 6.056818564026855e-06, "loss": 0.6984, "step": 14658 }, { "epoch": 0.44927669486330757, "grad_norm": 1.2192368928435622, "learning_rate": 6.056333454053645e-06, "loss": 0.6514, "step": 14659 }, { "epoch": 0.4493073433860488, "grad_norm": 1.2309151642160023, "learning_rate": 6.05584833367211e-06, "loss": 0.6881, "step": 14660 }, { "epoch": 0.44933799190879, "grad_norm": 1.220110634478921, "learning_rate": 6.0553632028870305e-06, "loss": 0.693, "step": 14661 }, { "epoch": 0.4493686404315312, "grad_norm": 1.2277039606604296, "learning_rate": 6.0548780617031875e-06, "loss": 0.5654, "step": 14662 }, { "epoch": 0.4493992889542724, "grad_norm": 1.1013918618595835, "learning_rate": 6.054392910125362e-06, "loss": 0.6111, "step": 14663 }, { "epoch": 0.4494299374770136, "grad_norm": 1.131495584559611, "learning_rate": 6.053907748158333e-06, "loss": 0.5646, "step": 14664 }, { "epoch": 0.4494605859997548, "grad_norm": 1.1029334923936192, "learning_rate": 6.053422575806881e-06, "loss": 0.5971, "step": 14665 }, { "epoch": 0.449491234522496, "grad_norm": 1.2573635618016188, "learning_rate": 6.052937393075787e-06, "loss": 0.738, "step": 14666 }, { "epoch": 0.4495218830452372, "grad_norm": 1.3583745770428617, "learning_rate": 6.0524521999698315e-06, "loss": 0.6733, "step": 14667 }, { "epoch": 0.4495525315679784, "grad_norm": 0.47742215163872925, "learning_rate": 6.051966996493795e-06, "loss": 0.4064, "step": 14668 }, { "epoch": 0.4495831800907196, "grad_norm": 0.47929821759202623, "learning_rate": 6.05148178265246e-06, "loss": 0.4478, "step": 14669 }, { "epoch": 0.44961382861346083, "grad_norm": 1.3218387608240956, "learning_rate": 6.0509965584506035e-06, "loss": 0.6722, "step": 14670 }, { "epoch": 0.44964447713620204, "grad_norm": 1.2395202453645797, "learning_rate": 6.050511323893011e-06, "loss": 0.6676, "step": 14671 }, { "epoch": 0.44967512565894324, "grad_norm": 0.44345544122403724, "learning_rate": 6.05002607898446e-06, "loss": 0.4263, "step": 14672 }, { "epoch": 0.44970577418168445, "grad_norm": 0.4326813222475614, "learning_rate": 6.049540823729735e-06, "loss": 0.4334, "step": 14673 }, { "epoch": 0.44973642270442565, "grad_norm": 1.4892996623279513, "learning_rate": 6.049055558133614e-06, "loss": 0.6814, "step": 14674 }, { "epoch": 0.44976707122716686, "grad_norm": 1.2646852214880857, "learning_rate": 6.048570282200883e-06, "loss": 0.6352, "step": 14675 }, { "epoch": 0.44979771974990806, "grad_norm": 1.3745026245078384, "learning_rate": 6.0480849959363175e-06, "loss": 0.6489, "step": 14676 }, { "epoch": 0.44982836827264927, "grad_norm": 0.4607108781302497, "learning_rate": 6.047599699344704e-06, "loss": 0.4234, "step": 14677 }, { "epoch": 0.4498590167953905, "grad_norm": 1.1959357156040193, "learning_rate": 6.047114392430823e-06, "loss": 0.6779, "step": 14678 }, { "epoch": 0.4498896653181317, "grad_norm": 1.2511187053780233, "learning_rate": 6.046629075199456e-06, "loss": 0.6931, "step": 14679 }, { "epoch": 0.4499203138408729, "grad_norm": 1.2375651372290737, "learning_rate": 6.046143747655383e-06, "loss": 0.641, "step": 14680 }, { "epoch": 0.4499509623636141, "grad_norm": 1.0675180512876954, "learning_rate": 6.04565840980339e-06, "loss": 0.5972, "step": 14681 }, { "epoch": 0.4499816108863553, "grad_norm": 1.3019880201623006, "learning_rate": 6.045173061648256e-06, "loss": 0.6045, "step": 14682 }, { "epoch": 0.4500122594090965, "grad_norm": 1.4741482881561268, "learning_rate": 6.044687703194765e-06, "loss": 0.5782, "step": 14683 }, { "epoch": 0.4500429079318377, "grad_norm": 1.1844481366275061, "learning_rate": 6.044202334447698e-06, "loss": 0.6263, "step": 14684 }, { "epoch": 0.4500735564545789, "grad_norm": 1.1453665153030135, "learning_rate": 6.043716955411839e-06, "loss": 0.7187, "step": 14685 }, { "epoch": 0.45010420497732007, "grad_norm": 1.323136728148735, "learning_rate": 6.04323156609197e-06, "loss": 0.598, "step": 14686 }, { "epoch": 0.45013485350006127, "grad_norm": 1.1654644394341622, "learning_rate": 6.042746166492873e-06, "loss": 0.6549, "step": 14687 }, { "epoch": 0.4501655020228025, "grad_norm": 1.2599799268399465, "learning_rate": 6.042260756619331e-06, "loss": 0.6606, "step": 14688 }, { "epoch": 0.4501961505455437, "grad_norm": 0.47340173105965494, "learning_rate": 6.041775336476128e-06, "loss": 0.411, "step": 14689 }, { "epoch": 0.4502267990682849, "grad_norm": 0.46901519627463334, "learning_rate": 6.041289906068046e-06, "loss": 0.4239, "step": 14690 }, { "epoch": 0.4502574475910261, "grad_norm": 1.2714146405796936, "learning_rate": 6.040804465399867e-06, "loss": 0.6706, "step": 14691 }, { "epoch": 0.4502880961137673, "grad_norm": 1.2092547590490792, "learning_rate": 6.040319014476376e-06, "loss": 0.685, "step": 14692 }, { "epoch": 0.4503187446365085, "grad_norm": 1.3561139691478004, "learning_rate": 6.0398335533023546e-06, "loss": 0.738, "step": 14693 }, { "epoch": 0.4503493931592497, "grad_norm": 1.2809546186297358, "learning_rate": 6.039348081882589e-06, "loss": 0.6572, "step": 14694 }, { "epoch": 0.4503800416819909, "grad_norm": 1.1622990507820936, "learning_rate": 6.03886260022186e-06, "loss": 0.6141, "step": 14695 }, { "epoch": 0.4504106902047321, "grad_norm": 1.4365593956551757, "learning_rate": 6.038377108324951e-06, "loss": 0.6788, "step": 14696 }, { "epoch": 0.45044133872747333, "grad_norm": 0.48457642681504215, "learning_rate": 6.037891606196648e-06, "loss": 0.419, "step": 14697 }, { "epoch": 0.45047198725021453, "grad_norm": 1.3515870580190166, "learning_rate": 6.037406093841732e-06, "loss": 0.6116, "step": 14698 }, { "epoch": 0.45050263577295574, "grad_norm": 1.0896040173405845, "learning_rate": 6.03692057126499e-06, "loss": 0.5667, "step": 14699 }, { "epoch": 0.45053328429569695, "grad_norm": 0.42774695435530186, "learning_rate": 6.036435038471203e-06, "loss": 0.4183, "step": 14700 }, { "epoch": 0.45056393281843815, "grad_norm": 1.1352613846093595, "learning_rate": 6.035949495465157e-06, "loss": 0.6731, "step": 14701 }, { "epoch": 0.45059458134117936, "grad_norm": 1.6551697538065355, "learning_rate": 6.035463942251636e-06, "loss": 0.7205, "step": 14702 }, { "epoch": 0.45062522986392056, "grad_norm": 1.4140278207318628, "learning_rate": 6.0349783788354235e-06, "loss": 0.6641, "step": 14703 }, { "epoch": 0.45065587838666177, "grad_norm": 1.1689292953981563, "learning_rate": 6.034492805221304e-06, "loss": 0.6518, "step": 14704 }, { "epoch": 0.450686526909403, "grad_norm": 1.10454026331927, "learning_rate": 6.034007221414064e-06, "loss": 0.6146, "step": 14705 }, { "epoch": 0.4507171754321442, "grad_norm": 1.1962436267781424, "learning_rate": 6.033521627418483e-06, "loss": 0.7671, "step": 14706 }, { "epoch": 0.4507478239548854, "grad_norm": 1.1787020046997134, "learning_rate": 6.033036023239352e-06, "loss": 0.7243, "step": 14707 }, { "epoch": 0.4507784724776266, "grad_norm": 1.313177827948138, "learning_rate": 6.032550408881449e-06, "loss": 0.7404, "step": 14708 }, { "epoch": 0.4508091210003678, "grad_norm": 1.1811839843486, "learning_rate": 6.032064784349566e-06, "loss": 0.6743, "step": 14709 }, { "epoch": 0.450839769523109, "grad_norm": 1.2156331287803868, "learning_rate": 6.031579149648483e-06, "loss": 0.6831, "step": 14710 }, { "epoch": 0.4508704180458502, "grad_norm": 1.0631105798998544, "learning_rate": 6.031093504782987e-06, "loss": 0.7817, "step": 14711 }, { "epoch": 0.4509010665685914, "grad_norm": 1.2409701420856283, "learning_rate": 6.0306078497578636e-06, "loss": 0.6693, "step": 14712 }, { "epoch": 0.4509317150913326, "grad_norm": 0.47041353411868836, "learning_rate": 6.030122184577897e-06, "loss": 0.4291, "step": 14713 }, { "epoch": 0.4509623636140738, "grad_norm": 1.2611384332112276, "learning_rate": 6.029636509247874e-06, "loss": 0.6787, "step": 14714 }, { "epoch": 0.45099301213681503, "grad_norm": 1.1648058235693914, "learning_rate": 6.029150823772576e-06, "loss": 0.7393, "step": 14715 }, { "epoch": 0.45102366065955624, "grad_norm": 1.30119168426467, "learning_rate": 6.028665128156794e-06, "loss": 0.6635, "step": 14716 }, { "epoch": 0.4510543091822974, "grad_norm": 1.2826943383813727, "learning_rate": 6.0281794224053115e-06, "loss": 0.5997, "step": 14717 }, { "epoch": 0.4510849577050386, "grad_norm": 1.1862897142188478, "learning_rate": 6.027693706522914e-06, "loss": 0.6736, "step": 14718 }, { "epoch": 0.4511156062277798, "grad_norm": 1.214647698102224, "learning_rate": 6.0272079805143855e-06, "loss": 0.6588, "step": 14719 }, { "epoch": 0.451146254750521, "grad_norm": 1.4160096617406934, "learning_rate": 6.026722244384515e-06, "loss": 0.7386, "step": 14720 }, { "epoch": 0.4511769032732622, "grad_norm": 1.224052827903787, "learning_rate": 6.0262364981380884e-06, "loss": 0.6684, "step": 14721 }, { "epoch": 0.4512075517960034, "grad_norm": 1.1899716255901762, "learning_rate": 6.02575074177989e-06, "loss": 0.6653, "step": 14722 }, { "epoch": 0.4512382003187446, "grad_norm": 1.1888862317463504, "learning_rate": 6.025264975314708e-06, "loss": 0.5455, "step": 14723 }, { "epoch": 0.4512688488414858, "grad_norm": 1.1935082256705487, "learning_rate": 6.024779198747327e-06, "loss": 0.6438, "step": 14724 }, { "epoch": 0.45129949736422703, "grad_norm": 1.1979652014169704, "learning_rate": 6.024293412082534e-06, "loss": 0.6814, "step": 14725 }, { "epoch": 0.45133014588696824, "grad_norm": 1.3799744864389878, "learning_rate": 6.023807615325117e-06, "loss": 0.6023, "step": 14726 }, { "epoch": 0.45136079440970944, "grad_norm": 1.2177799373507128, "learning_rate": 6.023321808479862e-06, "loss": 0.6331, "step": 14727 }, { "epoch": 0.45139144293245065, "grad_norm": 1.3641010587792886, "learning_rate": 6.022835991551555e-06, "loss": 0.6133, "step": 14728 }, { "epoch": 0.45142209145519185, "grad_norm": 0.48045273835311997, "learning_rate": 6.022350164544982e-06, "loss": 0.3993, "step": 14729 }, { "epoch": 0.45145273997793306, "grad_norm": 1.1929255234324054, "learning_rate": 6.021864327464933e-06, "loss": 0.7451, "step": 14730 }, { "epoch": 0.45148338850067427, "grad_norm": 1.4190608338026693, "learning_rate": 6.021378480316193e-06, "loss": 0.7379, "step": 14731 }, { "epoch": 0.45151403702341547, "grad_norm": 1.2123028347698865, "learning_rate": 6.020892623103548e-06, "loss": 0.6448, "step": 14732 }, { "epoch": 0.4515446855461567, "grad_norm": 1.3581232544916892, "learning_rate": 6.020406755831788e-06, "loss": 0.6938, "step": 14733 }, { "epoch": 0.4515753340688979, "grad_norm": 1.114495137226377, "learning_rate": 6.0199208785056985e-06, "loss": 0.6199, "step": 14734 }, { "epoch": 0.4516059825916391, "grad_norm": 1.2829924528147083, "learning_rate": 6.019434991130069e-06, "loss": 0.6297, "step": 14735 }, { "epoch": 0.4516366311143803, "grad_norm": 1.292593022751831, "learning_rate": 6.018949093709684e-06, "loss": 0.6939, "step": 14736 }, { "epoch": 0.4516672796371215, "grad_norm": 1.1484157398920132, "learning_rate": 6.018463186249333e-06, "loss": 0.5816, "step": 14737 }, { "epoch": 0.4516979281598627, "grad_norm": 1.5193752841288928, "learning_rate": 6.017977268753805e-06, "loss": 0.7227, "step": 14738 }, { "epoch": 0.4517285766826039, "grad_norm": 1.2692230403758518, "learning_rate": 6.017491341227884e-06, "loss": 0.5789, "step": 14739 }, { "epoch": 0.4517592252053451, "grad_norm": 1.1443651372218449, "learning_rate": 6.017005403676365e-06, "loss": 0.6248, "step": 14740 }, { "epoch": 0.4517898737280863, "grad_norm": 1.3778883103128987, "learning_rate": 6.016519456104028e-06, "loss": 0.6754, "step": 14741 }, { "epoch": 0.4518205222508275, "grad_norm": 1.2021657766258804, "learning_rate": 6.016033498515665e-06, "loss": 0.6386, "step": 14742 }, { "epoch": 0.45185117077356873, "grad_norm": 1.2717023909591518, "learning_rate": 6.015547530916064e-06, "loss": 0.5938, "step": 14743 }, { "epoch": 0.45188181929630994, "grad_norm": 1.2344910086706324, "learning_rate": 6.015061553310016e-06, "loss": 0.6607, "step": 14744 }, { "epoch": 0.45191246781905114, "grad_norm": 1.4357620969022968, "learning_rate": 6.014575565702303e-06, "loss": 0.6986, "step": 14745 }, { "epoch": 0.45194311634179235, "grad_norm": 0.5312427088774144, "learning_rate": 6.01408956809772e-06, "loss": 0.4314, "step": 14746 }, { "epoch": 0.45197376486453356, "grad_norm": 1.064228818832909, "learning_rate": 6.01360356050105e-06, "loss": 0.6949, "step": 14747 }, { "epoch": 0.4520044133872747, "grad_norm": 1.2373634091858752, "learning_rate": 6.013117542917087e-06, "loss": 0.7059, "step": 14748 }, { "epoch": 0.4520350619100159, "grad_norm": 1.145221569711262, "learning_rate": 6.012631515350619e-06, "loss": 0.5529, "step": 14749 }, { "epoch": 0.4520657104327571, "grad_norm": 1.181072063015754, "learning_rate": 6.012145477806431e-06, "loss": 0.605, "step": 14750 }, { "epoch": 0.4520963589554983, "grad_norm": 1.3869763958093762, "learning_rate": 6.011659430289316e-06, "loss": 0.7094, "step": 14751 }, { "epoch": 0.45212700747823953, "grad_norm": 0.47983555628773683, "learning_rate": 6.011173372804061e-06, "loss": 0.4451, "step": 14752 }, { "epoch": 0.45215765600098073, "grad_norm": 1.337921624376603, "learning_rate": 6.010687305355457e-06, "loss": 0.6784, "step": 14753 }, { "epoch": 0.45218830452372194, "grad_norm": 1.2379420749805732, "learning_rate": 6.0102012279482915e-06, "loss": 0.6455, "step": 14754 }, { "epoch": 0.45221895304646315, "grad_norm": 1.3601465026456385, "learning_rate": 6.009715140587357e-06, "loss": 0.7069, "step": 14755 }, { "epoch": 0.45224960156920435, "grad_norm": 1.2048103820541476, "learning_rate": 6.0092290432774384e-06, "loss": 0.6755, "step": 14756 }, { "epoch": 0.45228025009194556, "grad_norm": 1.308182520837396, "learning_rate": 6.008742936023328e-06, "loss": 0.6384, "step": 14757 }, { "epoch": 0.45231089861468676, "grad_norm": 1.2922083801670723, "learning_rate": 6.008256818829815e-06, "loss": 0.637, "step": 14758 }, { "epoch": 0.45234154713742797, "grad_norm": 1.31239805980726, "learning_rate": 6.007770691701692e-06, "loss": 0.7268, "step": 14759 }, { "epoch": 0.4523721956601692, "grad_norm": 0.4701132001822838, "learning_rate": 6.007284554643744e-06, "loss": 0.4054, "step": 14760 }, { "epoch": 0.4524028441829104, "grad_norm": 1.264631337596803, "learning_rate": 6.006798407660764e-06, "loss": 0.6616, "step": 14761 }, { "epoch": 0.4524334927056516, "grad_norm": 1.5572911613269036, "learning_rate": 6.006312250757542e-06, "loss": 0.6937, "step": 14762 }, { "epoch": 0.4524641412283928, "grad_norm": 1.2054392027232956, "learning_rate": 6.005826083938868e-06, "loss": 0.6894, "step": 14763 }, { "epoch": 0.452494789751134, "grad_norm": 1.1886523911448545, "learning_rate": 6.005339907209533e-06, "loss": 0.616, "step": 14764 }, { "epoch": 0.4525254382738752, "grad_norm": 1.2959929764370333, "learning_rate": 6.004853720574325e-06, "loss": 0.7063, "step": 14765 }, { "epoch": 0.4525560867966164, "grad_norm": 1.2852056038323763, "learning_rate": 6.0043675240380385e-06, "loss": 0.6355, "step": 14766 }, { "epoch": 0.4525867353193576, "grad_norm": 1.091179290825803, "learning_rate": 6.00388131760546e-06, "loss": 0.5794, "step": 14767 }, { "epoch": 0.4526173838420988, "grad_norm": 1.4487087653029047, "learning_rate": 6.0033951012813825e-06, "loss": 0.6666, "step": 14768 }, { "epoch": 0.45264803236484, "grad_norm": 1.2206816664836524, "learning_rate": 6.002908875070597e-06, "loss": 0.5974, "step": 14769 }, { "epoch": 0.45267868088758123, "grad_norm": 1.3683048573880081, "learning_rate": 6.002422638977892e-06, "loss": 0.6299, "step": 14770 }, { "epoch": 0.45270932941032244, "grad_norm": 0.47036429539164976, "learning_rate": 6.001936393008062e-06, "loss": 0.415, "step": 14771 }, { "epoch": 0.45273997793306364, "grad_norm": 1.317950627062061, "learning_rate": 6.001450137165896e-06, "loss": 0.6837, "step": 14772 }, { "epoch": 0.45277062645580485, "grad_norm": 1.2328756114441408, "learning_rate": 6.0009638714561846e-06, "loss": 0.627, "step": 14773 }, { "epoch": 0.45280127497854605, "grad_norm": 1.2286607448889175, "learning_rate": 6.000477595883721e-06, "loss": 0.6544, "step": 14774 }, { "epoch": 0.45283192350128726, "grad_norm": 1.2464860903924613, "learning_rate": 5.999991310453296e-06, "loss": 0.6842, "step": 14775 }, { "epoch": 0.45286257202402846, "grad_norm": 1.1834978086486303, "learning_rate": 5.999505015169701e-06, "loss": 0.6265, "step": 14776 }, { "epoch": 0.45289322054676967, "grad_norm": 1.2067277488989037, "learning_rate": 5.999018710037725e-06, "loss": 0.6644, "step": 14777 }, { "epoch": 0.4529238690695109, "grad_norm": 1.289079508541473, "learning_rate": 5.998532395062165e-06, "loss": 0.6531, "step": 14778 }, { "epoch": 0.452954517592252, "grad_norm": 1.3084931790455432, "learning_rate": 5.9980460702478084e-06, "loss": 0.7215, "step": 14779 }, { "epoch": 0.45298516611499323, "grad_norm": 1.4039807395829207, "learning_rate": 5.997559735599448e-06, "loss": 0.6838, "step": 14780 }, { "epoch": 0.45301581463773444, "grad_norm": 1.2180962405718878, "learning_rate": 5.997073391121876e-06, "loss": 0.643, "step": 14781 }, { "epoch": 0.45304646316047564, "grad_norm": 1.13406110394458, "learning_rate": 5.996587036819887e-06, "loss": 0.6224, "step": 14782 }, { "epoch": 0.45307711168321685, "grad_norm": 1.2393336567644075, "learning_rate": 5.996100672698269e-06, "loss": 0.6837, "step": 14783 }, { "epoch": 0.45310776020595805, "grad_norm": 1.3451023058291898, "learning_rate": 5.995614298761816e-06, "loss": 0.7008, "step": 14784 }, { "epoch": 0.45313840872869926, "grad_norm": 1.0958641762085046, "learning_rate": 5.995127915015322e-06, "loss": 0.6647, "step": 14785 }, { "epoch": 0.45316905725144047, "grad_norm": 1.3985035595360396, "learning_rate": 5.994641521463578e-06, "loss": 0.6523, "step": 14786 }, { "epoch": 0.45319970577418167, "grad_norm": 1.1527788229977882, "learning_rate": 5.994155118111376e-06, "loss": 0.6498, "step": 14787 }, { "epoch": 0.4532303542969229, "grad_norm": 1.1905392551748994, "learning_rate": 5.9936687049635075e-06, "loss": 0.7291, "step": 14788 }, { "epoch": 0.4532610028196641, "grad_norm": 0.4755788231191121, "learning_rate": 5.99318228202477e-06, "loss": 0.4027, "step": 14789 }, { "epoch": 0.4532916513424053, "grad_norm": 0.4695335084338884, "learning_rate": 5.992695849299952e-06, "loss": 0.4163, "step": 14790 }, { "epoch": 0.4533222998651465, "grad_norm": 1.4928847863414385, "learning_rate": 5.992209406793847e-06, "loss": 0.7148, "step": 14791 }, { "epoch": 0.4533529483878877, "grad_norm": 1.5000725842284262, "learning_rate": 5.99172295451125e-06, "loss": 0.6512, "step": 14792 }, { "epoch": 0.4533835969106289, "grad_norm": 1.1796526316297005, "learning_rate": 5.991236492456952e-06, "loss": 0.6092, "step": 14793 }, { "epoch": 0.4534142454333701, "grad_norm": 1.1550318726785571, "learning_rate": 5.99075002063575e-06, "loss": 0.6686, "step": 14794 }, { "epoch": 0.4534448939561113, "grad_norm": 0.4813355222922889, "learning_rate": 5.990263539052431e-06, "loss": 0.4228, "step": 14795 }, { "epoch": 0.4534755424788525, "grad_norm": 1.2395411392145759, "learning_rate": 5.989777047711793e-06, "loss": 0.7104, "step": 14796 }, { "epoch": 0.45350619100159373, "grad_norm": 1.2367988720169942, "learning_rate": 5.9892905466186294e-06, "loss": 0.5529, "step": 14797 }, { "epoch": 0.45353683952433493, "grad_norm": 1.143433013943318, "learning_rate": 5.9888040357777334e-06, "loss": 0.6955, "step": 14798 }, { "epoch": 0.45356748804707614, "grad_norm": 1.5185050839302945, "learning_rate": 5.988317515193897e-06, "loss": 0.6389, "step": 14799 }, { "epoch": 0.45359813656981735, "grad_norm": 1.2459123387479085, "learning_rate": 5.987830984871915e-06, "loss": 0.7002, "step": 14800 }, { "epoch": 0.45362878509255855, "grad_norm": 1.3339557334333263, "learning_rate": 5.987344444816582e-06, "loss": 0.7216, "step": 14801 }, { "epoch": 0.45365943361529976, "grad_norm": 1.4120061084038849, "learning_rate": 5.9868578950326926e-06, "loss": 0.7332, "step": 14802 }, { "epoch": 0.45369008213804096, "grad_norm": 1.2033584783531845, "learning_rate": 5.986371335525038e-06, "loss": 0.6293, "step": 14803 }, { "epoch": 0.45372073066078217, "grad_norm": 1.2429889856516654, "learning_rate": 5.985884766298415e-06, "loss": 0.6572, "step": 14804 }, { "epoch": 0.4537513791835234, "grad_norm": 1.2724606634575812, "learning_rate": 5.985398187357618e-06, "loss": 0.6699, "step": 14805 }, { "epoch": 0.4537820277062646, "grad_norm": 1.1587024497458007, "learning_rate": 5.984911598707439e-06, "loss": 0.5803, "step": 14806 }, { "epoch": 0.4538126762290058, "grad_norm": 0.4746227081897628, "learning_rate": 5.9844250003526764e-06, "loss": 0.4315, "step": 14807 }, { "epoch": 0.453843324751747, "grad_norm": 0.46347800526762933, "learning_rate": 5.98393839229812e-06, "loss": 0.4267, "step": 14808 }, { "epoch": 0.4538739732744882, "grad_norm": 1.1545802770556102, "learning_rate": 5.983451774548568e-06, "loss": 0.6479, "step": 14809 }, { "epoch": 0.45390462179722935, "grad_norm": 1.3074657846869577, "learning_rate": 5.982965147108813e-06, "loss": 0.574, "step": 14810 }, { "epoch": 0.45393527031997055, "grad_norm": 0.45628250017122796, "learning_rate": 5.982478509983652e-06, "loss": 0.42, "step": 14811 }, { "epoch": 0.45396591884271176, "grad_norm": 1.1302290411047422, "learning_rate": 5.981991863177878e-06, "loss": 0.5928, "step": 14812 }, { "epoch": 0.45399656736545296, "grad_norm": 1.1890011144081698, "learning_rate": 5.98150520669629e-06, "loss": 0.6328, "step": 14813 }, { "epoch": 0.45402721588819417, "grad_norm": 1.1921699797961611, "learning_rate": 5.981018540543676e-06, "loss": 0.6864, "step": 14814 }, { "epoch": 0.4540578644109354, "grad_norm": 1.3413819588140852, "learning_rate": 5.9805318647248376e-06, "loss": 0.6392, "step": 14815 }, { "epoch": 0.4540885129336766, "grad_norm": 1.3934842326705503, "learning_rate": 5.9800451792445655e-06, "loss": 0.645, "step": 14816 }, { "epoch": 0.4541191614564178, "grad_norm": 1.506736035618407, "learning_rate": 5.9795584841076605e-06, "loss": 0.6326, "step": 14817 }, { "epoch": 0.454149809979159, "grad_norm": 1.4474485401227384, "learning_rate": 5.979071779318913e-06, "loss": 0.6911, "step": 14818 }, { "epoch": 0.4541804585019002, "grad_norm": 1.1874748048718224, "learning_rate": 5.9785850648831215e-06, "loss": 0.6554, "step": 14819 }, { "epoch": 0.4542111070246414, "grad_norm": 1.1815037915764115, "learning_rate": 5.978098340805081e-06, "loss": 0.6556, "step": 14820 }, { "epoch": 0.4542417555473826, "grad_norm": 1.264251314167021, "learning_rate": 5.977611607089588e-06, "loss": 0.699, "step": 14821 }, { "epoch": 0.4542724040701238, "grad_norm": 1.2530760650614412, "learning_rate": 5.977124863741437e-06, "loss": 0.6945, "step": 14822 }, { "epoch": 0.454303052592865, "grad_norm": 1.248979078147218, "learning_rate": 5.976638110765424e-06, "loss": 0.7035, "step": 14823 }, { "epoch": 0.4543337011156062, "grad_norm": 1.177816858011954, "learning_rate": 5.976151348166347e-06, "loss": 0.6263, "step": 14824 }, { "epoch": 0.45436434963834743, "grad_norm": 1.204766951530458, "learning_rate": 5.975664575949001e-06, "loss": 0.7033, "step": 14825 }, { "epoch": 0.45439499816108864, "grad_norm": 1.261299285865847, "learning_rate": 5.975177794118182e-06, "loss": 0.6757, "step": 14826 }, { "epoch": 0.45442564668382984, "grad_norm": 1.064884661617897, "learning_rate": 5.974691002678685e-06, "loss": 0.6207, "step": 14827 }, { "epoch": 0.45445629520657105, "grad_norm": 1.345290467460924, "learning_rate": 5.974204201635311e-06, "loss": 0.5735, "step": 14828 }, { "epoch": 0.45448694372931225, "grad_norm": 1.251006689465416, "learning_rate": 5.973717390992853e-06, "loss": 0.7185, "step": 14829 }, { "epoch": 0.45451759225205346, "grad_norm": 1.333205651121387, "learning_rate": 5.973230570756108e-06, "loss": 0.6684, "step": 14830 }, { "epoch": 0.45454824077479467, "grad_norm": 0.5407063245079352, "learning_rate": 5.972743740929871e-06, "loss": 0.403, "step": 14831 }, { "epoch": 0.45457888929753587, "grad_norm": 1.370347882722973, "learning_rate": 5.972256901518944e-06, "loss": 0.6437, "step": 14832 }, { "epoch": 0.4546095378202771, "grad_norm": 1.3170876557382027, "learning_rate": 5.9717700525281195e-06, "loss": 0.7013, "step": 14833 }, { "epoch": 0.4546401863430183, "grad_norm": 1.1617568240566842, "learning_rate": 5.971283193962197e-06, "loss": 0.5916, "step": 14834 }, { "epoch": 0.4546708348657595, "grad_norm": 1.2723990798036695, "learning_rate": 5.970796325825971e-06, "loss": 0.7054, "step": 14835 }, { "epoch": 0.4547014833885007, "grad_norm": 1.2209760748767233, "learning_rate": 5.970309448124243e-06, "loss": 0.6197, "step": 14836 }, { "epoch": 0.4547321319112419, "grad_norm": 1.142136160073946, "learning_rate": 5.9698225608618066e-06, "loss": 0.6088, "step": 14837 }, { "epoch": 0.4547627804339831, "grad_norm": 1.2808866830084413, "learning_rate": 5.969335664043458e-06, "loss": 0.687, "step": 14838 }, { "epoch": 0.4547934289567243, "grad_norm": 1.3460722573729886, "learning_rate": 5.968848757674e-06, "loss": 0.6837, "step": 14839 }, { "epoch": 0.4548240774794655, "grad_norm": 1.1161593031378982, "learning_rate": 5.968361841758228e-06, "loss": 0.6276, "step": 14840 }, { "epoch": 0.45485472600220667, "grad_norm": 1.2903031929997644, "learning_rate": 5.967874916300937e-06, "loss": 0.7755, "step": 14841 }, { "epoch": 0.45488537452494787, "grad_norm": 1.154170156430697, "learning_rate": 5.967387981306927e-06, "loss": 0.667, "step": 14842 }, { "epoch": 0.4549160230476891, "grad_norm": 0.49927369193000315, "learning_rate": 5.966901036780997e-06, "loss": 0.4366, "step": 14843 }, { "epoch": 0.4549466715704303, "grad_norm": 1.3439860815444933, "learning_rate": 5.966414082727943e-06, "loss": 0.6943, "step": 14844 }, { "epoch": 0.4549773200931715, "grad_norm": 0.4928341375404728, "learning_rate": 5.965927119152561e-06, "loss": 0.4124, "step": 14845 }, { "epoch": 0.4550079686159127, "grad_norm": 1.324873505108165, "learning_rate": 5.965440146059656e-06, "loss": 0.7333, "step": 14846 }, { "epoch": 0.4550386171386539, "grad_norm": 1.1900510751639188, "learning_rate": 5.96495316345402e-06, "loss": 0.7006, "step": 14847 }, { "epoch": 0.4550692656613951, "grad_norm": 1.3241696453537275, "learning_rate": 5.964466171340455e-06, "loss": 0.6925, "step": 14848 }, { "epoch": 0.4550999141841363, "grad_norm": 1.21355460758406, "learning_rate": 5.963979169723757e-06, "loss": 0.606, "step": 14849 }, { "epoch": 0.4551305627068775, "grad_norm": 1.1868535277536485, "learning_rate": 5.963492158608726e-06, "loss": 0.6418, "step": 14850 }, { "epoch": 0.4551612112296187, "grad_norm": 1.2915635179850673, "learning_rate": 5.963005138000159e-06, "loss": 0.672, "step": 14851 }, { "epoch": 0.45519185975235993, "grad_norm": 1.3154601468635143, "learning_rate": 5.962518107902859e-06, "loss": 0.6435, "step": 14852 }, { "epoch": 0.45522250827510113, "grad_norm": 1.4857788739231235, "learning_rate": 5.962031068321619e-06, "loss": 0.7901, "step": 14853 }, { "epoch": 0.45525315679784234, "grad_norm": 1.1349809595474063, "learning_rate": 5.961544019261242e-06, "loss": 0.5573, "step": 14854 }, { "epoch": 0.45528380532058355, "grad_norm": 1.1945336526550105, "learning_rate": 5.961056960726525e-06, "loss": 0.6398, "step": 14855 }, { "epoch": 0.45531445384332475, "grad_norm": 1.199815614249253, "learning_rate": 5.96056989272227e-06, "loss": 0.6335, "step": 14856 }, { "epoch": 0.45534510236606596, "grad_norm": 1.3119162726643683, "learning_rate": 5.96008281525327e-06, "loss": 0.6834, "step": 14857 }, { "epoch": 0.45537575088880716, "grad_norm": 1.2463717319688938, "learning_rate": 5.9595957283243326e-06, "loss": 0.692, "step": 14858 }, { "epoch": 0.45540639941154837, "grad_norm": 1.1995168570035828, "learning_rate": 5.959108631940251e-06, "loss": 0.6523, "step": 14859 }, { "epoch": 0.4554370479342896, "grad_norm": 1.227132430060796, "learning_rate": 5.958621526105825e-06, "loss": 0.6658, "step": 14860 }, { "epoch": 0.4554676964570308, "grad_norm": 0.5234994094599699, "learning_rate": 5.958134410825859e-06, "loss": 0.4274, "step": 14861 }, { "epoch": 0.455498344979772, "grad_norm": 1.4406925402141022, "learning_rate": 5.9576472861051474e-06, "loss": 0.6711, "step": 14862 }, { "epoch": 0.4555289935025132, "grad_norm": 1.118934552743122, "learning_rate": 5.957160151948493e-06, "loss": 0.6349, "step": 14863 }, { "epoch": 0.4555596420252544, "grad_norm": 1.2919986121181826, "learning_rate": 5.956673008360695e-06, "loss": 0.702, "step": 14864 }, { "epoch": 0.4555902905479956, "grad_norm": 1.188825591695007, "learning_rate": 5.956185855346552e-06, "loss": 0.6209, "step": 14865 }, { "epoch": 0.4556209390707368, "grad_norm": 1.139781819510014, "learning_rate": 5.955698692910865e-06, "loss": 0.5979, "step": 14866 }, { "epoch": 0.455651587593478, "grad_norm": 1.412544180834915, "learning_rate": 5.955211521058437e-06, "loss": 0.6598, "step": 14867 }, { "epoch": 0.4556822361162192, "grad_norm": 1.0884475046741278, "learning_rate": 5.954724339794062e-06, "loss": 0.5088, "step": 14868 }, { "epoch": 0.4557128846389604, "grad_norm": 0.4753501417807592, "learning_rate": 5.954237149122546e-06, "loss": 0.4124, "step": 14869 }, { "epoch": 0.45574353316170163, "grad_norm": 1.2287407880433643, "learning_rate": 5.953749949048686e-06, "loss": 0.6889, "step": 14870 }, { "epoch": 0.45577418168444284, "grad_norm": 1.2819492940841897, "learning_rate": 5.953262739577283e-06, "loss": 0.6517, "step": 14871 }, { "epoch": 0.455804830207184, "grad_norm": 0.42526217440022, "learning_rate": 5.952775520713141e-06, "loss": 0.3878, "step": 14872 }, { "epoch": 0.4558354787299252, "grad_norm": 1.435572984099942, "learning_rate": 5.952288292461057e-06, "loss": 0.8045, "step": 14873 }, { "epoch": 0.4558661272526664, "grad_norm": 1.234449592908598, "learning_rate": 5.951801054825831e-06, "loss": 0.5757, "step": 14874 }, { "epoch": 0.4558967757754076, "grad_norm": 1.3296475586066736, "learning_rate": 5.951313807812268e-06, "loss": 0.673, "step": 14875 }, { "epoch": 0.4559274242981488, "grad_norm": 1.3959632895312046, "learning_rate": 5.950826551425165e-06, "loss": 0.6831, "step": 14876 }, { "epoch": 0.45595807282089, "grad_norm": 1.1692897175787047, "learning_rate": 5.950339285669324e-06, "loss": 0.7003, "step": 14877 }, { "epoch": 0.4559887213436312, "grad_norm": 1.2584120765124827, "learning_rate": 5.94985201054955e-06, "loss": 0.7449, "step": 14878 }, { "epoch": 0.4560193698663724, "grad_norm": 1.45872685563111, "learning_rate": 5.949364726070639e-06, "loss": 0.6591, "step": 14879 }, { "epoch": 0.45605001838911363, "grad_norm": 1.2620254427162267, "learning_rate": 5.948877432237396e-06, "loss": 0.7162, "step": 14880 }, { "epoch": 0.45608066691185484, "grad_norm": 1.39503676787754, "learning_rate": 5.948390129054617e-06, "loss": 0.7323, "step": 14881 }, { "epoch": 0.45611131543459604, "grad_norm": 1.1027675329558075, "learning_rate": 5.947902816527112e-06, "loss": 0.6007, "step": 14882 }, { "epoch": 0.45614196395733725, "grad_norm": 1.1733034680261396, "learning_rate": 5.947415494659675e-06, "loss": 0.6228, "step": 14883 }, { "epoch": 0.45617261248007845, "grad_norm": 1.2673257114203005, "learning_rate": 5.946928163457113e-06, "loss": 0.5879, "step": 14884 }, { "epoch": 0.45620326100281966, "grad_norm": 1.295710552103075, "learning_rate": 5.9464408229242235e-06, "loss": 0.6701, "step": 14885 }, { "epoch": 0.45623390952556087, "grad_norm": 1.248186414410258, "learning_rate": 5.945953473065811e-06, "loss": 0.6689, "step": 14886 }, { "epoch": 0.45626455804830207, "grad_norm": 1.445236815410322, "learning_rate": 5.945466113886678e-06, "loss": 0.6523, "step": 14887 }, { "epoch": 0.4562952065710433, "grad_norm": 1.111483303727753, "learning_rate": 5.944978745391623e-06, "loss": 0.6323, "step": 14888 }, { "epoch": 0.4563258550937845, "grad_norm": 1.152604337004706, "learning_rate": 5.944491367585453e-06, "loss": 0.6443, "step": 14889 }, { "epoch": 0.4563565036165257, "grad_norm": 1.2010692295804557, "learning_rate": 5.944003980472968e-06, "loss": 0.6359, "step": 14890 }, { "epoch": 0.4563871521392669, "grad_norm": 1.4058099457327604, "learning_rate": 5.9435165840589695e-06, "loss": 0.721, "step": 14891 }, { "epoch": 0.4564178006620081, "grad_norm": 1.1622016926344847, "learning_rate": 5.9430291783482596e-06, "loss": 0.572, "step": 14892 }, { "epoch": 0.4564484491847493, "grad_norm": 0.48819901502340673, "learning_rate": 5.942541763345643e-06, "loss": 0.4265, "step": 14893 }, { "epoch": 0.4564790977074905, "grad_norm": 1.1455061093061931, "learning_rate": 5.942054339055922e-06, "loss": 0.6201, "step": 14894 }, { "epoch": 0.4565097462302317, "grad_norm": 1.2721207644883719, "learning_rate": 5.941566905483898e-06, "loss": 0.6012, "step": 14895 }, { "epoch": 0.4565403947529729, "grad_norm": 1.3433945381191503, "learning_rate": 5.941079462634373e-06, "loss": 0.7538, "step": 14896 }, { "epoch": 0.45657104327571413, "grad_norm": 0.44157177703937245, "learning_rate": 5.940592010512152e-06, "loss": 0.4163, "step": 14897 }, { "epoch": 0.45660169179845533, "grad_norm": 1.4188441215397891, "learning_rate": 5.940104549122039e-06, "loss": 0.6075, "step": 14898 }, { "epoch": 0.45663234032119654, "grad_norm": 1.1915339471498667, "learning_rate": 5.939617078468834e-06, "loss": 0.5871, "step": 14899 }, { "epoch": 0.45666298884393774, "grad_norm": 1.1457273362240041, "learning_rate": 5.9391295985573405e-06, "loss": 0.6928, "step": 14900 }, { "epoch": 0.45669363736667895, "grad_norm": 1.1091342318340907, "learning_rate": 5.938642109392364e-06, "loss": 0.6125, "step": 14901 }, { "epoch": 0.45672428588942016, "grad_norm": 1.2571441751540415, "learning_rate": 5.9381546109787055e-06, "loss": 0.7062, "step": 14902 }, { "epoch": 0.4567549344121613, "grad_norm": 1.1651675856819188, "learning_rate": 5.937667103321171e-06, "loss": 0.6845, "step": 14903 }, { "epoch": 0.4567855829349025, "grad_norm": 1.2703125676255218, "learning_rate": 5.937179586424562e-06, "loss": 0.623, "step": 14904 }, { "epoch": 0.4568162314576437, "grad_norm": 1.2688821721489008, "learning_rate": 5.936692060293681e-06, "loss": 0.562, "step": 14905 }, { "epoch": 0.4568468799803849, "grad_norm": 1.221307593526966, "learning_rate": 5.936204524933338e-06, "loss": 0.7268, "step": 14906 }, { "epoch": 0.45687752850312613, "grad_norm": 1.1980328126585036, "learning_rate": 5.935716980348329e-06, "loss": 0.5976, "step": 14907 }, { "epoch": 0.45690817702586733, "grad_norm": 1.174239727054021, "learning_rate": 5.935229426543461e-06, "loss": 0.6146, "step": 14908 }, { "epoch": 0.45693882554860854, "grad_norm": 1.221641463908405, "learning_rate": 5.93474186352354e-06, "loss": 0.661, "step": 14909 }, { "epoch": 0.45696947407134975, "grad_norm": 1.3014211981870596, "learning_rate": 5.934254291293367e-06, "loss": 0.6339, "step": 14910 }, { "epoch": 0.45700012259409095, "grad_norm": 1.4170145886148464, "learning_rate": 5.933766709857746e-06, "loss": 0.7924, "step": 14911 }, { "epoch": 0.45703077111683216, "grad_norm": 1.3277004035222355, "learning_rate": 5.933279119221485e-06, "loss": 0.6569, "step": 14912 }, { "epoch": 0.45706141963957336, "grad_norm": 0.45415351183703884, "learning_rate": 5.932791519389386e-06, "loss": 0.4058, "step": 14913 }, { "epoch": 0.45709206816231457, "grad_norm": 1.277253124326089, "learning_rate": 5.932303910366252e-06, "loss": 0.7639, "step": 14914 }, { "epoch": 0.4571227166850558, "grad_norm": 1.2480156494266708, "learning_rate": 5.93181629215689e-06, "loss": 0.6547, "step": 14915 }, { "epoch": 0.457153365207797, "grad_norm": 1.197953575695833, "learning_rate": 5.931328664766102e-06, "loss": 0.6057, "step": 14916 }, { "epoch": 0.4571840137305382, "grad_norm": 1.1721446049052513, "learning_rate": 5.930841028198698e-06, "loss": 0.5968, "step": 14917 }, { "epoch": 0.4572146622532794, "grad_norm": 1.1647207261098207, "learning_rate": 5.930353382459476e-06, "loss": 0.5915, "step": 14918 }, { "epoch": 0.4572453107760206, "grad_norm": 0.4646508655234682, "learning_rate": 5.929865727553246e-06, "loss": 0.4298, "step": 14919 }, { "epoch": 0.4572759592987618, "grad_norm": 1.3549846457364747, "learning_rate": 5.9293780634848096e-06, "loss": 0.6845, "step": 14920 }, { "epoch": 0.457306607821503, "grad_norm": 1.3112453407132847, "learning_rate": 5.928890390258975e-06, "loss": 0.6225, "step": 14921 }, { "epoch": 0.4573372563442442, "grad_norm": 1.0872781721700904, "learning_rate": 5.928402707880544e-06, "loss": 0.6202, "step": 14922 }, { "epoch": 0.4573679048669854, "grad_norm": 1.2501038410163268, "learning_rate": 5.927915016354324e-06, "loss": 0.67, "step": 14923 }, { "epoch": 0.4573985533897266, "grad_norm": 0.4313920603438568, "learning_rate": 5.92742731568512e-06, "loss": 0.3919, "step": 14924 }, { "epoch": 0.45742920191246783, "grad_norm": 0.4485408895008824, "learning_rate": 5.926939605877738e-06, "loss": 0.4169, "step": 14925 }, { "epoch": 0.45745985043520904, "grad_norm": 1.058966670942153, "learning_rate": 5.926451886936983e-06, "loss": 0.6668, "step": 14926 }, { "epoch": 0.45749049895795024, "grad_norm": 1.1726518734124909, "learning_rate": 5.925964158867659e-06, "loss": 0.5827, "step": 14927 }, { "epoch": 0.45752114748069145, "grad_norm": 1.35043948594881, "learning_rate": 5.925476421674574e-06, "loss": 0.7126, "step": 14928 }, { "epoch": 0.45755179600343265, "grad_norm": 1.1751426393318178, "learning_rate": 5.924988675362534e-06, "loss": 0.6727, "step": 14929 }, { "epoch": 0.45758244452617386, "grad_norm": 1.338919674439037, "learning_rate": 5.9245009199363435e-06, "loss": 0.7132, "step": 14930 }, { "epoch": 0.45761309304891506, "grad_norm": 0.43968109184321363, "learning_rate": 5.9240131554008074e-06, "loss": 0.4008, "step": 14931 }, { "epoch": 0.45764374157165627, "grad_norm": 1.3253431750188014, "learning_rate": 5.923525381760735e-06, "loss": 0.6905, "step": 14932 }, { "epoch": 0.4576743900943975, "grad_norm": 1.5344430195093524, "learning_rate": 5.92303759902093e-06, "loss": 0.7061, "step": 14933 }, { "epoch": 0.4577050386171386, "grad_norm": 1.183345754627549, "learning_rate": 5.9225498071861994e-06, "loss": 0.6137, "step": 14934 }, { "epoch": 0.45773568713987983, "grad_norm": 0.48657801286965063, "learning_rate": 5.922062006261349e-06, "loss": 0.423, "step": 14935 }, { "epoch": 0.45776633566262104, "grad_norm": 1.1560750731669756, "learning_rate": 5.921574196251188e-06, "loss": 0.633, "step": 14936 }, { "epoch": 0.45779698418536224, "grad_norm": 1.3591294332346073, "learning_rate": 5.921086377160519e-06, "loss": 0.673, "step": 14937 }, { "epoch": 0.45782763270810345, "grad_norm": 1.0944546991690627, "learning_rate": 5.92059854899415e-06, "loss": 0.6066, "step": 14938 }, { "epoch": 0.45785828123084465, "grad_norm": 2.3856563284439813, "learning_rate": 5.9201107117568865e-06, "loss": 0.6802, "step": 14939 }, { "epoch": 0.45788892975358586, "grad_norm": 1.4834485312541987, "learning_rate": 5.919622865453539e-06, "loss": 0.6531, "step": 14940 }, { "epoch": 0.45791957827632707, "grad_norm": 1.156657923640996, "learning_rate": 5.9191350100889126e-06, "loss": 0.5177, "step": 14941 }, { "epoch": 0.45795022679906827, "grad_norm": 1.3684314155931323, "learning_rate": 5.918647145667811e-06, "loss": 0.6487, "step": 14942 }, { "epoch": 0.4579808753218095, "grad_norm": 1.3912424971171893, "learning_rate": 5.918159272195046e-06, "loss": 0.698, "step": 14943 }, { "epoch": 0.4580115238445507, "grad_norm": 1.2499710702099915, "learning_rate": 5.917671389675424e-06, "loss": 0.616, "step": 14944 }, { "epoch": 0.4580421723672919, "grad_norm": 1.1812039240936676, "learning_rate": 5.917183498113749e-06, "loss": 0.5743, "step": 14945 }, { "epoch": 0.4580728208900331, "grad_norm": 1.216126371503001, "learning_rate": 5.91669559751483e-06, "loss": 0.6804, "step": 14946 }, { "epoch": 0.4581034694127743, "grad_norm": 1.3379265800183489, "learning_rate": 5.916207687883476e-06, "loss": 0.6373, "step": 14947 }, { "epoch": 0.4581341179355155, "grad_norm": 1.3796789601471606, "learning_rate": 5.9157197692244925e-06, "loss": 0.6936, "step": 14948 }, { "epoch": 0.4581647664582567, "grad_norm": 1.2032090757202663, "learning_rate": 5.915231841542689e-06, "loss": 0.6489, "step": 14949 }, { "epoch": 0.4581954149809979, "grad_norm": 1.3457031596339801, "learning_rate": 5.91474390484287e-06, "loss": 0.5902, "step": 14950 }, { "epoch": 0.4582260635037391, "grad_norm": 0.44142681252279886, "learning_rate": 5.914255959129846e-06, "loss": 0.4013, "step": 14951 }, { "epoch": 0.45825671202648033, "grad_norm": 1.3498850913663574, "learning_rate": 5.913768004408425e-06, "loss": 0.6944, "step": 14952 }, { "epoch": 0.45828736054922153, "grad_norm": 1.3888986226224869, "learning_rate": 5.913280040683414e-06, "loss": 0.6334, "step": 14953 }, { "epoch": 0.45831800907196274, "grad_norm": 1.3448229747274274, "learning_rate": 5.912792067959619e-06, "loss": 0.6937, "step": 14954 }, { "epoch": 0.45834865759470395, "grad_norm": 1.155792652776449, "learning_rate": 5.912304086241853e-06, "loss": 0.6465, "step": 14955 }, { "epoch": 0.45837930611744515, "grad_norm": 1.235773130722511, "learning_rate": 5.91181609553492e-06, "loss": 0.6349, "step": 14956 }, { "epoch": 0.45840995464018636, "grad_norm": 1.359294224443925, "learning_rate": 5.911328095843629e-06, "loss": 0.6658, "step": 14957 }, { "epoch": 0.45844060316292756, "grad_norm": 1.1788573553426935, "learning_rate": 5.910840087172791e-06, "loss": 0.7008, "step": 14958 }, { "epoch": 0.45847125168566877, "grad_norm": 1.3397342115827326, "learning_rate": 5.910352069527211e-06, "loss": 0.7919, "step": 14959 }, { "epoch": 0.45850190020841, "grad_norm": 0.47715066155711106, "learning_rate": 5.909864042911702e-06, "loss": 0.4288, "step": 14960 }, { "epoch": 0.4585325487311512, "grad_norm": 0.48045472724598803, "learning_rate": 5.909376007331066e-06, "loss": 0.4186, "step": 14961 }, { "epoch": 0.4585631972538924, "grad_norm": 1.36698002093868, "learning_rate": 5.908887962790117e-06, "loss": 0.64, "step": 14962 }, { "epoch": 0.4585938457766336, "grad_norm": 1.2659243615371059, "learning_rate": 5.9083999092936625e-06, "loss": 0.7408, "step": 14963 }, { "epoch": 0.4586244942993748, "grad_norm": 1.441700523313406, "learning_rate": 5.907911846846511e-06, "loss": 0.6063, "step": 14964 }, { "epoch": 0.45865514282211595, "grad_norm": 1.3762894618580825, "learning_rate": 5.907423775453472e-06, "loss": 0.7474, "step": 14965 }, { "epoch": 0.45868579134485715, "grad_norm": 1.258745248474834, "learning_rate": 5.906935695119354e-06, "loss": 0.6895, "step": 14966 }, { "epoch": 0.45871643986759836, "grad_norm": 0.4599912276029898, "learning_rate": 5.906447605848967e-06, "loss": 0.417, "step": 14967 }, { "epoch": 0.45874708839033956, "grad_norm": 1.2309414822624418, "learning_rate": 5.90595950764712e-06, "loss": 0.6684, "step": 14968 }, { "epoch": 0.45877773691308077, "grad_norm": 1.3063907095433933, "learning_rate": 5.905471400518622e-06, "loss": 0.8039, "step": 14969 }, { "epoch": 0.458808385435822, "grad_norm": 1.3129227004971566, "learning_rate": 5.904983284468282e-06, "loss": 0.6542, "step": 14970 }, { "epoch": 0.4588390339585632, "grad_norm": 0.43702865835828847, "learning_rate": 5.9044951595009114e-06, "loss": 0.4154, "step": 14971 }, { "epoch": 0.4588696824813044, "grad_norm": 1.2905257555326293, "learning_rate": 5.9040070256213166e-06, "loss": 0.6953, "step": 14972 }, { "epoch": 0.4589003310040456, "grad_norm": 1.3329590197306034, "learning_rate": 5.903518882834311e-06, "loss": 0.6394, "step": 14973 }, { "epoch": 0.4589309795267868, "grad_norm": 1.235006592067912, "learning_rate": 5.903030731144701e-06, "loss": 0.6369, "step": 14974 }, { "epoch": 0.458961628049528, "grad_norm": 1.250453474534786, "learning_rate": 5.902542570557302e-06, "loss": 0.6771, "step": 14975 }, { "epoch": 0.4589922765722692, "grad_norm": 1.229166217709501, "learning_rate": 5.9020544010769155e-06, "loss": 0.6586, "step": 14976 }, { "epoch": 0.4590229250950104, "grad_norm": 1.181348005428687, "learning_rate": 5.901566222708359e-06, "loss": 0.731, "step": 14977 }, { "epoch": 0.4590535736177516, "grad_norm": 1.408890247374561, "learning_rate": 5.901078035456438e-06, "loss": 0.7387, "step": 14978 }, { "epoch": 0.4590842221404928, "grad_norm": 1.2666656154876272, "learning_rate": 5.900589839325966e-06, "loss": 0.6631, "step": 14979 }, { "epoch": 0.45911487066323403, "grad_norm": 1.0274515580148615, "learning_rate": 5.900101634321751e-06, "loss": 0.6154, "step": 14980 }, { "epoch": 0.45914551918597524, "grad_norm": 1.2730892660661368, "learning_rate": 5.899613420448606e-06, "loss": 0.6653, "step": 14981 }, { "epoch": 0.45917616770871644, "grad_norm": 1.4616244777573633, "learning_rate": 5.899125197711337e-06, "loss": 0.6558, "step": 14982 }, { "epoch": 0.45920681623145765, "grad_norm": 1.3314371560257676, "learning_rate": 5.898636966114759e-06, "loss": 0.6208, "step": 14983 }, { "epoch": 0.45923746475419885, "grad_norm": 1.3147353496495682, "learning_rate": 5.898148725663682e-06, "loss": 0.6406, "step": 14984 }, { "epoch": 0.45926811327694006, "grad_norm": 1.2154361391809265, "learning_rate": 5.8976604763629135e-06, "loss": 0.5902, "step": 14985 }, { "epoch": 0.45929876179968127, "grad_norm": 1.3408647288839344, "learning_rate": 5.897172218217269e-06, "loss": 0.6929, "step": 14986 }, { "epoch": 0.45932941032242247, "grad_norm": 0.4688566602631153, "learning_rate": 5.896683951231554e-06, "loss": 0.4251, "step": 14987 }, { "epoch": 0.4593600588451637, "grad_norm": 1.3408263916319254, "learning_rate": 5.896195675410586e-06, "loss": 0.6527, "step": 14988 }, { "epoch": 0.4593907073679049, "grad_norm": 1.1404263920880584, "learning_rate": 5.89570739075917e-06, "loss": 0.5975, "step": 14989 }, { "epoch": 0.4594213558906461, "grad_norm": 1.2074614578928222, "learning_rate": 5.895219097282123e-06, "loss": 0.601, "step": 14990 }, { "epoch": 0.4594520044133873, "grad_norm": 1.3930095108064378, "learning_rate": 5.89473079498425e-06, "loss": 0.7243, "step": 14991 }, { "epoch": 0.4594826529361285, "grad_norm": 1.2221872149630528, "learning_rate": 5.894242483870367e-06, "loss": 0.6544, "step": 14992 }, { "epoch": 0.4595133014588697, "grad_norm": 1.421079376415241, "learning_rate": 5.893754163945283e-06, "loss": 0.7414, "step": 14993 }, { "epoch": 0.4595439499816109, "grad_norm": 1.3072950194555144, "learning_rate": 5.893265835213813e-06, "loss": 0.6723, "step": 14994 }, { "epoch": 0.4595745985043521, "grad_norm": 1.1817014629278872, "learning_rate": 5.8927774976807635e-06, "loss": 0.6391, "step": 14995 }, { "epoch": 0.45960524702709327, "grad_norm": 1.3347229187649423, "learning_rate": 5.892289151350951e-06, "loss": 0.6587, "step": 14996 }, { "epoch": 0.4596358955498345, "grad_norm": 1.2442243473879577, "learning_rate": 5.891800796229183e-06, "loss": 0.6127, "step": 14997 }, { "epoch": 0.4596665440725757, "grad_norm": 1.497003575796776, "learning_rate": 5.891312432320275e-06, "loss": 0.701, "step": 14998 }, { "epoch": 0.4596971925953169, "grad_norm": 1.2626876418613455, "learning_rate": 5.890824059629038e-06, "loss": 0.6229, "step": 14999 }, { "epoch": 0.4597278411180581, "grad_norm": 1.2537972149680234, "learning_rate": 5.890335678160282e-06, "loss": 0.6495, "step": 15000 }, { "epoch": 0.4597584896407993, "grad_norm": 1.3470032855302028, "learning_rate": 5.8898472879188216e-06, "loss": 0.5845, "step": 15001 }, { "epoch": 0.4597891381635405, "grad_norm": 0.45569933151596764, "learning_rate": 5.8893588889094684e-06, "loss": 0.4051, "step": 15002 }, { "epoch": 0.4598197866862817, "grad_norm": 1.0739590918385231, "learning_rate": 5.888870481137036e-06, "loss": 0.593, "step": 15003 }, { "epoch": 0.4598504352090229, "grad_norm": 1.1741083270149408, "learning_rate": 5.888382064606333e-06, "loss": 0.6418, "step": 15004 }, { "epoch": 0.4598810837317641, "grad_norm": 1.1741087632672544, "learning_rate": 5.887893639322174e-06, "loss": 0.6394, "step": 15005 }, { "epoch": 0.4599117322545053, "grad_norm": 1.2281299494923241, "learning_rate": 5.8874052052893734e-06, "loss": 0.6471, "step": 15006 }, { "epoch": 0.45994238077724653, "grad_norm": 1.2629978780513562, "learning_rate": 5.886916762512742e-06, "loss": 0.6254, "step": 15007 }, { "epoch": 0.45997302929998773, "grad_norm": 1.1567676478140465, "learning_rate": 5.886428310997092e-06, "loss": 0.6318, "step": 15008 }, { "epoch": 0.46000367782272894, "grad_norm": 1.4152781326858621, "learning_rate": 5.885939850747237e-06, "loss": 0.6735, "step": 15009 }, { "epoch": 0.46003432634547015, "grad_norm": 1.3584482053776603, "learning_rate": 5.885451381767991e-06, "loss": 0.7329, "step": 15010 }, { "epoch": 0.46006497486821135, "grad_norm": 1.4291392276140176, "learning_rate": 5.884962904064164e-06, "loss": 0.5148, "step": 15011 }, { "epoch": 0.46009562339095256, "grad_norm": 1.2320550236803425, "learning_rate": 5.884474417640573e-06, "loss": 0.6838, "step": 15012 }, { "epoch": 0.46012627191369376, "grad_norm": 1.3387549008110389, "learning_rate": 5.883985922502029e-06, "loss": 0.6576, "step": 15013 }, { "epoch": 0.46015692043643497, "grad_norm": 1.1555263371981759, "learning_rate": 5.8834974186533456e-06, "loss": 0.6757, "step": 15014 }, { "epoch": 0.4601875689591762, "grad_norm": 1.2352887651322735, "learning_rate": 5.883008906099336e-06, "loss": 0.7224, "step": 15015 }, { "epoch": 0.4602182174819174, "grad_norm": 1.2034189127739332, "learning_rate": 5.882520384844813e-06, "loss": 0.6231, "step": 15016 }, { "epoch": 0.4602488660046586, "grad_norm": 0.4862337861841475, "learning_rate": 5.8820318548945925e-06, "loss": 0.4159, "step": 15017 }, { "epoch": 0.4602795145273998, "grad_norm": 0.47183290092843966, "learning_rate": 5.881543316253485e-06, "loss": 0.4029, "step": 15018 }, { "epoch": 0.460310163050141, "grad_norm": 1.2951123434716, "learning_rate": 5.881054768926306e-06, "loss": 0.6271, "step": 15019 }, { "epoch": 0.4603408115728822, "grad_norm": 1.187588675487904, "learning_rate": 5.880566212917868e-06, "loss": 0.6511, "step": 15020 }, { "epoch": 0.4603714600956234, "grad_norm": 1.1990009546769913, "learning_rate": 5.880077648232987e-06, "loss": 0.6302, "step": 15021 }, { "epoch": 0.4604021086183646, "grad_norm": 0.4661758764877491, "learning_rate": 5.879589074876476e-06, "loss": 0.4296, "step": 15022 }, { "epoch": 0.4604327571411058, "grad_norm": 1.2841717587707238, "learning_rate": 5.879100492853147e-06, "loss": 0.5371, "step": 15023 }, { "epoch": 0.460463405663847, "grad_norm": 1.4235284558648857, "learning_rate": 5.878611902167818e-06, "loss": 0.6839, "step": 15024 }, { "epoch": 0.46049405418658823, "grad_norm": 1.2401019030910667, "learning_rate": 5.8781233028253e-06, "loss": 0.6294, "step": 15025 }, { "epoch": 0.46052470270932944, "grad_norm": 1.1492443045671148, "learning_rate": 5.8776346948304075e-06, "loss": 0.6491, "step": 15026 }, { "epoch": 0.4605553512320706, "grad_norm": 1.3205153068785531, "learning_rate": 5.877146078187957e-06, "loss": 0.6515, "step": 15027 }, { "epoch": 0.4605859997548118, "grad_norm": 1.2953774431632612, "learning_rate": 5.876657452902762e-06, "loss": 0.662, "step": 15028 }, { "epoch": 0.460616648277553, "grad_norm": 0.46549864684838205, "learning_rate": 5.8761688189796376e-06, "loss": 0.3936, "step": 15029 }, { "epoch": 0.4606472968002942, "grad_norm": 1.1852563914585794, "learning_rate": 5.875680176423396e-06, "loss": 0.6393, "step": 15030 }, { "epoch": 0.4606779453230354, "grad_norm": 0.47933729404421577, "learning_rate": 5.8751915252388546e-06, "loss": 0.43, "step": 15031 }, { "epoch": 0.4607085938457766, "grad_norm": 1.2142721542859434, "learning_rate": 5.874702865430826e-06, "loss": 0.6809, "step": 15032 }, { "epoch": 0.4607392423685178, "grad_norm": 0.4669319589118971, "learning_rate": 5.874214197004128e-06, "loss": 0.4392, "step": 15033 }, { "epoch": 0.460769890891259, "grad_norm": 1.2980100549724096, "learning_rate": 5.873725519963572e-06, "loss": 0.6109, "step": 15034 }, { "epoch": 0.46080053941400023, "grad_norm": 0.45712973489696895, "learning_rate": 5.873236834313976e-06, "loss": 0.4231, "step": 15035 }, { "epoch": 0.46083118793674144, "grad_norm": 1.286341390316908, "learning_rate": 5.872748140060152e-06, "loss": 0.6332, "step": 15036 }, { "epoch": 0.46086183645948264, "grad_norm": 1.2958864673035093, "learning_rate": 5.872259437206921e-06, "loss": 0.7046, "step": 15037 }, { "epoch": 0.46089248498222385, "grad_norm": 1.317243373235566, "learning_rate": 5.871770725759093e-06, "loss": 0.6897, "step": 15038 }, { "epoch": 0.46092313350496505, "grad_norm": 1.2007771088440224, "learning_rate": 5.871282005721484e-06, "loss": 0.6983, "step": 15039 }, { "epoch": 0.46095378202770626, "grad_norm": 1.3400161804859794, "learning_rate": 5.870793277098912e-06, "loss": 0.6935, "step": 15040 }, { "epoch": 0.46098443055044747, "grad_norm": 0.4531783054654927, "learning_rate": 5.870304539896189e-06, "loss": 0.4123, "step": 15041 }, { "epoch": 0.46101507907318867, "grad_norm": 1.2673972272589078, "learning_rate": 5.8698157941181344e-06, "loss": 0.596, "step": 15042 }, { "epoch": 0.4610457275959299, "grad_norm": 0.5179875017952154, "learning_rate": 5.869327039769561e-06, "loss": 0.4238, "step": 15043 }, { "epoch": 0.4610763761186711, "grad_norm": 1.0958885240650253, "learning_rate": 5.868838276855287e-06, "loss": 0.7015, "step": 15044 }, { "epoch": 0.4611070246414123, "grad_norm": 1.315863619367694, "learning_rate": 5.868349505380127e-06, "loss": 0.6658, "step": 15045 }, { "epoch": 0.4611376731641535, "grad_norm": 1.2739510617981213, "learning_rate": 5.867860725348896e-06, "loss": 0.5982, "step": 15046 }, { "epoch": 0.4611683216868947, "grad_norm": 1.2691676813149342, "learning_rate": 5.867371936766412e-06, "loss": 0.7214, "step": 15047 }, { "epoch": 0.4611989702096359, "grad_norm": 1.2493312968477033, "learning_rate": 5.866883139637492e-06, "loss": 0.591, "step": 15048 }, { "epoch": 0.4612296187323771, "grad_norm": 1.3402260406575317, "learning_rate": 5.866394333966948e-06, "loss": 0.6022, "step": 15049 }, { "epoch": 0.4612602672551183, "grad_norm": 1.2111789584470798, "learning_rate": 5.865905519759601e-06, "loss": 0.6312, "step": 15050 }, { "epoch": 0.4612909157778595, "grad_norm": 0.5250028461745269, "learning_rate": 5.865416697020263e-06, "loss": 0.4207, "step": 15051 }, { "epoch": 0.46132156430060073, "grad_norm": 1.1504280892639642, "learning_rate": 5.864927865753754e-06, "loss": 0.6012, "step": 15052 }, { "epoch": 0.46135221282334193, "grad_norm": 1.2211197567664391, "learning_rate": 5.86443902596489e-06, "loss": 0.6704, "step": 15053 }, { "epoch": 0.46138286134608314, "grad_norm": 0.47278071911328906, "learning_rate": 5.863950177658486e-06, "loss": 0.4209, "step": 15054 }, { "epoch": 0.46141350986882435, "grad_norm": 1.085528412717534, "learning_rate": 5.863461320839361e-06, "loss": 0.6733, "step": 15055 }, { "epoch": 0.46144415839156555, "grad_norm": 1.2367749083316888, "learning_rate": 5.862972455512331e-06, "loss": 0.6119, "step": 15056 }, { "epoch": 0.46147480691430676, "grad_norm": 1.3086568548725814, "learning_rate": 5.862483581682211e-06, "loss": 0.5998, "step": 15057 }, { "epoch": 0.4615054554370479, "grad_norm": 0.4750638400701001, "learning_rate": 5.86199469935382e-06, "loss": 0.4313, "step": 15058 }, { "epoch": 0.4615361039597891, "grad_norm": 1.146400392292953, "learning_rate": 5.861505808531975e-06, "loss": 0.6131, "step": 15059 }, { "epoch": 0.4615667524825303, "grad_norm": 1.157528695747493, "learning_rate": 5.861016909221494e-06, "loss": 0.6174, "step": 15060 }, { "epoch": 0.4615974010052715, "grad_norm": 1.2091487569105048, "learning_rate": 5.860528001427193e-06, "loss": 0.6371, "step": 15061 }, { "epoch": 0.46162804952801273, "grad_norm": 1.4141020718664274, "learning_rate": 5.860039085153887e-06, "loss": 0.6565, "step": 15062 }, { "epoch": 0.46165869805075394, "grad_norm": 0.4911879082853616, "learning_rate": 5.859550160406397e-06, "loss": 0.4045, "step": 15063 }, { "epoch": 0.46168934657349514, "grad_norm": 1.2567537068718326, "learning_rate": 5.859061227189541e-06, "loss": 0.5576, "step": 15064 }, { "epoch": 0.46171999509623635, "grad_norm": 1.3706405302506264, "learning_rate": 5.858572285508134e-06, "loss": 0.6486, "step": 15065 }, { "epoch": 0.46175064361897755, "grad_norm": 1.3343807462777946, "learning_rate": 5.858083335366993e-06, "loss": 0.7222, "step": 15066 }, { "epoch": 0.46178129214171876, "grad_norm": 1.318136612924972, "learning_rate": 5.8575943767709384e-06, "loss": 0.7145, "step": 15067 }, { "epoch": 0.46181194066445996, "grad_norm": 1.1728896013479417, "learning_rate": 5.857105409724788e-06, "loss": 0.6206, "step": 15068 }, { "epoch": 0.46184258918720117, "grad_norm": 1.222787647843779, "learning_rate": 5.856616434233358e-06, "loss": 0.6358, "step": 15069 }, { "epoch": 0.4618732377099424, "grad_norm": 1.3362101229634449, "learning_rate": 5.856127450301467e-06, "loss": 0.6818, "step": 15070 }, { "epoch": 0.4619038862326836, "grad_norm": 1.2817433003390553, "learning_rate": 5.855638457933933e-06, "loss": 0.6193, "step": 15071 }, { "epoch": 0.4619345347554248, "grad_norm": 1.2416070973284035, "learning_rate": 5.855149457135575e-06, "loss": 0.6757, "step": 15072 }, { "epoch": 0.461965183278166, "grad_norm": 1.3121037876799928, "learning_rate": 5.854660447911209e-06, "loss": 0.7383, "step": 15073 }, { "epoch": 0.4619958318009072, "grad_norm": 1.4324239894773185, "learning_rate": 5.854171430265656e-06, "loss": 0.5248, "step": 15074 }, { "epoch": 0.4620264803236484, "grad_norm": 1.2056203738626146, "learning_rate": 5.853682404203733e-06, "loss": 0.5741, "step": 15075 }, { "epoch": 0.4620571288463896, "grad_norm": 1.1513219379613115, "learning_rate": 5.85319336973026e-06, "loss": 0.5754, "step": 15076 }, { "epoch": 0.4620877773691308, "grad_norm": 1.3376834885513216, "learning_rate": 5.852704326850053e-06, "loss": 0.6217, "step": 15077 }, { "epoch": 0.462118425891872, "grad_norm": 1.3137239498743487, "learning_rate": 5.852215275567933e-06, "loss": 0.6571, "step": 15078 }, { "epoch": 0.4621490744146132, "grad_norm": 1.3994468021312962, "learning_rate": 5.851726215888718e-06, "loss": 0.6161, "step": 15079 }, { "epoch": 0.46217972293735443, "grad_norm": 1.1807932710851223, "learning_rate": 5.851237147817226e-06, "loss": 0.7433, "step": 15080 }, { "epoch": 0.46221037146009564, "grad_norm": 1.198187293141916, "learning_rate": 5.8507480713582765e-06, "loss": 0.6723, "step": 15081 }, { "epoch": 0.46224101998283684, "grad_norm": 1.2341656775633485, "learning_rate": 5.850258986516688e-06, "loss": 0.6486, "step": 15082 }, { "epoch": 0.46227166850557805, "grad_norm": 1.1542105761513426, "learning_rate": 5.8497698932972826e-06, "loss": 0.6175, "step": 15083 }, { "epoch": 0.46230231702831925, "grad_norm": 1.2880510378101677, "learning_rate": 5.849280791704874e-06, "loss": 0.6971, "step": 15084 }, { "epoch": 0.46233296555106046, "grad_norm": 0.4759404767694794, "learning_rate": 5.848791681744287e-06, "loss": 0.4169, "step": 15085 }, { "epoch": 0.46236361407380167, "grad_norm": 1.2785187394370663, "learning_rate": 5.848302563420336e-06, "loss": 0.7162, "step": 15086 }, { "epoch": 0.46239426259654287, "grad_norm": 0.4461815202658579, "learning_rate": 5.8478134367378455e-06, "loss": 0.4126, "step": 15087 }, { "epoch": 0.4624249111192841, "grad_norm": 0.47485076399737, "learning_rate": 5.84732430170163e-06, "loss": 0.4174, "step": 15088 }, { "epoch": 0.4624555596420252, "grad_norm": 1.4032357930660453, "learning_rate": 5.846835158316513e-06, "loss": 0.7232, "step": 15089 }, { "epoch": 0.46248620816476643, "grad_norm": 1.1439955223047487, "learning_rate": 5.846346006587312e-06, "loss": 0.6842, "step": 15090 }, { "epoch": 0.46251685668750764, "grad_norm": 1.2555286338464602, "learning_rate": 5.845856846518847e-06, "loss": 0.5729, "step": 15091 }, { "epoch": 0.46254750521024884, "grad_norm": 0.4703300310818834, "learning_rate": 5.8453676781159375e-06, "loss": 0.4135, "step": 15092 }, { "epoch": 0.46257815373299005, "grad_norm": 1.3102705670424017, "learning_rate": 5.844878501383406e-06, "loss": 0.7695, "step": 15093 }, { "epoch": 0.46260880225573126, "grad_norm": 1.3741783376270857, "learning_rate": 5.844389316326069e-06, "loss": 0.6374, "step": 15094 }, { "epoch": 0.46263945077847246, "grad_norm": 1.3058837464507165, "learning_rate": 5.843900122948747e-06, "loss": 0.7054, "step": 15095 }, { "epoch": 0.46267009930121367, "grad_norm": 1.3909796137094317, "learning_rate": 5.843410921256262e-06, "loss": 0.6735, "step": 15096 }, { "epoch": 0.46270074782395487, "grad_norm": 1.163520670966179, "learning_rate": 5.842921711253433e-06, "loss": 0.6168, "step": 15097 }, { "epoch": 0.4627313963466961, "grad_norm": 1.2066034958906513, "learning_rate": 5.842432492945083e-06, "loss": 0.5762, "step": 15098 }, { "epoch": 0.4627620448694373, "grad_norm": 1.2908039254476606, "learning_rate": 5.841943266336027e-06, "loss": 0.6308, "step": 15099 }, { "epoch": 0.4627926933921785, "grad_norm": 0.459229572671893, "learning_rate": 5.841454031431091e-06, "loss": 0.4096, "step": 15100 }, { "epoch": 0.4628233419149197, "grad_norm": 1.4011482564474853, "learning_rate": 5.840964788235091e-06, "loss": 0.6268, "step": 15101 }, { "epoch": 0.4628539904376609, "grad_norm": 1.402917404744207, "learning_rate": 5.840475536752852e-06, "loss": 0.753, "step": 15102 }, { "epoch": 0.4628846389604021, "grad_norm": 1.2396506810606986, "learning_rate": 5.839986276989191e-06, "loss": 0.6133, "step": 15103 }, { "epoch": 0.4629152874831433, "grad_norm": 1.236130701978634, "learning_rate": 5.839497008948931e-06, "loss": 0.6204, "step": 15104 }, { "epoch": 0.4629459360058845, "grad_norm": 0.4720870229028207, "learning_rate": 5.839007732636891e-06, "loss": 0.3937, "step": 15105 }, { "epoch": 0.4629765845286257, "grad_norm": 1.2270901109249843, "learning_rate": 5.838518448057894e-06, "loss": 0.6769, "step": 15106 }, { "epoch": 0.46300723305136693, "grad_norm": 1.3106714901463248, "learning_rate": 5.83802915521676e-06, "loss": 0.7412, "step": 15107 }, { "epoch": 0.46303788157410813, "grad_norm": 1.3041993091687427, "learning_rate": 5.8375398541183106e-06, "loss": 0.723, "step": 15108 }, { "epoch": 0.46306853009684934, "grad_norm": 0.4309325943181296, "learning_rate": 5.837050544767367e-06, "loss": 0.4322, "step": 15109 }, { "epoch": 0.46309917861959055, "grad_norm": 1.350289252672192, "learning_rate": 5.83656122716875e-06, "loss": 0.7149, "step": 15110 }, { "epoch": 0.46312982714233175, "grad_norm": 1.3973073439112351, "learning_rate": 5.836071901327281e-06, "loss": 0.7086, "step": 15111 }, { "epoch": 0.46316047566507296, "grad_norm": 1.2408339603396605, "learning_rate": 5.8355825672477805e-06, "loss": 0.6196, "step": 15112 }, { "epoch": 0.46319112418781416, "grad_norm": 1.276810326953358, "learning_rate": 5.835093224935073e-06, "loss": 0.6347, "step": 15113 }, { "epoch": 0.46322177271055537, "grad_norm": 1.3081349799732898, "learning_rate": 5.834603874393978e-06, "loss": 0.539, "step": 15114 }, { "epoch": 0.4632524212332966, "grad_norm": 1.1097086034136825, "learning_rate": 5.8341145156293175e-06, "loss": 0.5796, "step": 15115 }, { "epoch": 0.4632830697560378, "grad_norm": 1.5363372249227758, "learning_rate": 5.8336251486459114e-06, "loss": 0.6957, "step": 15116 }, { "epoch": 0.463313718278779, "grad_norm": 1.1852003043529533, "learning_rate": 5.833135773448587e-06, "loss": 0.6105, "step": 15117 }, { "epoch": 0.4633443668015202, "grad_norm": 1.1858401082876557, "learning_rate": 5.832646390042159e-06, "loss": 0.5603, "step": 15118 }, { "epoch": 0.4633750153242614, "grad_norm": 1.3386884039995288, "learning_rate": 5.832156998431456e-06, "loss": 0.6337, "step": 15119 }, { "epoch": 0.46340566384700255, "grad_norm": 1.192613502839145, "learning_rate": 5.831667598621294e-06, "loss": 0.6657, "step": 15120 }, { "epoch": 0.46343631236974375, "grad_norm": 1.3003692954916104, "learning_rate": 5.831178190616501e-06, "loss": 0.65, "step": 15121 }, { "epoch": 0.46346696089248496, "grad_norm": 1.2281291996980805, "learning_rate": 5.830688774421896e-06, "loss": 0.5992, "step": 15122 }, { "epoch": 0.46349760941522616, "grad_norm": 1.248893635292617, "learning_rate": 5.8301993500423e-06, "loss": 0.5937, "step": 15123 }, { "epoch": 0.46352825793796737, "grad_norm": 1.3069817359714626, "learning_rate": 5.82970991748254e-06, "loss": 0.6061, "step": 15124 }, { "epoch": 0.4635589064607086, "grad_norm": 1.2671210559489483, "learning_rate": 5.829220476747436e-06, "loss": 0.559, "step": 15125 }, { "epoch": 0.4635895549834498, "grad_norm": 1.2926667937195047, "learning_rate": 5.82873102784181e-06, "loss": 0.6107, "step": 15126 }, { "epoch": 0.463620203506191, "grad_norm": 1.3304205378122556, "learning_rate": 5.828241570770483e-06, "loss": 0.7003, "step": 15127 }, { "epoch": 0.4636508520289322, "grad_norm": 1.134303131962481, "learning_rate": 5.827752105538282e-06, "loss": 0.6379, "step": 15128 }, { "epoch": 0.4636815005516734, "grad_norm": 1.209279291842088, "learning_rate": 5.827262632150028e-06, "loss": 0.6069, "step": 15129 }, { "epoch": 0.4637121490744146, "grad_norm": 1.1937814481249065, "learning_rate": 5.826773150610543e-06, "loss": 0.672, "step": 15130 }, { "epoch": 0.4637427975971558, "grad_norm": 1.3053908624015653, "learning_rate": 5.82628366092465e-06, "loss": 0.6447, "step": 15131 }, { "epoch": 0.463773446119897, "grad_norm": 1.2641354916094965, "learning_rate": 5.825794163097173e-06, "loss": 0.6401, "step": 15132 }, { "epoch": 0.4638040946426382, "grad_norm": 1.2151595297971591, "learning_rate": 5.825304657132935e-06, "loss": 0.6758, "step": 15133 }, { "epoch": 0.4638347431653794, "grad_norm": 0.4799384916699313, "learning_rate": 5.824815143036758e-06, "loss": 0.4151, "step": 15134 }, { "epoch": 0.46386539168812063, "grad_norm": 1.307215760200908, "learning_rate": 5.824325620813468e-06, "loss": 0.6239, "step": 15135 }, { "epoch": 0.46389604021086184, "grad_norm": 1.3418326786912824, "learning_rate": 5.823836090467887e-06, "loss": 0.7596, "step": 15136 }, { "epoch": 0.46392668873360304, "grad_norm": 1.0923448313862514, "learning_rate": 5.8233465520048375e-06, "loss": 0.6448, "step": 15137 }, { "epoch": 0.46395733725634425, "grad_norm": 1.1871729814304774, "learning_rate": 5.822857005429142e-06, "loss": 0.7525, "step": 15138 }, { "epoch": 0.46398798577908545, "grad_norm": 0.5012527929735757, "learning_rate": 5.8223674507456285e-06, "loss": 0.4428, "step": 15139 }, { "epoch": 0.46401863430182666, "grad_norm": 1.158342828857037, "learning_rate": 5.8218778879591175e-06, "loss": 0.6147, "step": 15140 }, { "epoch": 0.46404928282456787, "grad_norm": 0.45373263326486857, "learning_rate": 5.821388317074434e-06, "loss": 0.3996, "step": 15141 }, { "epoch": 0.46407993134730907, "grad_norm": 1.4645094930399505, "learning_rate": 5.820898738096399e-06, "loss": 0.6972, "step": 15142 }, { "epoch": 0.4641105798700503, "grad_norm": 1.2287976836010484, "learning_rate": 5.82040915102984e-06, "loss": 0.6454, "step": 15143 }, { "epoch": 0.4641412283927915, "grad_norm": 1.2306873286036826, "learning_rate": 5.819919555879579e-06, "loss": 0.5708, "step": 15144 }, { "epoch": 0.4641718769155327, "grad_norm": 1.2133996657855084, "learning_rate": 5.8194299526504425e-06, "loss": 0.6789, "step": 15145 }, { "epoch": 0.4642025254382739, "grad_norm": 1.2199079507188557, "learning_rate": 5.818940341347251e-06, "loss": 0.5949, "step": 15146 }, { "epoch": 0.4642331739610151, "grad_norm": 1.3567760246325247, "learning_rate": 5.818450721974832e-06, "loss": 0.6891, "step": 15147 }, { "epoch": 0.4642638224837563, "grad_norm": 1.267379910564141, "learning_rate": 5.817961094538008e-06, "loss": 0.6429, "step": 15148 }, { "epoch": 0.4642944710064975, "grad_norm": 1.3974160438566403, "learning_rate": 5.817471459041605e-06, "loss": 0.8074, "step": 15149 }, { "epoch": 0.4643251195292387, "grad_norm": 1.3050838324549443, "learning_rate": 5.816981815490446e-06, "loss": 0.6578, "step": 15150 }, { "epoch": 0.46435576805197987, "grad_norm": 1.3332036931200644, "learning_rate": 5.816492163889355e-06, "loss": 0.7197, "step": 15151 }, { "epoch": 0.4643864165747211, "grad_norm": 1.386732683549052, "learning_rate": 5.81600250424316e-06, "loss": 0.7134, "step": 15152 }, { "epoch": 0.4644170650974623, "grad_norm": 0.5130259439160292, "learning_rate": 5.815512836556683e-06, "loss": 0.4183, "step": 15153 }, { "epoch": 0.4644477136202035, "grad_norm": 1.3402178942405798, "learning_rate": 5.815023160834749e-06, "loss": 0.6423, "step": 15154 }, { "epoch": 0.4644783621429447, "grad_norm": 1.1941000324446072, "learning_rate": 5.814533477082182e-06, "loss": 0.6689, "step": 15155 }, { "epoch": 0.4645090106656859, "grad_norm": 0.4805919894922604, "learning_rate": 5.81404378530381e-06, "loss": 0.4369, "step": 15156 }, { "epoch": 0.4645396591884271, "grad_norm": 1.2802194248589656, "learning_rate": 5.813554085504455e-06, "loss": 0.589, "step": 15157 }, { "epoch": 0.4645703077111683, "grad_norm": 1.4275593754455935, "learning_rate": 5.813064377688944e-06, "loss": 0.6194, "step": 15158 }, { "epoch": 0.4646009562339095, "grad_norm": 1.1389725548551037, "learning_rate": 5.812574661862101e-06, "loss": 0.6042, "step": 15159 }, { "epoch": 0.4646316047566507, "grad_norm": 1.3048910214193756, "learning_rate": 5.812084938028753e-06, "loss": 0.7034, "step": 15160 }, { "epoch": 0.4646622532793919, "grad_norm": 1.3064262427755093, "learning_rate": 5.811595206193725e-06, "loss": 0.6267, "step": 15161 }, { "epoch": 0.46469290180213313, "grad_norm": 1.2482991966489165, "learning_rate": 5.81110546636184e-06, "loss": 0.6598, "step": 15162 }, { "epoch": 0.46472355032487433, "grad_norm": 1.2647865139518888, "learning_rate": 5.8106157185379264e-06, "loss": 0.648, "step": 15163 }, { "epoch": 0.46475419884761554, "grad_norm": 0.4703622648259607, "learning_rate": 5.810125962726808e-06, "loss": 0.4149, "step": 15164 }, { "epoch": 0.46478484737035675, "grad_norm": 1.2968628194099687, "learning_rate": 5.809636198933313e-06, "loss": 0.5998, "step": 15165 }, { "epoch": 0.46481549589309795, "grad_norm": 1.2820240023650344, "learning_rate": 5.809146427162262e-06, "loss": 0.6943, "step": 15166 }, { "epoch": 0.46484614441583916, "grad_norm": 1.167158659595144, "learning_rate": 5.808656647418488e-06, "loss": 0.5868, "step": 15167 }, { "epoch": 0.46487679293858036, "grad_norm": 1.251086282371132, "learning_rate": 5.808166859706811e-06, "loss": 0.5803, "step": 15168 }, { "epoch": 0.46490744146132157, "grad_norm": 1.1356013896996082, "learning_rate": 5.80767706403206e-06, "loss": 0.6382, "step": 15169 }, { "epoch": 0.4649380899840628, "grad_norm": 0.44508217129855077, "learning_rate": 5.807187260399058e-06, "loss": 0.4192, "step": 15170 }, { "epoch": 0.464968738506804, "grad_norm": 1.1350773456466556, "learning_rate": 5.806697448812637e-06, "loss": 0.7053, "step": 15171 }, { "epoch": 0.4649993870295452, "grad_norm": 0.46676949750297325, "learning_rate": 5.806207629277617e-06, "loss": 0.4169, "step": 15172 }, { "epoch": 0.4650300355522864, "grad_norm": 1.4180999955217757, "learning_rate": 5.805717801798828e-06, "loss": 0.6257, "step": 15173 }, { "epoch": 0.4650606840750276, "grad_norm": 1.4619792961060138, "learning_rate": 5.805227966381095e-06, "loss": 0.7025, "step": 15174 }, { "epoch": 0.4650913325977688, "grad_norm": 1.2678376335226715, "learning_rate": 5.8047381230292455e-06, "loss": 0.6015, "step": 15175 }, { "epoch": 0.46512198112051, "grad_norm": 1.190394947009339, "learning_rate": 5.804248271748104e-06, "loss": 0.6901, "step": 15176 }, { "epoch": 0.4651526296432512, "grad_norm": 1.3610184237560932, "learning_rate": 5.8037584125425e-06, "loss": 0.65, "step": 15177 }, { "epoch": 0.4651832781659924, "grad_norm": 1.3564639247062313, "learning_rate": 5.8032685454172574e-06, "loss": 0.713, "step": 15178 }, { "epoch": 0.4652139266887336, "grad_norm": 0.4480324785821438, "learning_rate": 5.802778670377205e-06, "loss": 0.4125, "step": 15179 }, { "epoch": 0.46524457521147483, "grad_norm": 0.4429109172934191, "learning_rate": 5.802288787427169e-06, "loss": 0.4304, "step": 15180 }, { "epoch": 0.46527522373421604, "grad_norm": 1.2292949443566896, "learning_rate": 5.801798896571975e-06, "loss": 0.7261, "step": 15181 }, { "epoch": 0.4653058722569572, "grad_norm": 1.2190163615603868, "learning_rate": 5.8013089978164535e-06, "loss": 0.6223, "step": 15182 }, { "epoch": 0.4653365207796984, "grad_norm": 1.2433671276713232, "learning_rate": 5.800819091165428e-06, "loss": 0.6462, "step": 15183 }, { "epoch": 0.4653671693024396, "grad_norm": 1.3626781774403032, "learning_rate": 5.800329176623728e-06, "loss": 0.7317, "step": 15184 }, { "epoch": 0.4653978178251808, "grad_norm": 1.1193749366691808, "learning_rate": 5.799839254196179e-06, "loss": 0.6805, "step": 15185 }, { "epoch": 0.465428466347922, "grad_norm": 1.2585114965960493, "learning_rate": 5.79934932388761e-06, "loss": 0.7068, "step": 15186 }, { "epoch": 0.4654591148706632, "grad_norm": 1.244735978341197, "learning_rate": 5.798859385702848e-06, "loss": 0.6094, "step": 15187 }, { "epoch": 0.4654897633934044, "grad_norm": 1.0526466067565392, "learning_rate": 5.798369439646718e-06, "loss": 0.5847, "step": 15188 }, { "epoch": 0.4655204119161456, "grad_norm": 1.618946522840844, "learning_rate": 5.7978794857240506e-06, "loss": 0.7231, "step": 15189 }, { "epoch": 0.46555106043888683, "grad_norm": 1.243227974268876, "learning_rate": 5.797389523939674e-06, "loss": 0.7258, "step": 15190 }, { "epoch": 0.46558170896162804, "grad_norm": 1.2563958059784976, "learning_rate": 5.796899554298413e-06, "loss": 0.6377, "step": 15191 }, { "epoch": 0.46561235748436924, "grad_norm": 1.237992773340123, "learning_rate": 5.796409576805096e-06, "loss": 0.645, "step": 15192 }, { "epoch": 0.46564300600711045, "grad_norm": 1.316932496758083, "learning_rate": 5.795919591464553e-06, "loss": 0.7364, "step": 15193 }, { "epoch": 0.46567365452985165, "grad_norm": 1.3547897711035797, "learning_rate": 5.79542959828161e-06, "loss": 0.5932, "step": 15194 }, { "epoch": 0.46570430305259286, "grad_norm": 1.3218150027889173, "learning_rate": 5.794939597261097e-06, "loss": 0.6571, "step": 15195 }, { "epoch": 0.46573495157533407, "grad_norm": 1.3182402872514327, "learning_rate": 5.794449588407838e-06, "loss": 0.6284, "step": 15196 }, { "epoch": 0.46576560009807527, "grad_norm": 1.1929219708673942, "learning_rate": 5.793959571726666e-06, "loss": 0.6855, "step": 15197 }, { "epoch": 0.4657962486208165, "grad_norm": 1.1630898801725265, "learning_rate": 5.7934695472224066e-06, "loss": 0.5849, "step": 15198 }, { "epoch": 0.4658268971435577, "grad_norm": 1.1980818333942858, "learning_rate": 5.79297951489989e-06, "loss": 0.6624, "step": 15199 }, { "epoch": 0.4658575456662989, "grad_norm": 1.1679658409375264, "learning_rate": 5.792489474763941e-06, "loss": 0.7377, "step": 15200 }, { "epoch": 0.4658881941890401, "grad_norm": 1.1633745357520797, "learning_rate": 5.791999426819393e-06, "loss": 0.5876, "step": 15201 }, { "epoch": 0.4659188427117813, "grad_norm": 1.2441936519594095, "learning_rate": 5.79150937107107e-06, "loss": 0.6326, "step": 15202 }, { "epoch": 0.4659494912345225, "grad_norm": 1.2310562594743417, "learning_rate": 5.7910193075238034e-06, "loss": 0.6136, "step": 15203 }, { "epoch": 0.4659801397572637, "grad_norm": 1.2101971910879534, "learning_rate": 5.790529236182421e-06, "loss": 0.7173, "step": 15204 }, { "epoch": 0.4660107882800049, "grad_norm": 1.1326936274182093, "learning_rate": 5.7900391570517504e-06, "loss": 0.6145, "step": 15205 }, { "epoch": 0.4660414368027461, "grad_norm": 1.1014023210877717, "learning_rate": 5.789549070136625e-06, "loss": 0.5818, "step": 15206 }, { "epoch": 0.46607208532548733, "grad_norm": 1.1288079035529137, "learning_rate": 5.789058975441868e-06, "loss": 0.7203, "step": 15207 }, { "epoch": 0.46610273384822853, "grad_norm": 1.160493159640714, "learning_rate": 5.788568872972312e-06, "loss": 0.6318, "step": 15208 }, { "epoch": 0.46613338237096974, "grad_norm": 1.302255434181251, "learning_rate": 5.788078762732785e-06, "loss": 0.6035, "step": 15209 }, { "epoch": 0.46616403089371095, "grad_norm": 1.3740476220294757, "learning_rate": 5.787588644728117e-06, "loss": 0.6927, "step": 15210 }, { "epoch": 0.46619467941645215, "grad_norm": 0.4963428856316077, "learning_rate": 5.787098518963136e-06, "loss": 0.4254, "step": 15211 }, { "epoch": 0.46622532793919336, "grad_norm": 1.2509867327000181, "learning_rate": 5.786608385442671e-06, "loss": 0.5838, "step": 15212 }, { "epoch": 0.4662559764619345, "grad_norm": 1.236099549172728, "learning_rate": 5.786118244171552e-06, "loss": 0.6622, "step": 15213 }, { "epoch": 0.4662866249846757, "grad_norm": 1.2366891342991921, "learning_rate": 5.7856280951546116e-06, "loss": 0.5547, "step": 15214 }, { "epoch": 0.4663172735074169, "grad_norm": 1.1473905491022678, "learning_rate": 5.785137938396674e-06, "loss": 0.6261, "step": 15215 }, { "epoch": 0.4663479220301581, "grad_norm": 1.3221425566488956, "learning_rate": 5.784647773902574e-06, "loss": 0.6565, "step": 15216 }, { "epoch": 0.46637857055289933, "grad_norm": 1.28422757698795, "learning_rate": 5.784157601677136e-06, "loss": 0.7179, "step": 15217 }, { "epoch": 0.46640921907564054, "grad_norm": 1.3027665842385563, "learning_rate": 5.7836674217251945e-06, "loss": 0.6467, "step": 15218 }, { "epoch": 0.46643986759838174, "grad_norm": 1.2612649007003172, "learning_rate": 5.783177234051576e-06, "loss": 0.6922, "step": 15219 }, { "epoch": 0.46647051612112295, "grad_norm": 0.4367335607717078, "learning_rate": 5.782687038661111e-06, "loss": 0.4288, "step": 15220 }, { "epoch": 0.46650116464386415, "grad_norm": 1.1412411011849874, "learning_rate": 5.782196835558633e-06, "loss": 0.6339, "step": 15221 }, { "epoch": 0.46653181316660536, "grad_norm": 1.1709398199067653, "learning_rate": 5.781706624748968e-06, "loss": 0.6155, "step": 15222 }, { "epoch": 0.46656246168934656, "grad_norm": 1.2688237498152515, "learning_rate": 5.781216406236948e-06, "loss": 0.6946, "step": 15223 }, { "epoch": 0.46659311021208777, "grad_norm": 1.2300065871710881, "learning_rate": 5.780726180027402e-06, "loss": 0.6803, "step": 15224 }, { "epoch": 0.466623758734829, "grad_norm": 0.4670020365013032, "learning_rate": 5.780235946125163e-06, "loss": 0.4256, "step": 15225 }, { "epoch": 0.4666544072575702, "grad_norm": 1.2035005924866289, "learning_rate": 5.779745704535057e-06, "loss": 0.6779, "step": 15226 }, { "epoch": 0.4666850557803114, "grad_norm": 1.1839173530247527, "learning_rate": 5.7792554552619184e-06, "loss": 0.6762, "step": 15227 }, { "epoch": 0.4667157043030526, "grad_norm": 1.342112723844395, "learning_rate": 5.778765198310576e-06, "loss": 0.728, "step": 15228 }, { "epoch": 0.4667463528257938, "grad_norm": 1.3537538623284149, "learning_rate": 5.778274933685863e-06, "loss": 0.607, "step": 15229 }, { "epoch": 0.466777001348535, "grad_norm": 1.6122998160299358, "learning_rate": 5.777784661392606e-06, "loss": 0.6709, "step": 15230 }, { "epoch": 0.4668076498712762, "grad_norm": 1.2420060377384803, "learning_rate": 5.777294381435636e-06, "loss": 0.633, "step": 15231 }, { "epoch": 0.4668382983940174, "grad_norm": 1.4011616903188224, "learning_rate": 5.776804093819789e-06, "loss": 0.6583, "step": 15232 }, { "epoch": 0.4668689469167586, "grad_norm": 1.262510088922014, "learning_rate": 5.776313798549891e-06, "loss": 0.7842, "step": 15233 }, { "epoch": 0.4668995954394998, "grad_norm": 1.3107384832569406, "learning_rate": 5.7758234956307745e-06, "loss": 0.6283, "step": 15234 }, { "epoch": 0.46693024396224103, "grad_norm": 1.1552280899987224, "learning_rate": 5.77533318506727e-06, "loss": 0.7324, "step": 15235 }, { "epoch": 0.46696089248498224, "grad_norm": 1.2173308065015072, "learning_rate": 5.7748428668642095e-06, "loss": 0.7069, "step": 15236 }, { "epoch": 0.46699154100772344, "grad_norm": 0.45343883216403913, "learning_rate": 5.7743525410264256e-06, "loss": 0.4282, "step": 15237 }, { "epoch": 0.46702218953046465, "grad_norm": 1.2091279687459324, "learning_rate": 5.773862207558747e-06, "loss": 0.6799, "step": 15238 }, { "epoch": 0.46705283805320585, "grad_norm": 1.2226225444902845, "learning_rate": 5.773371866466004e-06, "loss": 0.6594, "step": 15239 }, { "epoch": 0.46708348657594706, "grad_norm": 1.3134804771851747, "learning_rate": 5.772881517753033e-06, "loss": 0.7133, "step": 15240 }, { "epoch": 0.46711413509868827, "grad_norm": 0.45338509085007095, "learning_rate": 5.772391161424662e-06, "loss": 0.427, "step": 15241 }, { "epoch": 0.46714478362142947, "grad_norm": 1.3948032023842591, "learning_rate": 5.771900797485723e-06, "loss": 0.6647, "step": 15242 }, { "epoch": 0.4671754321441707, "grad_norm": 1.2394522997089716, "learning_rate": 5.771410425941047e-06, "loss": 0.6067, "step": 15243 }, { "epoch": 0.4672060806669118, "grad_norm": 1.2044033216627226, "learning_rate": 5.770920046795468e-06, "loss": 0.5175, "step": 15244 }, { "epoch": 0.46723672918965303, "grad_norm": 1.4314695695846995, "learning_rate": 5.7704296600538165e-06, "loss": 0.6581, "step": 15245 }, { "epoch": 0.46726737771239424, "grad_norm": 1.1140174443592534, "learning_rate": 5.769939265720923e-06, "loss": 0.5887, "step": 15246 }, { "epoch": 0.46729802623513544, "grad_norm": 1.2376258296972642, "learning_rate": 5.769448863801622e-06, "loss": 0.6627, "step": 15247 }, { "epoch": 0.46732867475787665, "grad_norm": 1.230910890976988, "learning_rate": 5.7689584543007446e-06, "loss": 0.6379, "step": 15248 }, { "epoch": 0.46735932328061786, "grad_norm": 1.2304222692635787, "learning_rate": 5.768468037223124e-06, "loss": 0.6609, "step": 15249 }, { "epoch": 0.46738997180335906, "grad_norm": 1.170981038963347, "learning_rate": 5.767977612573589e-06, "loss": 0.6668, "step": 15250 }, { "epoch": 0.46742062032610027, "grad_norm": 1.1309215052754793, "learning_rate": 5.767487180356974e-06, "loss": 0.6386, "step": 15251 }, { "epoch": 0.4674512688488415, "grad_norm": 0.4563392303446872, "learning_rate": 5.766996740578113e-06, "loss": 0.4248, "step": 15252 }, { "epoch": 0.4674819173715827, "grad_norm": 1.353414420662193, "learning_rate": 5.766506293241837e-06, "loss": 0.6456, "step": 15253 }, { "epoch": 0.4675125658943239, "grad_norm": 1.1870792090026743, "learning_rate": 5.766015838352976e-06, "loss": 0.6207, "step": 15254 }, { "epoch": 0.4675432144170651, "grad_norm": 1.2278279244053838, "learning_rate": 5.765525375916368e-06, "loss": 0.6658, "step": 15255 }, { "epoch": 0.4675738629398063, "grad_norm": 1.2668057198358365, "learning_rate": 5.76503490593684e-06, "loss": 0.7087, "step": 15256 }, { "epoch": 0.4676045114625475, "grad_norm": 1.1626468583844003, "learning_rate": 5.764544428419229e-06, "loss": 0.6314, "step": 15257 }, { "epoch": 0.4676351599852887, "grad_norm": 1.239613759714361, "learning_rate": 5.764053943368365e-06, "loss": 0.6604, "step": 15258 }, { "epoch": 0.4676658085080299, "grad_norm": 1.267531659938915, "learning_rate": 5.7635634507890836e-06, "loss": 0.7074, "step": 15259 }, { "epoch": 0.4676964570307711, "grad_norm": 1.2231026927843967, "learning_rate": 5.763072950686215e-06, "loss": 0.6493, "step": 15260 }, { "epoch": 0.4677271055535123, "grad_norm": 1.3367222539502326, "learning_rate": 5.762582443064593e-06, "loss": 0.6609, "step": 15261 }, { "epoch": 0.46775775407625353, "grad_norm": 0.4786419276996818, "learning_rate": 5.762091927929052e-06, "loss": 0.4153, "step": 15262 }, { "epoch": 0.46778840259899473, "grad_norm": 0.4844076571955364, "learning_rate": 5.761601405284423e-06, "loss": 0.4395, "step": 15263 }, { "epoch": 0.46781905112173594, "grad_norm": 1.133612567096675, "learning_rate": 5.761110875135543e-06, "loss": 0.603, "step": 15264 }, { "epoch": 0.46784969964447715, "grad_norm": 1.1846284802751896, "learning_rate": 5.7606203374872395e-06, "loss": 0.6351, "step": 15265 }, { "epoch": 0.46788034816721835, "grad_norm": 1.1839474459986377, "learning_rate": 5.760129792344351e-06, "loss": 0.6862, "step": 15266 }, { "epoch": 0.46791099668995956, "grad_norm": 1.4591819400163002, "learning_rate": 5.759639239711709e-06, "loss": 0.6586, "step": 15267 }, { "epoch": 0.46794164521270076, "grad_norm": 1.1474457576304846, "learning_rate": 5.7591486795941484e-06, "loss": 0.5657, "step": 15268 }, { "epoch": 0.46797229373544197, "grad_norm": 1.3244988481347653, "learning_rate": 5.758658111996499e-06, "loss": 0.6371, "step": 15269 }, { "epoch": 0.4680029422581832, "grad_norm": 1.3236431376676867, "learning_rate": 5.758167536923599e-06, "loss": 0.5763, "step": 15270 }, { "epoch": 0.4680335907809244, "grad_norm": 0.4451856639860124, "learning_rate": 5.7576769543802805e-06, "loss": 0.4287, "step": 15271 }, { "epoch": 0.4680642393036656, "grad_norm": 1.245324935378706, "learning_rate": 5.7571863643713755e-06, "loss": 0.6366, "step": 15272 }, { "epoch": 0.4680948878264068, "grad_norm": 1.4037699678721152, "learning_rate": 5.756695766901721e-06, "loss": 0.7235, "step": 15273 }, { "epoch": 0.468125536349148, "grad_norm": 1.1790411196305353, "learning_rate": 5.756205161976148e-06, "loss": 0.681, "step": 15274 }, { "epoch": 0.46815618487188915, "grad_norm": 1.1784156520089017, "learning_rate": 5.755714549599495e-06, "loss": 0.6952, "step": 15275 }, { "epoch": 0.46818683339463035, "grad_norm": 1.3050155349512547, "learning_rate": 5.75522392977659e-06, "loss": 0.6483, "step": 15276 }, { "epoch": 0.46821748191737156, "grad_norm": 0.44248504868652655, "learning_rate": 5.754733302512272e-06, "loss": 0.4321, "step": 15277 }, { "epoch": 0.46824813044011276, "grad_norm": 1.265611141310039, "learning_rate": 5.754242667811372e-06, "loss": 0.6649, "step": 15278 }, { "epoch": 0.46827877896285397, "grad_norm": 1.2607860071795536, "learning_rate": 5.753752025678728e-06, "loss": 0.6687, "step": 15279 }, { "epoch": 0.4683094274855952, "grad_norm": 1.3376526494787089, "learning_rate": 5.753261376119172e-06, "loss": 0.752, "step": 15280 }, { "epoch": 0.4683400760083364, "grad_norm": 1.214637313042484, "learning_rate": 5.752770719137538e-06, "loss": 0.6711, "step": 15281 }, { "epoch": 0.4683707245310776, "grad_norm": 0.439961179968532, "learning_rate": 5.752280054738662e-06, "loss": 0.4062, "step": 15282 }, { "epoch": 0.4684013730538188, "grad_norm": 0.4375899324433161, "learning_rate": 5.751789382927379e-06, "loss": 0.4023, "step": 15283 }, { "epoch": 0.46843202157656, "grad_norm": 0.45341926830606905, "learning_rate": 5.751298703708522e-06, "loss": 0.4414, "step": 15284 }, { "epoch": 0.4684626700993012, "grad_norm": 1.1191737997740028, "learning_rate": 5.750808017086927e-06, "loss": 0.5934, "step": 15285 }, { "epoch": 0.4684933186220424, "grad_norm": 1.2884968314843377, "learning_rate": 5.750317323067427e-06, "loss": 0.6473, "step": 15286 }, { "epoch": 0.4685239671447836, "grad_norm": 0.45361622448772143, "learning_rate": 5.74982662165486e-06, "loss": 0.4074, "step": 15287 }, { "epoch": 0.4685546156675248, "grad_norm": 1.2178757171172694, "learning_rate": 5.749335912854059e-06, "loss": 0.6551, "step": 15288 }, { "epoch": 0.468585264190266, "grad_norm": 1.283639950364581, "learning_rate": 5.74884519666986e-06, "loss": 0.7094, "step": 15289 }, { "epoch": 0.46861591271300723, "grad_norm": 1.1661106126426137, "learning_rate": 5.748354473107097e-06, "loss": 0.645, "step": 15290 }, { "epoch": 0.46864656123574844, "grad_norm": 1.2483094894634603, "learning_rate": 5.747863742170607e-06, "loss": 0.6531, "step": 15291 }, { "epoch": 0.46867720975848964, "grad_norm": 1.162294094607967, "learning_rate": 5.7473730038652245e-06, "loss": 0.5581, "step": 15292 }, { "epoch": 0.46870785828123085, "grad_norm": 1.2335518559313996, "learning_rate": 5.746882258195782e-06, "loss": 0.6228, "step": 15293 }, { "epoch": 0.46873850680397205, "grad_norm": 1.141191976659576, "learning_rate": 5.746391505167119e-06, "loss": 0.5973, "step": 15294 }, { "epoch": 0.46876915532671326, "grad_norm": 1.2920638670652882, "learning_rate": 5.74590074478407e-06, "loss": 0.6801, "step": 15295 }, { "epoch": 0.46879980384945447, "grad_norm": 1.0455887302948212, "learning_rate": 5.74540997705147e-06, "loss": 0.5474, "step": 15296 }, { "epoch": 0.46883045237219567, "grad_norm": 1.3370303920870248, "learning_rate": 5.744919201974154e-06, "loss": 0.6659, "step": 15297 }, { "epoch": 0.4688611008949369, "grad_norm": 1.2984632922513126, "learning_rate": 5.744428419556959e-06, "loss": 0.7342, "step": 15298 }, { "epoch": 0.4688917494176781, "grad_norm": 1.2206361252176914, "learning_rate": 5.743937629804721e-06, "loss": 0.7134, "step": 15299 }, { "epoch": 0.4689223979404193, "grad_norm": 1.4048331680366049, "learning_rate": 5.743446832722274e-06, "loss": 0.6138, "step": 15300 }, { "epoch": 0.4689530464631605, "grad_norm": 1.2391202646286035, "learning_rate": 5.742956028314455e-06, "loss": 0.5881, "step": 15301 }, { "epoch": 0.4689836949859017, "grad_norm": 1.2967880979520667, "learning_rate": 5.742465216586102e-06, "loss": 0.65, "step": 15302 }, { "epoch": 0.4690143435086429, "grad_norm": 1.1975797168145852, "learning_rate": 5.741974397542047e-06, "loss": 0.6769, "step": 15303 }, { "epoch": 0.4690449920313841, "grad_norm": 1.2838541637417813, "learning_rate": 5.741483571187129e-06, "loss": 0.6986, "step": 15304 }, { "epoch": 0.4690756405541253, "grad_norm": 1.1037891817333445, "learning_rate": 5.7409927375261845e-06, "loss": 0.6093, "step": 15305 }, { "epoch": 0.4691062890768665, "grad_norm": 1.3562350576245008, "learning_rate": 5.740501896564049e-06, "loss": 0.6291, "step": 15306 }, { "epoch": 0.4691369375996077, "grad_norm": 1.1806592661469908, "learning_rate": 5.740011048305558e-06, "loss": 0.6375, "step": 15307 }, { "epoch": 0.4691675861223489, "grad_norm": 1.2063534414535215, "learning_rate": 5.7395201927555486e-06, "loss": 0.6003, "step": 15308 }, { "epoch": 0.4691982346450901, "grad_norm": 0.5290035188257641, "learning_rate": 5.739029329918859e-06, "loss": 0.4302, "step": 15309 }, { "epoch": 0.4692288831678313, "grad_norm": 1.2101417714250955, "learning_rate": 5.738538459800323e-06, "loss": 0.5155, "step": 15310 }, { "epoch": 0.4692595316905725, "grad_norm": 1.1620450334013352, "learning_rate": 5.73804758240478e-06, "loss": 0.6178, "step": 15311 }, { "epoch": 0.4692901802133137, "grad_norm": 0.476039574810868, "learning_rate": 5.737556697737063e-06, "loss": 0.4192, "step": 15312 }, { "epoch": 0.4693208287360549, "grad_norm": 1.2778143784764608, "learning_rate": 5.737065805802013e-06, "loss": 0.6396, "step": 15313 }, { "epoch": 0.4693514772587961, "grad_norm": 1.1274382167877273, "learning_rate": 5.736574906604465e-06, "loss": 0.7319, "step": 15314 }, { "epoch": 0.4693821257815373, "grad_norm": 1.556215319281614, "learning_rate": 5.736084000149254e-06, "loss": 0.6956, "step": 15315 }, { "epoch": 0.4694127743042785, "grad_norm": 1.1866827187446602, "learning_rate": 5.7355930864412215e-06, "loss": 0.5659, "step": 15316 }, { "epoch": 0.46944342282701973, "grad_norm": 1.4404400140646485, "learning_rate": 5.7351021654852004e-06, "loss": 0.602, "step": 15317 }, { "epoch": 0.46947407134976094, "grad_norm": 0.4557729385162287, "learning_rate": 5.734611237286032e-06, "loss": 0.404, "step": 15318 }, { "epoch": 0.46950471987250214, "grad_norm": 0.45978227692327417, "learning_rate": 5.734120301848548e-06, "loss": 0.4044, "step": 15319 }, { "epoch": 0.46953536839524335, "grad_norm": 1.2666508337387874, "learning_rate": 5.733629359177591e-06, "loss": 0.5587, "step": 15320 }, { "epoch": 0.46956601691798455, "grad_norm": 1.2401655833555545, "learning_rate": 5.733138409277996e-06, "loss": 0.7493, "step": 15321 }, { "epoch": 0.46959666544072576, "grad_norm": 1.1775486347976722, "learning_rate": 5.7326474521546e-06, "loss": 0.6736, "step": 15322 }, { "epoch": 0.46962731396346696, "grad_norm": 1.5021414565323683, "learning_rate": 5.732156487812241e-06, "loss": 0.6644, "step": 15323 }, { "epoch": 0.46965796248620817, "grad_norm": 1.2706257212551586, "learning_rate": 5.731665516255758e-06, "loss": 0.6509, "step": 15324 }, { "epoch": 0.4696886110089494, "grad_norm": 1.3386076120558732, "learning_rate": 5.731174537489986e-06, "loss": 0.729, "step": 15325 }, { "epoch": 0.4697192595316906, "grad_norm": 1.267723450189352, "learning_rate": 5.730683551519764e-06, "loss": 0.5802, "step": 15326 }, { "epoch": 0.4697499080544318, "grad_norm": 1.1765146248417488, "learning_rate": 5.7301925583499314e-06, "loss": 0.5875, "step": 15327 }, { "epoch": 0.469780556577173, "grad_norm": 0.455712300267255, "learning_rate": 5.729701557985325e-06, "loss": 0.4108, "step": 15328 }, { "epoch": 0.4698112050999142, "grad_norm": 1.1434612585508603, "learning_rate": 5.729210550430782e-06, "loss": 0.5591, "step": 15329 }, { "epoch": 0.4698418536226554, "grad_norm": 1.2141145067254218, "learning_rate": 5.728719535691139e-06, "loss": 0.6601, "step": 15330 }, { "epoch": 0.4698725021453966, "grad_norm": 1.2848281774856813, "learning_rate": 5.728228513771238e-06, "loss": 0.6342, "step": 15331 }, { "epoch": 0.4699031506681378, "grad_norm": 1.3596111057568896, "learning_rate": 5.727737484675914e-06, "loss": 0.7347, "step": 15332 }, { "epoch": 0.469933799190879, "grad_norm": 1.110793168992191, "learning_rate": 5.727246448410008e-06, "loss": 0.6201, "step": 15333 }, { "epoch": 0.4699644477136202, "grad_norm": 1.212443996426128, "learning_rate": 5.726755404978355e-06, "loss": 0.6149, "step": 15334 }, { "epoch": 0.46999509623636143, "grad_norm": 1.2971640384435252, "learning_rate": 5.726264354385795e-06, "loss": 0.683, "step": 15335 }, { "epoch": 0.47002574475910264, "grad_norm": 1.2048076954526772, "learning_rate": 5.725773296637167e-06, "loss": 0.5823, "step": 15336 }, { "epoch": 0.47005639328184384, "grad_norm": 1.2545252724464766, "learning_rate": 5.7252822317373105e-06, "loss": 0.5966, "step": 15337 }, { "epoch": 0.470087041804585, "grad_norm": 1.2479488952654978, "learning_rate": 5.724791159691061e-06, "loss": 0.627, "step": 15338 }, { "epoch": 0.4701176903273262, "grad_norm": 1.28048070636787, "learning_rate": 5.7243000805032585e-06, "loss": 0.6219, "step": 15339 }, { "epoch": 0.4701483388500674, "grad_norm": 1.3854525711061045, "learning_rate": 5.723808994178742e-06, "loss": 0.7116, "step": 15340 }, { "epoch": 0.4701789873728086, "grad_norm": 1.1801202394017918, "learning_rate": 5.7233179007223514e-06, "loss": 0.6077, "step": 15341 }, { "epoch": 0.4702096358955498, "grad_norm": 1.2530533654900018, "learning_rate": 5.722826800138924e-06, "loss": 0.6738, "step": 15342 }, { "epoch": 0.470240284418291, "grad_norm": 1.247032806869981, "learning_rate": 5.7223356924332986e-06, "loss": 0.622, "step": 15343 }, { "epoch": 0.4702709329410322, "grad_norm": 1.2807471675839803, "learning_rate": 5.721844577610315e-06, "loss": 0.7255, "step": 15344 }, { "epoch": 0.47030158146377343, "grad_norm": 1.3024010463406668, "learning_rate": 5.721353455674813e-06, "loss": 0.7102, "step": 15345 }, { "epoch": 0.47033222998651464, "grad_norm": 1.371552752754659, "learning_rate": 5.7208623266316296e-06, "loss": 0.7668, "step": 15346 }, { "epoch": 0.47036287850925584, "grad_norm": 0.47591919223908635, "learning_rate": 5.720371190485605e-06, "loss": 0.4024, "step": 15347 }, { "epoch": 0.47039352703199705, "grad_norm": 1.2778469529679928, "learning_rate": 5.719880047241582e-06, "loss": 0.6275, "step": 15348 }, { "epoch": 0.47042417555473826, "grad_norm": 1.1000194096412312, "learning_rate": 5.719388896904393e-06, "loss": 0.6133, "step": 15349 }, { "epoch": 0.47045482407747946, "grad_norm": 1.235892901438135, "learning_rate": 5.718897739478883e-06, "loss": 0.6056, "step": 15350 }, { "epoch": 0.47048547260022067, "grad_norm": 1.248446186820869, "learning_rate": 5.718406574969888e-06, "loss": 0.6884, "step": 15351 }, { "epoch": 0.4705161211229619, "grad_norm": 1.228620179874873, "learning_rate": 5.717915403382251e-06, "loss": 0.7072, "step": 15352 }, { "epoch": 0.4705467696457031, "grad_norm": 1.2531722809644061, "learning_rate": 5.717424224720809e-06, "loss": 0.7555, "step": 15353 }, { "epoch": 0.4705774181684443, "grad_norm": 1.1600374064375198, "learning_rate": 5.716933038990402e-06, "loss": 0.5835, "step": 15354 }, { "epoch": 0.4706080666911855, "grad_norm": 1.3291428888055155, "learning_rate": 5.71644184619587e-06, "loss": 0.6404, "step": 15355 }, { "epoch": 0.4706387152139267, "grad_norm": 1.195798961754971, "learning_rate": 5.715950646342055e-06, "loss": 0.708, "step": 15356 }, { "epoch": 0.4706693637366679, "grad_norm": 1.2399526172955626, "learning_rate": 5.715459439433795e-06, "loss": 0.6787, "step": 15357 }, { "epoch": 0.4707000122594091, "grad_norm": 1.1766340699847317, "learning_rate": 5.714968225475927e-06, "loss": 0.6687, "step": 15358 }, { "epoch": 0.4707306607821503, "grad_norm": 1.1456274192627924, "learning_rate": 5.7144770044732976e-06, "loss": 0.5517, "step": 15359 }, { "epoch": 0.4707613093048915, "grad_norm": 1.3830302534863512, "learning_rate": 5.7139857764307424e-06, "loss": 0.5804, "step": 15360 }, { "epoch": 0.4707919578276327, "grad_norm": 1.2080760028330417, "learning_rate": 5.713494541353103e-06, "loss": 0.6881, "step": 15361 }, { "epoch": 0.47082260635037393, "grad_norm": 1.2668068065177391, "learning_rate": 5.713003299245219e-06, "loss": 0.7098, "step": 15362 }, { "epoch": 0.47085325487311513, "grad_norm": 1.55672480770107, "learning_rate": 5.712512050111931e-06, "loss": 0.6248, "step": 15363 }, { "epoch": 0.47088390339585634, "grad_norm": 1.2674580544586016, "learning_rate": 5.71202079395808e-06, "loss": 0.6661, "step": 15364 }, { "epoch": 0.47091455191859755, "grad_norm": 0.5133867507628077, "learning_rate": 5.711529530788505e-06, "loss": 0.4204, "step": 15365 }, { "epoch": 0.47094520044133875, "grad_norm": 1.2322356244571933, "learning_rate": 5.711038260608047e-06, "loss": 0.5754, "step": 15366 }, { "epoch": 0.47097584896407996, "grad_norm": 1.0827204371874883, "learning_rate": 5.7105469834215485e-06, "loss": 0.6344, "step": 15367 }, { "epoch": 0.47100649748682116, "grad_norm": 1.3442701268455355, "learning_rate": 5.710055699233848e-06, "loss": 0.6465, "step": 15368 }, { "epoch": 0.4710371460095623, "grad_norm": 1.291681197812067, "learning_rate": 5.709564408049787e-06, "loss": 0.6647, "step": 15369 }, { "epoch": 0.4710677945323035, "grad_norm": 1.3356411180092866, "learning_rate": 5.709073109874207e-06, "loss": 0.7168, "step": 15370 }, { "epoch": 0.4710984430550447, "grad_norm": 1.4159365446001666, "learning_rate": 5.708581804711947e-06, "loss": 0.6791, "step": 15371 }, { "epoch": 0.47112909157778593, "grad_norm": 1.2557544328127757, "learning_rate": 5.708090492567851e-06, "loss": 0.7222, "step": 15372 }, { "epoch": 0.47115974010052714, "grad_norm": 0.4208960735432619, "learning_rate": 5.707599173446756e-06, "loss": 0.3966, "step": 15373 }, { "epoch": 0.47119038862326834, "grad_norm": 1.4116103094160144, "learning_rate": 5.707107847353508e-06, "loss": 0.646, "step": 15374 }, { "epoch": 0.47122103714600955, "grad_norm": 1.2095761180730287, "learning_rate": 5.706616514292944e-06, "loss": 0.5658, "step": 15375 }, { "epoch": 0.47125168566875075, "grad_norm": 0.46554100185816083, "learning_rate": 5.7061251742699066e-06, "loss": 0.4277, "step": 15376 }, { "epoch": 0.47128233419149196, "grad_norm": 0.46068043182409224, "learning_rate": 5.705633827289236e-06, "loss": 0.4019, "step": 15377 }, { "epoch": 0.47131298271423316, "grad_norm": 1.265924183857583, "learning_rate": 5.705142473355777e-06, "loss": 0.6146, "step": 15378 }, { "epoch": 0.47134363123697437, "grad_norm": 1.316538223704068, "learning_rate": 5.7046511124743676e-06, "loss": 0.6887, "step": 15379 }, { "epoch": 0.4713742797597156, "grad_norm": 1.2810155243935155, "learning_rate": 5.7041597446498505e-06, "loss": 0.6633, "step": 15380 }, { "epoch": 0.4714049282824568, "grad_norm": 1.3863640184841215, "learning_rate": 5.703668369887068e-06, "loss": 0.7435, "step": 15381 }, { "epoch": 0.471435576805198, "grad_norm": 0.4959426609810848, "learning_rate": 5.703176988190861e-06, "loss": 0.4228, "step": 15382 }, { "epoch": 0.4714662253279392, "grad_norm": 1.4342163984310585, "learning_rate": 5.70268559956607e-06, "loss": 0.6276, "step": 15383 }, { "epoch": 0.4714968738506804, "grad_norm": 1.1469964468397977, "learning_rate": 5.702194204017539e-06, "loss": 0.6487, "step": 15384 }, { "epoch": 0.4715275223734216, "grad_norm": 1.1831467571814818, "learning_rate": 5.7017028015501075e-06, "loss": 0.6714, "step": 15385 }, { "epoch": 0.4715581708961628, "grad_norm": 1.2188821446313558, "learning_rate": 5.701211392168619e-06, "loss": 0.6333, "step": 15386 }, { "epoch": 0.471588819418904, "grad_norm": 1.2762628693253986, "learning_rate": 5.700719975877917e-06, "loss": 0.6357, "step": 15387 }, { "epoch": 0.4716194679416452, "grad_norm": 1.202028910116638, "learning_rate": 5.70022855268284e-06, "loss": 0.7373, "step": 15388 }, { "epoch": 0.4716501164643864, "grad_norm": 1.2600635372422861, "learning_rate": 5.6997371225882335e-06, "loss": 0.6728, "step": 15389 }, { "epoch": 0.47168076498712763, "grad_norm": 0.4888726612003509, "learning_rate": 5.699245685598936e-06, "loss": 0.4169, "step": 15390 }, { "epoch": 0.47171141350986884, "grad_norm": 1.1591921477814253, "learning_rate": 5.698754241719795e-06, "loss": 0.7062, "step": 15391 }, { "epoch": 0.47174206203261004, "grad_norm": 1.1861569783951758, "learning_rate": 5.698262790955647e-06, "loss": 0.6774, "step": 15392 }, { "epoch": 0.47177271055535125, "grad_norm": 1.2206789699654035, "learning_rate": 5.697771333311338e-06, "loss": 0.6094, "step": 15393 }, { "epoch": 0.47180335907809245, "grad_norm": 1.401522076028923, "learning_rate": 5.697279868791709e-06, "loss": 0.675, "step": 15394 }, { "epoch": 0.47183400760083366, "grad_norm": 1.2244001084346643, "learning_rate": 5.696788397401603e-06, "loss": 0.583, "step": 15395 }, { "epoch": 0.47186465612357487, "grad_norm": 1.4919014144337206, "learning_rate": 5.696296919145864e-06, "loss": 0.6325, "step": 15396 }, { "epoch": 0.47189530464631607, "grad_norm": 1.297463885711251, "learning_rate": 5.695805434029331e-06, "loss": 0.6196, "step": 15397 }, { "epoch": 0.4719259531690573, "grad_norm": 1.3711812364554246, "learning_rate": 5.695313942056852e-06, "loss": 0.6589, "step": 15398 }, { "epoch": 0.4719566016917985, "grad_norm": 1.2760274284602549, "learning_rate": 5.6948224432332655e-06, "loss": 0.6632, "step": 15399 }, { "epoch": 0.47198725021453963, "grad_norm": 1.1880235200156384, "learning_rate": 5.694330937563415e-06, "loss": 0.6383, "step": 15400 }, { "epoch": 0.47201789873728084, "grad_norm": 1.2631601634215137, "learning_rate": 5.693839425052145e-06, "loss": 0.6478, "step": 15401 }, { "epoch": 0.47204854726002204, "grad_norm": 1.2094835340305623, "learning_rate": 5.6933479057042975e-06, "loss": 0.6627, "step": 15402 }, { "epoch": 0.47207919578276325, "grad_norm": 0.44838548880985724, "learning_rate": 5.692856379524715e-06, "loss": 0.4273, "step": 15403 }, { "epoch": 0.47210984430550446, "grad_norm": 1.2599967159475673, "learning_rate": 5.6923648465182415e-06, "loss": 0.5314, "step": 15404 }, { "epoch": 0.47214049282824566, "grad_norm": 1.263206265504456, "learning_rate": 5.6918733066897195e-06, "loss": 0.6819, "step": 15405 }, { "epoch": 0.47217114135098687, "grad_norm": 1.3559688671813888, "learning_rate": 5.691381760043995e-06, "loss": 0.5792, "step": 15406 }, { "epoch": 0.4722017898737281, "grad_norm": 0.4685053719942825, "learning_rate": 5.6908902065859065e-06, "loss": 0.4172, "step": 15407 }, { "epoch": 0.4722324383964693, "grad_norm": 1.1574220858161213, "learning_rate": 5.690398646320302e-06, "loss": 0.722, "step": 15408 }, { "epoch": 0.4722630869192105, "grad_norm": 1.2852863214125139, "learning_rate": 5.6899070792520206e-06, "loss": 0.6332, "step": 15409 }, { "epoch": 0.4722937354419517, "grad_norm": 1.136177206680047, "learning_rate": 5.68941550538591e-06, "loss": 0.6659, "step": 15410 }, { "epoch": 0.4723243839646929, "grad_norm": 1.1007665631224235, "learning_rate": 5.6889239247268125e-06, "loss": 0.6681, "step": 15411 }, { "epoch": 0.4723550324874341, "grad_norm": 1.2602093092752404, "learning_rate": 5.68843233727957e-06, "loss": 0.6215, "step": 15412 }, { "epoch": 0.4723856810101753, "grad_norm": 1.3437373279210851, "learning_rate": 5.687940743049028e-06, "loss": 0.5496, "step": 15413 }, { "epoch": 0.4724163295329165, "grad_norm": 1.3367907002323736, "learning_rate": 5.6874491420400305e-06, "loss": 0.7137, "step": 15414 }, { "epoch": 0.4724469780556577, "grad_norm": 1.2528006835785965, "learning_rate": 5.68695753425742e-06, "loss": 0.6916, "step": 15415 }, { "epoch": 0.4724776265783989, "grad_norm": 0.4686439792757533, "learning_rate": 5.686465919706041e-06, "loss": 0.4327, "step": 15416 }, { "epoch": 0.47250827510114013, "grad_norm": 1.4108520235009605, "learning_rate": 5.685974298390739e-06, "loss": 0.6442, "step": 15417 }, { "epoch": 0.47253892362388134, "grad_norm": 1.2444619400917856, "learning_rate": 5.685482670316356e-06, "loss": 0.6813, "step": 15418 }, { "epoch": 0.47256957214662254, "grad_norm": 1.2502303319805823, "learning_rate": 5.6849910354877355e-06, "loss": 0.6969, "step": 15419 }, { "epoch": 0.47260022066936375, "grad_norm": 1.196441531077692, "learning_rate": 5.684499393909724e-06, "loss": 0.5644, "step": 15420 }, { "epoch": 0.47263086919210495, "grad_norm": 0.45708832534686994, "learning_rate": 5.6840077455871655e-06, "loss": 0.4133, "step": 15421 }, { "epoch": 0.47266151771484616, "grad_norm": 1.227529742092912, "learning_rate": 5.683516090524903e-06, "loss": 0.627, "step": 15422 }, { "epoch": 0.47269216623758736, "grad_norm": 1.3526834998896788, "learning_rate": 5.6830244287277814e-06, "loss": 0.5923, "step": 15423 }, { "epoch": 0.47272281476032857, "grad_norm": 1.2052457898269433, "learning_rate": 5.682532760200646e-06, "loss": 0.6153, "step": 15424 }, { "epoch": 0.4727534632830698, "grad_norm": 1.281000496787048, "learning_rate": 5.68204108494834e-06, "loss": 0.7943, "step": 15425 }, { "epoch": 0.472784111805811, "grad_norm": 1.4793284832058906, "learning_rate": 5.681549402975709e-06, "loss": 0.7329, "step": 15426 }, { "epoch": 0.4728147603285522, "grad_norm": 1.1744236054656336, "learning_rate": 5.681057714287597e-06, "loss": 0.5815, "step": 15427 }, { "epoch": 0.4728454088512934, "grad_norm": 1.2217955993389387, "learning_rate": 5.68056601888885e-06, "loss": 0.6342, "step": 15428 }, { "epoch": 0.4728760573740346, "grad_norm": 0.5033120128277467, "learning_rate": 5.680074316784312e-06, "loss": 0.4087, "step": 15429 }, { "epoch": 0.4729067058967758, "grad_norm": 1.1704875565772792, "learning_rate": 5.679582607978826e-06, "loss": 0.5521, "step": 15430 }, { "epoch": 0.47293735441951695, "grad_norm": 1.2051161966607122, "learning_rate": 5.6790908924772395e-06, "loss": 0.7126, "step": 15431 }, { "epoch": 0.47296800294225816, "grad_norm": 0.4499661798188875, "learning_rate": 5.6785991702843976e-06, "loss": 0.4164, "step": 15432 }, { "epoch": 0.47299865146499936, "grad_norm": 1.2687332189519716, "learning_rate": 5.6781074414051444e-06, "loss": 0.6728, "step": 15433 }, { "epoch": 0.47302929998774057, "grad_norm": 1.3294385541619222, "learning_rate": 5.677615705844325e-06, "loss": 0.6661, "step": 15434 }, { "epoch": 0.4730599485104818, "grad_norm": 1.2232345138293461, "learning_rate": 5.677123963606783e-06, "loss": 0.5448, "step": 15435 }, { "epoch": 0.473090597033223, "grad_norm": 1.0544570571036078, "learning_rate": 5.676632214697367e-06, "loss": 0.6253, "step": 15436 }, { "epoch": 0.4731212455559642, "grad_norm": 1.2777866046191675, "learning_rate": 5.67614045912092e-06, "loss": 0.6793, "step": 15437 }, { "epoch": 0.4731518940787054, "grad_norm": 1.343507003530994, "learning_rate": 5.675648696882288e-06, "loss": 0.6986, "step": 15438 }, { "epoch": 0.4731825426014466, "grad_norm": 1.26271672698488, "learning_rate": 5.675156927986316e-06, "loss": 0.653, "step": 15439 }, { "epoch": 0.4732131911241878, "grad_norm": 1.1941150129504787, "learning_rate": 5.674665152437851e-06, "loss": 0.6173, "step": 15440 }, { "epoch": 0.473243839646929, "grad_norm": 0.5117658299776285, "learning_rate": 5.674173370241738e-06, "loss": 0.4085, "step": 15441 }, { "epoch": 0.4732744881696702, "grad_norm": 1.2209560936938073, "learning_rate": 5.673681581402821e-06, "loss": 0.6545, "step": 15442 }, { "epoch": 0.4733051366924114, "grad_norm": 1.3372000587690183, "learning_rate": 5.6731897859259475e-06, "loss": 0.7189, "step": 15443 }, { "epoch": 0.4733357852151526, "grad_norm": 1.255797889623618, "learning_rate": 5.672697983815962e-06, "loss": 0.6421, "step": 15444 }, { "epoch": 0.47336643373789383, "grad_norm": 0.44751546469941733, "learning_rate": 5.672206175077715e-06, "loss": 0.4105, "step": 15445 }, { "epoch": 0.47339708226063504, "grad_norm": 1.3139570366707325, "learning_rate": 5.671714359716044e-06, "loss": 0.6739, "step": 15446 }, { "epoch": 0.47342773078337624, "grad_norm": 1.2719306338448315, "learning_rate": 5.671222537735802e-06, "loss": 0.716, "step": 15447 }, { "epoch": 0.47345837930611745, "grad_norm": 1.238224128153915, "learning_rate": 5.670730709141831e-06, "loss": 0.7002, "step": 15448 }, { "epoch": 0.47348902782885866, "grad_norm": 1.4069585129884117, "learning_rate": 5.67023887393898e-06, "loss": 0.706, "step": 15449 }, { "epoch": 0.47351967635159986, "grad_norm": 1.2164164949003347, "learning_rate": 5.6697470321320936e-06, "loss": 0.5921, "step": 15450 }, { "epoch": 0.47355032487434107, "grad_norm": 1.3694615271746866, "learning_rate": 5.669255183726019e-06, "loss": 0.7015, "step": 15451 }, { "epoch": 0.47358097339708227, "grad_norm": 1.399705161643983, "learning_rate": 5.6687633287256e-06, "loss": 0.7263, "step": 15452 }, { "epoch": 0.4736116219198235, "grad_norm": 0.5213873855919688, "learning_rate": 5.668271467135686e-06, "loss": 0.4545, "step": 15453 }, { "epoch": 0.4736422704425647, "grad_norm": 1.0862099142599364, "learning_rate": 5.667779598961122e-06, "loss": 0.5971, "step": 15454 }, { "epoch": 0.4736729189653059, "grad_norm": 1.4846305942538467, "learning_rate": 5.667287724206754e-06, "loss": 0.7044, "step": 15455 }, { "epoch": 0.4737035674880471, "grad_norm": 1.4319820672533694, "learning_rate": 5.666795842877432e-06, "loss": 0.5985, "step": 15456 }, { "epoch": 0.4737342160107883, "grad_norm": 1.2976726080271583, "learning_rate": 5.666303954977998e-06, "loss": 0.6448, "step": 15457 }, { "epoch": 0.4737648645335295, "grad_norm": 1.1462885508662184, "learning_rate": 5.6658120605133004e-06, "loss": 0.6274, "step": 15458 }, { "epoch": 0.4737955130562707, "grad_norm": 1.2765875259257318, "learning_rate": 5.6653201594881855e-06, "loss": 0.6779, "step": 15459 }, { "epoch": 0.4738261615790119, "grad_norm": 1.1931563404869538, "learning_rate": 5.664828251907504e-06, "loss": 0.6832, "step": 15460 }, { "epoch": 0.4738568101017531, "grad_norm": 1.3410167078824566, "learning_rate": 5.664336337776097e-06, "loss": 0.6295, "step": 15461 }, { "epoch": 0.4738874586244943, "grad_norm": 1.1022727490189907, "learning_rate": 5.6638444170988136e-06, "loss": 0.6065, "step": 15462 }, { "epoch": 0.4739181071472355, "grad_norm": 0.447208430140338, "learning_rate": 5.6633524898805015e-06, "loss": 0.4241, "step": 15463 }, { "epoch": 0.4739487556699767, "grad_norm": 1.238456015367665, "learning_rate": 5.662860556126009e-06, "loss": 0.6653, "step": 15464 }, { "epoch": 0.4739794041927179, "grad_norm": 1.145070896344748, "learning_rate": 5.662368615840182e-06, "loss": 0.4951, "step": 15465 }, { "epoch": 0.4740100527154591, "grad_norm": 0.48034881664339657, "learning_rate": 5.661876669027866e-06, "loss": 0.4044, "step": 15466 }, { "epoch": 0.4740407012382003, "grad_norm": 1.1424235829337825, "learning_rate": 5.661384715693911e-06, "loss": 0.5522, "step": 15467 }, { "epoch": 0.4740713497609415, "grad_norm": 1.2074629056506117, "learning_rate": 5.660892755843162e-06, "loss": 0.6719, "step": 15468 }, { "epoch": 0.4741019982836827, "grad_norm": 0.4403845491582517, "learning_rate": 5.6604007894804695e-06, "loss": 0.4201, "step": 15469 }, { "epoch": 0.4741326468064239, "grad_norm": 1.152582947631399, "learning_rate": 5.659908816610677e-06, "loss": 0.6597, "step": 15470 }, { "epoch": 0.4741632953291651, "grad_norm": 1.119818684490016, "learning_rate": 5.659416837238636e-06, "loss": 0.6418, "step": 15471 }, { "epoch": 0.47419394385190633, "grad_norm": 1.3772558200542808, "learning_rate": 5.658924851369191e-06, "loss": 0.6598, "step": 15472 }, { "epoch": 0.47422459237464754, "grad_norm": 1.2769770572713492, "learning_rate": 5.658432859007191e-06, "loss": 0.6985, "step": 15473 }, { "epoch": 0.47425524089738874, "grad_norm": 1.4243567564309791, "learning_rate": 5.657940860157484e-06, "loss": 0.6597, "step": 15474 }, { "epoch": 0.47428588942012995, "grad_norm": 1.221773009425129, "learning_rate": 5.657448854824916e-06, "loss": 0.5956, "step": 15475 }, { "epoch": 0.47431653794287115, "grad_norm": 1.361434737278917, "learning_rate": 5.656956843014338e-06, "loss": 0.6783, "step": 15476 }, { "epoch": 0.47434718646561236, "grad_norm": 1.2843816716198255, "learning_rate": 5.656464824730595e-06, "loss": 0.7138, "step": 15477 }, { "epoch": 0.47437783498835356, "grad_norm": 1.0975807223151455, "learning_rate": 5.655972799978536e-06, "loss": 0.6204, "step": 15478 }, { "epoch": 0.47440848351109477, "grad_norm": 0.4827473719011471, "learning_rate": 5.655480768763011e-06, "loss": 0.4407, "step": 15479 }, { "epoch": 0.474439132033836, "grad_norm": 0.4685970044292859, "learning_rate": 5.654988731088864e-06, "loss": 0.4031, "step": 15480 }, { "epoch": 0.4744697805565772, "grad_norm": 1.2155274663519924, "learning_rate": 5.654496686960946e-06, "loss": 0.6956, "step": 15481 }, { "epoch": 0.4745004290793184, "grad_norm": 0.4587627749820962, "learning_rate": 5.654004636384105e-06, "loss": 0.427, "step": 15482 }, { "epoch": 0.4745310776020596, "grad_norm": 1.2713584943104967, "learning_rate": 5.653512579363189e-06, "loss": 0.6511, "step": 15483 }, { "epoch": 0.4745617261248008, "grad_norm": 1.240106140232349, "learning_rate": 5.653020515903047e-06, "loss": 0.7035, "step": 15484 }, { "epoch": 0.474592374647542, "grad_norm": 0.447014924207716, "learning_rate": 5.652528446008526e-06, "loss": 0.3934, "step": 15485 }, { "epoch": 0.4746230231702832, "grad_norm": 1.2774416058401135, "learning_rate": 5.652036369684476e-06, "loss": 0.6318, "step": 15486 }, { "epoch": 0.4746536716930244, "grad_norm": 1.4035678876099662, "learning_rate": 5.651544286935744e-06, "loss": 0.6331, "step": 15487 }, { "epoch": 0.4746843202157656, "grad_norm": 1.329917881454777, "learning_rate": 5.65105219776718e-06, "loss": 0.6838, "step": 15488 }, { "epoch": 0.4747149687385068, "grad_norm": 1.2716255392014773, "learning_rate": 5.650560102183631e-06, "loss": 0.6069, "step": 15489 }, { "epoch": 0.47474561726124803, "grad_norm": 1.525454417623191, "learning_rate": 5.6500680001899474e-06, "loss": 0.7117, "step": 15490 }, { "epoch": 0.47477626578398924, "grad_norm": 1.1612136909006052, "learning_rate": 5.649575891790978e-06, "loss": 0.6532, "step": 15491 }, { "epoch": 0.47480691430673044, "grad_norm": 1.2548769415230039, "learning_rate": 5.649083776991571e-06, "loss": 0.6042, "step": 15492 }, { "epoch": 0.4748375628294716, "grad_norm": 0.48151885555307367, "learning_rate": 5.6485916557965755e-06, "loss": 0.4193, "step": 15493 }, { "epoch": 0.4748682113522128, "grad_norm": 1.3533402314973575, "learning_rate": 5.648099528210839e-06, "loss": 0.6401, "step": 15494 }, { "epoch": 0.474898859874954, "grad_norm": 1.2620141595110632, "learning_rate": 5.647607394239215e-06, "loss": 0.7006, "step": 15495 }, { "epoch": 0.4749295083976952, "grad_norm": 1.07805196953, "learning_rate": 5.647115253886547e-06, "loss": 0.6874, "step": 15496 }, { "epoch": 0.4749601569204364, "grad_norm": 1.1590544763253676, "learning_rate": 5.646623107157688e-06, "loss": 0.6723, "step": 15497 }, { "epoch": 0.4749908054431776, "grad_norm": 1.2442399552714507, "learning_rate": 5.646130954057485e-06, "loss": 0.7077, "step": 15498 }, { "epoch": 0.4750214539659188, "grad_norm": 1.2534459506362088, "learning_rate": 5.645638794590791e-06, "loss": 0.6375, "step": 15499 }, { "epoch": 0.47505210248866003, "grad_norm": 1.6315978067244639, "learning_rate": 5.6451466287624505e-06, "loss": 0.6455, "step": 15500 }, { "epoch": 0.47508275101140124, "grad_norm": 0.45564486451573777, "learning_rate": 5.644654456577315e-06, "loss": 0.4443, "step": 15501 }, { "epoch": 0.47511339953414244, "grad_norm": 1.1783088150793715, "learning_rate": 5.6441622780402365e-06, "loss": 0.6957, "step": 15502 }, { "epoch": 0.47514404805688365, "grad_norm": 1.1517428244312773, "learning_rate": 5.64367009315606e-06, "loss": 0.7213, "step": 15503 }, { "epoch": 0.47517469657962486, "grad_norm": 1.138073252232885, "learning_rate": 5.643177901929637e-06, "loss": 0.6226, "step": 15504 }, { "epoch": 0.47520534510236606, "grad_norm": 1.2504541768088637, "learning_rate": 5.64268570436582e-06, "loss": 0.6359, "step": 15505 }, { "epoch": 0.47523599362510727, "grad_norm": 1.44609920590284, "learning_rate": 5.642193500469455e-06, "loss": 0.6784, "step": 15506 }, { "epoch": 0.4752666421478485, "grad_norm": 1.207757448076619, "learning_rate": 5.641701290245391e-06, "loss": 0.6294, "step": 15507 }, { "epoch": 0.4752972906705897, "grad_norm": 1.124420783509047, "learning_rate": 5.641209073698483e-06, "loss": 0.5963, "step": 15508 }, { "epoch": 0.4753279391933309, "grad_norm": 1.3290947701859925, "learning_rate": 5.640716850833576e-06, "loss": 0.7426, "step": 15509 }, { "epoch": 0.4753585877160721, "grad_norm": 0.44820731686734816, "learning_rate": 5.640224621655526e-06, "loss": 0.4239, "step": 15510 }, { "epoch": 0.4753892362388133, "grad_norm": 0.4382473206842786, "learning_rate": 5.6397323861691744e-06, "loss": 0.4342, "step": 15511 }, { "epoch": 0.4754198847615545, "grad_norm": 1.2099974454168458, "learning_rate": 5.639240144379378e-06, "loss": 0.6521, "step": 15512 }, { "epoch": 0.4754505332842957, "grad_norm": 1.1645223046867883, "learning_rate": 5.638747896290984e-06, "loss": 0.6655, "step": 15513 }, { "epoch": 0.4754811818070369, "grad_norm": 1.2563224646719926, "learning_rate": 5.638255641908844e-06, "loss": 0.6166, "step": 15514 }, { "epoch": 0.4755118303297781, "grad_norm": 0.4473832597377591, "learning_rate": 5.6377633812378076e-06, "loss": 0.4122, "step": 15515 }, { "epoch": 0.4755424788525193, "grad_norm": 1.1539391535186114, "learning_rate": 5.6372711142827255e-06, "loss": 0.6086, "step": 15516 }, { "epoch": 0.47557312737526053, "grad_norm": 0.46428312737407074, "learning_rate": 5.636778841048447e-06, "loss": 0.4409, "step": 15517 }, { "epoch": 0.47560377589800173, "grad_norm": 1.2825917085824008, "learning_rate": 5.6362865615398255e-06, "loss": 0.6256, "step": 15518 }, { "epoch": 0.47563442442074294, "grad_norm": 0.446345023067109, "learning_rate": 5.635794275761709e-06, "loss": 0.4168, "step": 15519 }, { "epoch": 0.47566507294348415, "grad_norm": 1.4018835013491961, "learning_rate": 5.635301983718948e-06, "loss": 0.5712, "step": 15520 }, { "epoch": 0.47569572146622535, "grad_norm": 1.1653757571236258, "learning_rate": 5.634809685416396e-06, "loss": 0.6498, "step": 15521 }, { "epoch": 0.47572636998896656, "grad_norm": 1.2137185569692512, "learning_rate": 5.6343173808589e-06, "loss": 0.6549, "step": 15522 }, { "epoch": 0.47575701851170776, "grad_norm": 1.2327958754292505, "learning_rate": 5.633825070051314e-06, "loss": 0.663, "step": 15523 }, { "epoch": 0.4757876670344489, "grad_norm": 1.2911828396644, "learning_rate": 5.633332752998486e-06, "loss": 0.5897, "step": 15524 }, { "epoch": 0.4758183155571901, "grad_norm": 1.1702110001625456, "learning_rate": 5.632840429705269e-06, "loss": 0.6377, "step": 15525 }, { "epoch": 0.4758489640799313, "grad_norm": 1.4013537774220373, "learning_rate": 5.632348100176513e-06, "loss": 0.7515, "step": 15526 }, { "epoch": 0.47587961260267253, "grad_norm": 0.4843529029637811, "learning_rate": 5.63185576441707e-06, "loss": 0.451, "step": 15527 }, { "epoch": 0.47591026112541374, "grad_norm": 1.2262011393958556, "learning_rate": 5.631363422431789e-06, "loss": 0.5925, "step": 15528 }, { "epoch": 0.47594090964815494, "grad_norm": 1.291850370486008, "learning_rate": 5.6308710742255255e-06, "loss": 0.6196, "step": 15529 }, { "epoch": 0.47597155817089615, "grad_norm": 1.2450401705953524, "learning_rate": 5.630378719803126e-06, "loss": 0.6696, "step": 15530 }, { "epoch": 0.47600220669363735, "grad_norm": 1.4660326572803277, "learning_rate": 5.629886359169443e-06, "loss": 0.6538, "step": 15531 }, { "epoch": 0.47603285521637856, "grad_norm": 1.1144426386849688, "learning_rate": 5.62939399232933e-06, "loss": 0.5993, "step": 15532 }, { "epoch": 0.47606350373911976, "grad_norm": 1.1560301451469281, "learning_rate": 5.628901619287636e-06, "loss": 0.5647, "step": 15533 }, { "epoch": 0.47609415226186097, "grad_norm": 1.3233380528950964, "learning_rate": 5.628409240049214e-06, "loss": 0.6669, "step": 15534 }, { "epoch": 0.4761248007846022, "grad_norm": 1.2234388406727086, "learning_rate": 5.627916854618914e-06, "loss": 0.6473, "step": 15535 }, { "epoch": 0.4761554493073434, "grad_norm": 1.2547328499278778, "learning_rate": 5.62742446300159e-06, "loss": 0.6838, "step": 15536 }, { "epoch": 0.4761860978300846, "grad_norm": 1.3923197777544611, "learning_rate": 5.626932065202091e-06, "loss": 0.6586, "step": 15537 }, { "epoch": 0.4762167463528258, "grad_norm": 1.298096552255394, "learning_rate": 5.626439661225272e-06, "loss": 0.5954, "step": 15538 }, { "epoch": 0.476247394875567, "grad_norm": 1.2748252355178025, "learning_rate": 5.625947251075979e-06, "loss": 0.7244, "step": 15539 }, { "epoch": 0.4762780433983082, "grad_norm": 1.2525854783258288, "learning_rate": 5.6254548347590695e-06, "loss": 0.661, "step": 15540 }, { "epoch": 0.4763086919210494, "grad_norm": 1.279599661691752, "learning_rate": 5.624962412279395e-06, "loss": 0.6654, "step": 15541 }, { "epoch": 0.4763393404437906, "grad_norm": 1.3400884776726738, "learning_rate": 5.624469983641805e-06, "loss": 0.514, "step": 15542 }, { "epoch": 0.4763699889665318, "grad_norm": 1.163647961466877, "learning_rate": 5.623977548851151e-06, "loss": 0.5434, "step": 15543 }, { "epoch": 0.476400637489273, "grad_norm": 1.1844623248981339, "learning_rate": 5.623485107912289e-06, "loss": 0.5974, "step": 15544 }, { "epoch": 0.47643128601201423, "grad_norm": 1.340246546597912, "learning_rate": 5.622992660830066e-06, "loss": 0.6462, "step": 15545 }, { "epoch": 0.47646193453475544, "grad_norm": 1.2233738615230885, "learning_rate": 5.622500207609338e-06, "loss": 0.7415, "step": 15546 }, { "epoch": 0.47649258305749664, "grad_norm": 0.4615784318965259, "learning_rate": 5.622007748254957e-06, "loss": 0.4156, "step": 15547 }, { "epoch": 0.47652323158023785, "grad_norm": 0.4998248442727, "learning_rate": 5.6215152827717745e-06, "loss": 0.4186, "step": 15548 }, { "epoch": 0.47655388010297905, "grad_norm": 1.4293693202161684, "learning_rate": 5.621022811164643e-06, "loss": 0.6703, "step": 15549 }, { "epoch": 0.47658452862572026, "grad_norm": 0.44629487488850483, "learning_rate": 5.620530333438413e-06, "loss": 0.4128, "step": 15550 }, { "epoch": 0.47661517714846147, "grad_norm": 1.545857193820355, "learning_rate": 5.620037849597942e-06, "loss": 0.7228, "step": 15551 }, { "epoch": 0.47664582567120267, "grad_norm": 1.163333147784345, "learning_rate": 5.619545359648075e-06, "loss": 0.6359, "step": 15552 }, { "epoch": 0.4766764741939439, "grad_norm": 0.4335587743080378, "learning_rate": 5.6190528635936735e-06, "loss": 0.3878, "step": 15553 }, { "epoch": 0.4767071227166851, "grad_norm": 1.1662574529377323, "learning_rate": 5.618560361439583e-06, "loss": 0.531, "step": 15554 }, { "epoch": 0.47673777123942623, "grad_norm": 1.3217544282415519, "learning_rate": 5.618067853190661e-06, "loss": 0.6862, "step": 15555 }, { "epoch": 0.47676841976216744, "grad_norm": 1.2367424248462542, "learning_rate": 5.617575338851757e-06, "loss": 0.6502, "step": 15556 }, { "epoch": 0.47679906828490864, "grad_norm": 1.287621139142662, "learning_rate": 5.617082818427726e-06, "loss": 0.6772, "step": 15557 }, { "epoch": 0.47682971680764985, "grad_norm": 1.2509145427027124, "learning_rate": 5.6165902919234186e-06, "loss": 0.6804, "step": 15558 }, { "epoch": 0.47686036533039106, "grad_norm": 1.138909663083243, "learning_rate": 5.616097759343691e-06, "loss": 0.6132, "step": 15559 }, { "epoch": 0.47689101385313226, "grad_norm": 1.1090896221001705, "learning_rate": 5.615605220693393e-06, "loss": 0.5632, "step": 15560 }, { "epoch": 0.47692166237587347, "grad_norm": 1.2851844751518289, "learning_rate": 5.615112675977379e-06, "loss": 0.7033, "step": 15561 }, { "epoch": 0.4769523108986147, "grad_norm": 1.2505560766091435, "learning_rate": 5.614620125200504e-06, "loss": 0.5974, "step": 15562 }, { "epoch": 0.4769829594213559, "grad_norm": 1.22819031202206, "learning_rate": 5.614127568367618e-06, "loss": 0.7109, "step": 15563 }, { "epoch": 0.4770136079440971, "grad_norm": 1.2301932365256611, "learning_rate": 5.6136350054835785e-06, "loss": 0.6284, "step": 15564 }, { "epoch": 0.4770442564668383, "grad_norm": 1.192022794674819, "learning_rate": 5.6131424365532335e-06, "loss": 0.6805, "step": 15565 }, { "epoch": 0.4770749049895795, "grad_norm": 1.1970161949107982, "learning_rate": 5.6126498615814405e-06, "loss": 0.6585, "step": 15566 }, { "epoch": 0.4771055535123207, "grad_norm": 1.3503731773843826, "learning_rate": 5.612157280573052e-06, "loss": 0.7117, "step": 15567 }, { "epoch": 0.4771362020350619, "grad_norm": 1.0797248408670754, "learning_rate": 5.6116646935329215e-06, "loss": 0.6393, "step": 15568 }, { "epoch": 0.4771668505578031, "grad_norm": 1.356991873054958, "learning_rate": 5.611172100465902e-06, "loss": 0.6932, "step": 15569 }, { "epoch": 0.4771974990805443, "grad_norm": 1.1704917379113393, "learning_rate": 5.610679501376847e-06, "loss": 0.6022, "step": 15570 }, { "epoch": 0.4772281476032855, "grad_norm": 1.150135509471893, "learning_rate": 5.610186896270608e-06, "loss": 0.5302, "step": 15571 }, { "epoch": 0.47725879612602673, "grad_norm": 0.5154076838593976, "learning_rate": 5.609694285152045e-06, "loss": 0.4087, "step": 15572 }, { "epoch": 0.47728944464876794, "grad_norm": 1.3006475803734108, "learning_rate": 5.6092016680260074e-06, "loss": 0.5932, "step": 15573 }, { "epoch": 0.47732009317150914, "grad_norm": 1.1925234715877426, "learning_rate": 5.6087090448973505e-06, "loss": 0.6082, "step": 15574 }, { "epoch": 0.47735074169425035, "grad_norm": 1.3250484939152762, "learning_rate": 5.608216415770926e-06, "loss": 0.6909, "step": 15575 }, { "epoch": 0.47738139021699155, "grad_norm": 1.4236076037617593, "learning_rate": 5.60772378065159e-06, "loss": 0.6594, "step": 15576 }, { "epoch": 0.47741203873973276, "grad_norm": 1.3892611728827262, "learning_rate": 5.607231139544197e-06, "loss": 0.6444, "step": 15577 }, { "epoch": 0.47744268726247396, "grad_norm": 1.1551330723553797, "learning_rate": 5.606738492453599e-06, "loss": 0.6363, "step": 15578 }, { "epoch": 0.47747333578521517, "grad_norm": 1.2500588248288758, "learning_rate": 5.606245839384653e-06, "loss": 0.7055, "step": 15579 }, { "epoch": 0.4775039843079564, "grad_norm": 1.0721985791083815, "learning_rate": 5.60575318034221e-06, "loss": 0.6476, "step": 15580 }, { "epoch": 0.4775346328306976, "grad_norm": 1.3704998676583475, "learning_rate": 5.605260515331128e-06, "loss": 0.6422, "step": 15581 }, { "epoch": 0.4775652813534388, "grad_norm": 0.46085269195961126, "learning_rate": 5.604767844356256e-06, "loss": 0.4158, "step": 15582 }, { "epoch": 0.47759592987618, "grad_norm": 1.3358209813503594, "learning_rate": 5.604275167422455e-06, "loss": 0.6268, "step": 15583 }, { "epoch": 0.4776265783989212, "grad_norm": 1.1752707311657942, "learning_rate": 5.603782484534574e-06, "loss": 0.6513, "step": 15584 }, { "epoch": 0.4776572269216624, "grad_norm": 1.85957516604423, "learning_rate": 5.60328979569747e-06, "loss": 0.6444, "step": 15585 }, { "epoch": 0.47768787544440355, "grad_norm": 1.189688720327005, "learning_rate": 5.6027971009159975e-06, "loss": 0.6005, "step": 15586 }, { "epoch": 0.47771852396714476, "grad_norm": 1.5101095764840882, "learning_rate": 5.602304400195012e-06, "loss": 0.6932, "step": 15587 }, { "epoch": 0.47774917248988596, "grad_norm": 1.2869833948943905, "learning_rate": 5.601811693539366e-06, "loss": 0.6306, "step": 15588 }, { "epoch": 0.47777982101262717, "grad_norm": 0.4388867995121231, "learning_rate": 5.601318980953916e-06, "loss": 0.4265, "step": 15589 }, { "epoch": 0.4778104695353684, "grad_norm": 1.2787162979866533, "learning_rate": 5.600826262443516e-06, "loss": 0.6083, "step": 15590 }, { "epoch": 0.4778411180581096, "grad_norm": 1.3161408061012283, "learning_rate": 5.6003335380130205e-06, "loss": 0.6276, "step": 15591 }, { "epoch": 0.4778717665808508, "grad_norm": 1.3283684972027099, "learning_rate": 5.599840807667286e-06, "loss": 0.6818, "step": 15592 }, { "epoch": 0.477902415103592, "grad_norm": 1.2666489224628443, "learning_rate": 5.599348071411166e-06, "loss": 0.61, "step": 15593 }, { "epoch": 0.4779330636263332, "grad_norm": 1.2043291698512464, "learning_rate": 5.598855329249516e-06, "loss": 0.7303, "step": 15594 }, { "epoch": 0.4779637121490744, "grad_norm": 1.2448547820054774, "learning_rate": 5.598362581187192e-06, "loss": 0.6433, "step": 15595 }, { "epoch": 0.4779943606718156, "grad_norm": 1.2838369141275336, "learning_rate": 5.597869827229048e-06, "loss": 0.6132, "step": 15596 }, { "epoch": 0.4780250091945568, "grad_norm": 1.2461977163666877, "learning_rate": 5.597377067379939e-06, "loss": 0.6216, "step": 15597 }, { "epoch": 0.478055657717298, "grad_norm": 1.1422291335910888, "learning_rate": 5.596884301644722e-06, "loss": 0.5778, "step": 15598 }, { "epoch": 0.4780863062400392, "grad_norm": 1.208528200836518, "learning_rate": 5.59639153002825e-06, "loss": 0.5685, "step": 15599 }, { "epoch": 0.47811695476278043, "grad_norm": 1.310932583560136, "learning_rate": 5.595898752535381e-06, "loss": 0.6462, "step": 15600 }, { "epoch": 0.47814760328552164, "grad_norm": 1.2767141198555325, "learning_rate": 5.5954059691709674e-06, "loss": 0.6258, "step": 15601 }, { "epoch": 0.47817825180826284, "grad_norm": 1.4418838491467085, "learning_rate": 5.594913179939868e-06, "loss": 0.6486, "step": 15602 }, { "epoch": 0.47820890033100405, "grad_norm": 1.4163508497702173, "learning_rate": 5.594420384846937e-06, "loss": 0.5834, "step": 15603 }, { "epoch": 0.47823954885374526, "grad_norm": 0.4846922075865361, "learning_rate": 5.593927583897027e-06, "loss": 0.4458, "step": 15604 }, { "epoch": 0.47827019737648646, "grad_norm": 1.2025991457171077, "learning_rate": 5.593434777095001e-06, "loss": 0.5349, "step": 15605 }, { "epoch": 0.47830084589922767, "grad_norm": 1.3554730977592033, "learning_rate": 5.5929419644457075e-06, "loss": 0.638, "step": 15606 }, { "epoch": 0.4783314944219689, "grad_norm": 0.43534755322689284, "learning_rate": 5.5924491459540055e-06, "loss": 0.4226, "step": 15607 }, { "epoch": 0.4783621429447101, "grad_norm": 0.44829463179029827, "learning_rate": 5.59195632162475e-06, "loss": 0.4083, "step": 15608 }, { "epoch": 0.4783927914674513, "grad_norm": 0.46813051862752053, "learning_rate": 5.591463491462799e-06, "loss": 0.4376, "step": 15609 }, { "epoch": 0.4784234399901925, "grad_norm": 1.1280057701637702, "learning_rate": 5.5909706554730055e-06, "loss": 0.635, "step": 15610 }, { "epoch": 0.4784540885129337, "grad_norm": 1.116355709051829, "learning_rate": 5.5904778136602274e-06, "loss": 0.6448, "step": 15611 }, { "epoch": 0.4784847370356749, "grad_norm": 1.2548134993237907, "learning_rate": 5.589984966029319e-06, "loss": 0.6552, "step": 15612 }, { "epoch": 0.4785153855584161, "grad_norm": 0.42953295352136284, "learning_rate": 5.5894921125851395e-06, "loss": 0.3862, "step": 15613 }, { "epoch": 0.4785460340811573, "grad_norm": 1.2858685584010177, "learning_rate": 5.588999253332543e-06, "loss": 0.5792, "step": 15614 }, { "epoch": 0.4785766826038985, "grad_norm": 1.1051170738987637, "learning_rate": 5.588506388276386e-06, "loss": 0.6675, "step": 15615 }, { "epoch": 0.4786073311266397, "grad_norm": 1.3472844257700098, "learning_rate": 5.5880135174215254e-06, "loss": 0.6938, "step": 15616 }, { "epoch": 0.4786379796493809, "grad_norm": 1.193920185769687, "learning_rate": 5.587520640772816e-06, "loss": 0.6024, "step": 15617 }, { "epoch": 0.4786686281721221, "grad_norm": 1.2002237932828916, "learning_rate": 5.587027758335118e-06, "loss": 0.6264, "step": 15618 }, { "epoch": 0.4786992766948633, "grad_norm": 1.136398293826504, "learning_rate": 5.586534870113282e-06, "loss": 0.6792, "step": 15619 }, { "epoch": 0.4787299252176045, "grad_norm": 1.2811578901340999, "learning_rate": 5.58604197611217e-06, "loss": 0.6051, "step": 15620 }, { "epoch": 0.4787605737403457, "grad_norm": 1.3392505125225478, "learning_rate": 5.585549076336635e-06, "loss": 0.6402, "step": 15621 }, { "epoch": 0.4787912222630869, "grad_norm": 1.259872093540312, "learning_rate": 5.585056170791537e-06, "loss": 0.6232, "step": 15622 }, { "epoch": 0.4788218707858281, "grad_norm": 1.1926755026912732, "learning_rate": 5.584563259481729e-06, "loss": 0.6098, "step": 15623 }, { "epoch": 0.4788525193085693, "grad_norm": 1.157852901552778, "learning_rate": 5.584070342412071e-06, "loss": 0.6448, "step": 15624 }, { "epoch": 0.4788831678313105, "grad_norm": 1.5593532303306419, "learning_rate": 5.583577419587417e-06, "loss": 0.6304, "step": 15625 }, { "epoch": 0.4789138163540517, "grad_norm": 1.456410676655268, "learning_rate": 5.583084491012628e-06, "loss": 0.662, "step": 15626 }, { "epoch": 0.47894446487679293, "grad_norm": 1.0340989066128898, "learning_rate": 5.582591556692556e-06, "loss": 0.6776, "step": 15627 }, { "epoch": 0.47897511339953414, "grad_norm": 1.4267351649632551, "learning_rate": 5.58209861663206e-06, "loss": 0.7089, "step": 15628 }, { "epoch": 0.47900576192227534, "grad_norm": 1.2717704303774144, "learning_rate": 5.5816056708359975e-06, "loss": 0.6882, "step": 15629 }, { "epoch": 0.47903641044501655, "grad_norm": 1.1110613548470203, "learning_rate": 5.581112719309227e-06, "loss": 0.6127, "step": 15630 }, { "epoch": 0.47906705896775775, "grad_norm": 1.156341160575202, "learning_rate": 5.580619762056604e-06, "loss": 0.6226, "step": 15631 }, { "epoch": 0.47909770749049896, "grad_norm": 1.3178326066891506, "learning_rate": 5.580126799082984e-06, "loss": 0.537, "step": 15632 }, { "epoch": 0.47912835601324016, "grad_norm": 1.4046209090550368, "learning_rate": 5.57963383039323e-06, "loss": 0.666, "step": 15633 }, { "epoch": 0.47915900453598137, "grad_norm": 1.2381631654943848, "learning_rate": 5.5791408559921925e-06, "loss": 0.6077, "step": 15634 }, { "epoch": 0.4791896530587226, "grad_norm": 1.3347739156575593, "learning_rate": 5.578647875884733e-06, "loss": 0.6604, "step": 15635 }, { "epoch": 0.4792203015814638, "grad_norm": 0.5355502272258471, "learning_rate": 5.578154890075707e-06, "loss": 0.4369, "step": 15636 }, { "epoch": 0.479250950104205, "grad_norm": 1.10098526192685, "learning_rate": 5.577661898569975e-06, "loss": 0.689, "step": 15637 }, { "epoch": 0.4792815986269462, "grad_norm": 1.3854833327675646, "learning_rate": 5.57716890137239e-06, "loss": 0.5834, "step": 15638 }, { "epoch": 0.4793122471496874, "grad_norm": 0.4750677218791176, "learning_rate": 5.576675898487813e-06, "loss": 0.4045, "step": 15639 }, { "epoch": 0.4793428956724286, "grad_norm": 1.224649328629667, "learning_rate": 5.5761828899211e-06, "loss": 0.6534, "step": 15640 }, { "epoch": 0.4793735441951698, "grad_norm": 0.4701411601801916, "learning_rate": 5.575689875677112e-06, "loss": 0.4235, "step": 15641 }, { "epoch": 0.479404192717911, "grad_norm": 1.4571497407646246, "learning_rate": 5.575196855760703e-06, "loss": 0.6613, "step": 15642 }, { "epoch": 0.4794348412406522, "grad_norm": 1.2959137838197892, "learning_rate": 5.574703830176732e-06, "loss": 0.636, "step": 15643 }, { "epoch": 0.4794654897633934, "grad_norm": 1.3062360339191519, "learning_rate": 5.574210798930056e-06, "loss": 0.7169, "step": 15644 }, { "epoch": 0.47949613828613463, "grad_norm": 1.1563394065052581, "learning_rate": 5.573717762025537e-06, "loss": 0.6656, "step": 15645 }, { "epoch": 0.47952678680887584, "grad_norm": 1.1799414691078947, "learning_rate": 5.573224719468028e-06, "loss": 0.5655, "step": 15646 }, { "epoch": 0.47955743533161704, "grad_norm": 0.47465817626190554, "learning_rate": 5.572731671262389e-06, "loss": 0.4269, "step": 15647 }, { "epoch": 0.4795880838543582, "grad_norm": 1.1681838136166425, "learning_rate": 5.572238617413479e-06, "loss": 0.6161, "step": 15648 }, { "epoch": 0.4796187323770994, "grad_norm": 1.2581266618981, "learning_rate": 5.5717455579261555e-06, "loss": 0.6444, "step": 15649 }, { "epoch": 0.4796493808998406, "grad_norm": 1.2956976570013499, "learning_rate": 5.571252492805278e-06, "loss": 0.6837, "step": 15650 }, { "epoch": 0.4796800294225818, "grad_norm": 1.117915727113096, "learning_rate": 5.5707594220557005e-06, "loss": 0.5671, "step": 15651 }, { "epoch": 0.479710677945323, "grad_norm": 1.2327763268010727, "learning_rate": 5.570266345682287e-06, "loss": 0.6184, "step": 15652 }, { "epoch": 0.4797413264680642, "grad_norm": 1.2775049710472703, "learning_rate": 5.569773263689893e-06, "loss": 0.5433, "step": 15653 }, { "epoch": 0.4797719749908054, "grad_norm": 1.1201000423388643, "learning_rate": 5.569280176083376e-06, "loss": 0.5548, "step": 15654 }, { "epoch": 0.47980262351354663, "grad_norm": 1.039907150981778, "learning_rate": 5.568787082867596e-06, "loss": 0.5578, "step": 15655 }, { "epoch": 0.47983327203628784, "grad_norm": 0.4665837070582833, "learning_rate": 5.5682939840474126e-06, "loss": 0.4004, "step": 15656 }, { "epoch": 0.47986392055902904, "grad_norm": 1.3986802123810869, "learning_rate": 5.567800879627682e-06, "loss": 0.7558, "step": 15657 }, { "epoch": 0.47989456908177025, "grad_norm": 1.1980013085373225, "learning_rate": 5.5673077696132635e-06, "loss": 0.6323, "step": 15658 }, { "epoch": 0.47992521760451146, "grad_norm": 1.2603836204571317, "learning_rate": 5.566814654009017e-06, "loss": 0.7125, "step": 15659 }, { "epoch": 0.47995586612725266, "grad_norm": 1.3779508444982616, "learning_rate": 5.566321532819802e-06, "loss": 0.7616, "step": 15660 }, { "epoch": 0.47998651464999387, "grad_norm": 1.1717516330825608, "learning_rate": 5.565828406050476e-06, "loss": 0.5778, "step": 15661 }, { "epoch": 0.4800171631727351, "grad_norm": 1.170142024410353, "learning_rate": 5.5653352737058955e-06, "loss": 0.652, "step": 15662 }, { "epoch": 0.4800478116954763, "grad_norm": 1.1757228472808126, "learning_rate": 5.5648421357909235e-06, "loss": 0.5492, "step": 15663 }, { "epoch": 0.4800784602182175, "grad_norm": 1.3540969133424108, "learning_rate": 5.564348992310417e-06, "loss": 0.7435, "step": 15664 }, { "epoch": 0.4801091087409587, "grad_norm": 1.2786563387832783, "learning_rate": 5.563855843269237e-06, "loss": 0.6684, "step": 15665 }, { "epoch": 0.4801397572636999, "grad_norm": 1.2335358740165057, "learning_rate": 5.563362688672238e-06, "loss": 0.692, "step": 15666 }, { "epoch": 0.4801704057864411, "grad_norm": 1.2114353443007657, "learning_rate": 5.562869528524285e-06, "loss": 0.6348, "step": 15667 }, { "epoch": 0.4802010543091823, "grad_norm": 1.1040775605038011, "learning_rate": 5.5623763628302345e-06, "loss": 0.5549, "step": 15668 }, { "epoch": 0.4802317028319235, "grad_norm": 1.2695555699827796, "learning_rate": 5.561883191594945e-06, "loss": 0.6014, "step": 15669 }, { "epoch": 0.4802623513546647, "grad_norm": 1.480332446045891, "learning_rate": 5.561390014823276e-06, "loss": 0.785, "step": 15670 }, { "epoch": 0.4802929998774059, "grad_norm": 1.2240884190961185, "learning_rate": 5.5608968325200875e-06, "loss": 0.5948, "step": 15671 }, { "epoch": 0.48032364840014713, "grad_norm": 0.4985266863469257, "learning_rate": 5.56040364469024e-06, "loss": 0.4321, "step": 15672 }, { "epoch": 0.48035429692288834, "grad_norm": 1.1825034560145322, "learning_rate": 5.55991045133859e-06, "loss": 0.645, "step": 15673 }, { "epoch": 0.48038494544562954, "grad_norm": 0.4738848859286209, "learning_rate": 5.559417252470002e-06, "loss": 0.4194, "step": 15674 }, { "epoch": 0.48041559396837075, "grad_norm": 0.44838937363077613, "learning_rate": 5.55892404808933e-06, "loss": 0.424, "step": 15675 }, { "epoch": 0.48044624249111195, "grad_norm": 0.44610881639239514, "learning_rate": 5.55843083820144e-06, "loss": 0.4195, "step": 15676 }, { "epoch": 0.48047689101385316, "grad_norm": 0.4508220657662797, "learning_rate": 5.557937622811185e-06, "loss": 0.4304, "step": 15677 }, { "epoch": 0.48050753953659436, "grad_norm": 1.379465823287025, "learning_rate": 5.5574444019234285e-06, "loss": 0.6346, "step": 15678 }, { "epoch": 0.4805381880593355, "grad_norm": 1.286509562631074, "learning_rate": 5.556951175543029e-06, "loss": 0.606, "step": 15679 }, { "epoch": 0.4805688365820767, "grad_norm": 1.1662626666145481, "learning_rate": 5.556457943674849e-06, "loss": 0.5444, "step": 15680 }, { "epoch": 0.4805994851048179, "grad_norm": 1.324705735000554, "learning_rate": 5.555964706323746e-06, "loss": 0.6783, "step": 15681 }, { "epoch": 0.48063013362755913, "grad_norm": 1.2573026720592435, "learning_rate": 5.555471463494579e-06, "loss": 0.5817, "step": 15682 }, { "epoch": 0.48066078215030034, "grad_norm": 1.1462894412617923, "learning_rate": 5.554978215192211e-06, "loss": 0.6765, "step": 15683 }, { "epoch": 0.48069143067304154, "grad_norm": 1.407499208792324, "learning_rate": 5.554484961421499e-06, "loss": 0.6225, "step": 15684 }, { "epoch": 0.48072207919578275, "grad_norm": 0.4819819676269356, "learning_rate": 5.553991702187307e-06, "loss": 0.4053, "step": 15685 }, { "epoch": 0.48075272771852395, "grad_norm": 1.2088917468235962, "learning_rate": 5.55349843749449e-06, "loss": 0.6395, "step": 15686 }, { "epoch": 0.48078337624126516, "grad_norm": 1.1628944253244342, "learning_rate": 5.553005167347914e-06, "loss": 0.6622, "step": 15687 }, { "epoch": 0.48081402476400636, "grad_norm": 2.0532690701384073, "learning_rate": 5.552511891752435e-06, "loss": 0.5897, "step": 15688 }, { "epoch": 0.48084467328674757, "grad_norm": 1.2882878606073056, "learning_rate": 5.552018610712917e-06, "loss": 0.6866, "step": 15689 }, { "epoch": 0.4808753218094888, "grad_norm": 1.232130896596592, "learning_rate": 5.551525324234216e-06, "loss": 0.6104, "step": 15690 }, { "epoch": 0.48090597033223, "grad_norm": 1.286542776520068, "learning_rate": 5.5510320323211975e-06, "loss": 0.6977, "step": 15691 }, { "epoch": 0.4809366188549712, "grad_norm": 0.48691437728189557, "learning_rate": 5.5505387349787175e-06, "loss": 0.4235, "step": 15692 }, { "epoch": 0.4809672673777124, "grad_norm": 1.1820868398208924, "learning_rate": 5.5500454322116395e-06, "loss": 0.6593, "step": 15693 }, { "epoch": 0.4809979159004536, "grad_norm": 1.2509713919304617, "learning_rate": 5.54955212402482e-06, "loss": 0.6341, "step": 15694 }, { "epoch": 0.4810285644231948, "grad_norm": 1.2644419424220548, "learning_rate": 5.549058810423128e-06, "loss": 0.6526, "step": 15695 }, { "epoch": 0.481059212945936, "grad_norm": 1.3772381162982272, "learning_rate": 5.548565491411415e-06, "loss": 0.678, "step": 15696 }, { "epoch": 0.4810898614686772, "grad_norm": 1.2679217446560274, "learning_rate": 5.548072166994548e-06, "loss": 0.526, "step": 15697 }, { "epoch": 0.4811205099914184, "grad_norm": 1.22178395472747, "learning_rate": 5.547578837177384e-06, "loss": 0.5733, "step": 15698 }, { "epoch": 0.4811511585141596, "grad_norm": 1.2947235579477394, "learning_rate": 5.547085501964787e-06, "loss": 0.7217, "step": 15699 }, { "epoch": 0.48118180703690083, "grad_norm": 1.119327355245073, "learning_rate": 5.5465921613616155e-06, "loss": 0.7103, "step": 15700 }, { "epoch": 0.48121245555964204, "grad_norm": 1.1636251714275292, "learning_rate": 5.546098815372732e-06, "loss": 0.5783, "step": 15701 }, { "epoch": 0.48124310408238324, "grad_norm": 1.2128581805631276, "learning_rate": 5.545605464002998e-06, "loss": 0.622, "step": 15702 }, { "epoch": 0.48127375260512445, "grad_norm": 1.2567718439579865, "learning_rate": 5.545112107257273e-06, "loss": 0.7019, "step": 15703 }, { "epoch": 0.48130440112786566, "grad_norm": 1.3189914749430052, "learning_rate": 5.54461874514042e-06, "loss": 0.5424, "step": 15704 }, { "epoch": 0.48133504965060686, "grad_norm": 1.074505292265679, "learning_rate": 5.544125377657297e-06, "loss": 0.59, "step": 15705 }, { "epoch": 0.48136569817334807, "grad_norm": 0.49681944266058997, "learning_rate": 5.543632004812769e-06, "loss": 0.4234, "step": 15706 }, { "epoch": 0.4813963466960893, "grad_norm": 1.0910022927033654, "learning_rate": 5.543138626611696e-06, "loss": 0.5581, "step": 15707 }, { "epoch": 0.4814269952188305, "grad_norm": 1.1391197568745894, "learning_rate": 5.542645243058938e-06, "loss": 0.5823, "step": 15708 }, { "epoch": 0.4814576437415717, "grad_norm": 1.266562709271879, "learning_rate": 5.5421518541593575e-06, "loss": 0.5938, "step": 15709 }, { "epoch": 0.48148829226431283, "grad_norm": 1.3133683731356165, "learning_rate": 5.541658459917817e-06, "loss": 0.6555, "step": 15710 }, { "epoch": 0.48151894078705404, "grad_norm": 1.1317072735872773, "learning_rate": 5.541165060339178e-06, "loss": 0.611, "step": 15711 }, { "epoch": 0.48154958930979525, "grad_norm": 1.2899216880892965, "learning_rate": 5.540671655428298e-06, "loss": 0.6972, "step": 15712 }, { "epoch": 0.48158023783253645, "grad_norm": 1.2120209893078753, "learning_rate": 5.540178245190044e-06, "loss": 0.5179, "step": 15713 }, { "epoch": 0.48161088635527766, "grad_norm": 1.3013079665739025, "learning_rate": 5.539684829629276e-06, "loss": 0.5781, "step": 15714 }, { "epoch": 0.48164153487801886, "grad_norm": 1.3227591096026052, "learning_rate": 5.5391914087508545e-06, "loss": 0.63, "step": 15715 }, { "epoch": 0.48167218340076007, "grad_norm": 1.2671494456148937, "learning_rate": 5.538697982559642e-06, "loss": 0.6317, "step": 15716 }, { "epoch": 0.4817028319235013, "grad_norm": 1.1995528945054774, "learning_rate": 5.538204551060501e-06, "loss": 0.7341, "step": 15717 }, { "epoch": 0.4817334804462425, "grad_norm": 1.138017920445881, "learning_rate": 5.537711114258293e-06, "loss": 0.6274, "step": 15718 }, { "epoch": 0.4817641289689837, "grad_norm": 1.1898391538678676, "learning_rate": 5.53721767215788e-06, "loss": 0.6359, "step": 15719 }, { "epoch": 0.4817947774917249, "grad_norm": 1.308617453796245, "learning_rate": 5.536724224764122e-06, "loss": 0.6509, "step": 15720 }, { "epoch": 0.4818254260144661, "grad_norm": 1.2822102008342824, "learning_rate": 5.536230772081884e-06, "loss": 0.6652, "step": 15721 }, { "epoch": 0.4818560745372073, "grad_norm": 1.4646425290773664, "learning_rate": 5.535737314116027e-06, "loss": 0.7272, "step": 15722 }, { "epoch": 0.4818867230599485, "grad_norm": 1.2908597779009499, "learning_rate": 5.535243850871414e-06, "loss": 0.6567, "step": 15723 }, { "epoch": 0.4819173715826897, "grad_norm": 1.2812420023266764, "learning_rate": 5.534750382352905e-06, "loss": 0.6049, "step": 15724 }, { "epoch": 0.4819480201054309, "grad_norm": 1.2117386044709797, "learning_rate": 5.534256908565365e-06, "loss": 0.6968, "step": 15725 }, { "epoch": 0.4819786686281721, "grad_norm": 1.2761190521694277, "learning_rate": 5.533763429513655e-06, "loss": 0.6464, "step": 15726 }, { "epoch": 0.48200931715091333, "grad_norm": 1.1770217927818807, "learning_rate": 5.5332699452026354e-06, "loss": 0.6397, "step": 15727 }, { "epoch": 0.48203996567365454, "grad_norm": 1.4314444091201097, "learning_rate": 5.5327764556371725e-06, "loss": 0.6365, "step": 15728 }, { "epoch": 0.48207061419639574, "grad_norm": 1.127288281749225, "learning_rate": 5.5322829608221255e-06, "loss": 0.5035, "step": 15729 }, { "epoch": 0.48210126271913695, "grad_norm": 1.1505752485934748, "learning_rate": 5.53178946076236e-06, "loss": 0.5907, "step": 15730 }, { "epoch": 0.48213191124187815, "grad_norm": 0.4716360294536173, "learning_rate": 5.531295955462735e-06, "loss": 0.4101, "step": 15731 }, { "epoch": 0.48216255976461936, "grad_norm": 1.318399799284239, "learning_rate": 5.5308024449281165e-06, "loss": 0.7365, "step": 15732 }, { "epoch": 0.48219320828736056, "grad_norm": 0.44390292357580796, "learning_rate": 5.530308929163364e-06, "loss": 0.3993, "step": 15733 }, { "epoch": 0.48222385681010177, "grad_norm": 1.2506482706344038, "learning_rate": 5.5298154081733436e-06, "loss": 0.625, "step": 15734 }, { "epoch": 0.482254505332843, "grad_norm": 1.1561526596319776, "learning_rate": 5.529321881962916e-06, "loss": 0.6888, "step": 15735 }, { "epoch": 0.4822851538555842, "grad_norm": 1.3300446336904617, "learning_rate": 5.528828350536944e-06, "loss": 0.578, "step": 15736 }, { "epoch": 0.4823158023783254, "grad_norm": 1.370266399967238, "learning_rate": 5.528334813900291e-06, "loss": 0.6236, "step": 15737 }, { "epoch": 0.4823464509010666, "grad_norm": 1.4688384745797298, "learning_rate": 5.52784127205782e-06, "loss": 0.6742, "step": 15738 }, { "epoch": 0.4823770994238078, "grad_norm": 1.1366244825176417, "learning_rate": 5.527347725014395e-06, "loss": 0.6358, "step": 15739 }, { "epoch": 0.482407747946549, "grad_norm": 1.1881087971652573, "learning_rate": 5.526854172774877e-06, "loss": 0.6266, "step": 15740 }, { "epoch": 0.48243839646929015, "grad_norm": 1.3389419586581932, "learning_rate": 5.526360615344129e-06, "loss": 0.6809, "step": 15741 }, { "epoch": 0.48246904499203136, "grad_norm": 1.1246911090618248, "learning_rate": 5.525867052727016e-06, "loss": 0.6066, "step": 15742 }, { "epoch": 0.48249969351477257, "grad_norm": 0.5061053390401655, "learning_rate": 5.525373484928401e-06, "loss": 0.4205, "step": 15743 }, { "epoch": 0.48253034203751377, "grad_norm": 1.343264450823886, "learning_rate": 5.524879911953146e-06, "loss": 0.6411, "step": 15744 }, { "epoch": 0.482560990560255, "grad_norm": 1.2634538609023684, "learning_rate": 5.5243863338061165e-06, "loss": 0.6572, "step": 15745 }, { "epoch": 0.4825916390829962, "grad_norm": 1.3247389821172892, "learning_rate": 5.523892750492171e-06, "loss": 0.6003, "step": 15746 }, { "epoch": 0.4826222876057374, "grad_norm": 1.235514327423396, "learning_rate": 5.523399162016179e-06, "loss": 0.6783, "step": 15747 }, { "epoch": 0.4826529361284786, "grad_norm": 1.5266958905871866, "learning_rate": 5.5229055683829995e-06, "loss": 0.7365, "step": 15748 }, { "epoch": 0.4826835846512198, "grad_norm": 0.4462590963984667, "learning_rate": 5.5224119695975e-06, "loss": 0.4048, "step": 15749 }, { "epoch": 0.482714233173961, "grad_norm": 1.403650630143378, "learning_rate": 5.521918365664539e-06, "loss": 0.661, "step": 15750 }, { "epoch": 0.4827448816967022, "grad_norm": 1.1035646154066248, "learning_rate": 5.521424756588984e-06, "loss": 0.5711, "step": 15751 }, { "epoch": 0.4827755302194434, "grad_norm": 0.4600472483998386, "learning_rate": 5.520931142375697e-06, "loss": 0.4149, "step": 15752 }, { "epoch": 0.4828061787421846, "grad_norm": 1.3649235030211557, "learning_rate": 5.520437523029542e-06, "loss": 0.6177, "step": 15753 }, { "epoch": 0.4828368272649258, "grad_norm": 1.16398189883989, "learning_rate": 5.519943898555384e-06, "loss": 0.641, "step": 15754 }, { "epoch": 0.48286747578766703, "grad_norm": 1.2678039072697787, "learning_rate": 5.519450268958084e-06, "loss": 0.5473, "step": 15755 }, { "epoch": 0.48289812431040824, "grad_norm": 1.2649417001896945, "learning_rate": 5.518956634242509e-06, "loss": 0.6949, "step": 15756 }, { "epoch": 0.48292877283314944, "grad_norm": 1.4162256304332963, "learning_rate": 5.518462994413522e-06, "loss": 0.7154, "step": 15757 }, { "epoch": 0.48295942135589065, "grad_norm": 1.173597350943115, "learning_rate": 5.517969349475987e-06, "loss": 0.6802, "step": 15758 }, { "epoch": 0.48299006987863186, "grad_norm": 1.1405322470917199, "learning_rate": 5.517475699434764e-06, "loss": 0.6695, "step": 15759 }, { "epoch": 0.48302071840137306, "grad_norm": 1.3197856461858732, "learning_rate": 5.5169820442947255e-06, "loss": 0.7075, "step": 15760 }, { "epoch": 0.48305136692411427, "grad_norm": 1.259216892601713, "learning_rate": 5.516488384060726e-06, "loss": 0.671, "step": 15761 }, { "epoch": 0.4830820154468555, "grad_norm": 1.254991110191468, "learning_rate": 5.515994718737637e-06, "loss": 0.67, "step": 15762 }, { "epoch": 0.4831126639695967, "grad_norm": 1.2587440694060807, "learning_rate": 5.515501048330319e-06, "loss": 0.7102, "step": 15763 }, { "epoch": 0.4831433124923379, "grad_norm": 1.2589442672262896, "learning_rate": 5.515007372843637e-06, "loss": 0.6303, "step": 15764 }, { "epoch": 0.4831739610150791, "grad_norm": 1.2321740703211768, "learning_rate": 5.514513692282457e-06, "loss": 0.7099, "step": 15765 }, { "epoch": 0.4832046095378203, "grad_norm": 1.401805377815846, "learning_rate": 5.514020006651641e-06, "loss": 0.5822, "step": 15766 }, { "epoch": 0.4832352580605615, "grad_norm": 1.3245470106527595, "learning_rate": 5.513526315956053e-06, "loss": 0.5714, "step": 15767 }, { "epoch": 0.4832659065833027, "grad_norm": 1.5253166554121296, "learning_rate": 5.513032620200561e-06, "loss": 0.7565, "step": 15768 }, { "epoch": 0.4832965551060439, "grad_norm": 1.587875904258374, "learning_rate": 5.512538919390027e-06, "loss": 0.6775, "step": 15769 }, { "epoch": 0.4833272036287851, "grad_norm": 1.2902854597555622, "learning_rate": 5.512045213529315e-06, "loss": 0.6245, "step": 15770 }, { "epoch": 0.4833578521515263, "grad_norm": 1.1459922950390995, "learning_rate": 5.51155150262329e-06, "loss": 0.6614, "step": 15771 }, { "epoch": 0.4833885006742675, "grad_norm": 1.1729760134733198, "learning_rate": 5.511057786676819e-06, "loss": 0.6186, "step": 15772 }, { "epoch": 0.4834191491970087, "grad_norm": 2.015046891667144, "learning_rate": 5.510564065694764e-06, "loss": 0.5791, "step": 15773 }, { "epoch": 0.4834497977197499, "grad_norm": 1.251751803313819, "learning_rate": 5.5100703396819895e-06, "loss": 0.6807, "step": 15774 }, { "epoch": 0.4834804462424911, "grad_norm": 1.2356418668931717, "learning_rate": 5.5095766086433635e-06, "loss": 0.554, "step": 15775 }, { "epoch": 0.4835110947652323, "grad_norm": 1.3774813559445431, "learning_rate": 5.509082872583747e-06, "loss": 0.6676, "step": 15776 }, { "epoch": 0.4835417432879735, "grad_norm": 1.3267724068275621, "learning_rate": 5.508589131508009e-06, "loss": 0.8175, "step": 15777 }, { "epoch": 0.4835723918107147, "grad_norm": 1.1333709449277785, "learning_rate": 5.50809538542101e-06, "loss": 0.5739, "step": 15778 }, { "epoch": 0.4836030403334559, "grad_norm": 1.3138442452693497, "learning_rate": 5.507601634327617e-06, "loss": 0.6017, "step": 15779 }, { "epoch": 0.4836336888561971, "grad_norm": 1.216927833509049, "learning_rate": 5.507107878232697e-06, "loss": 0.5898, "step": 15780 }, { "epoch": 0.4836643373789383, "grad_norm": 1.3100995880161685, "learning_rate": 5.506614117141112e-06, "loss": 0.7185, "step": 15781 }, { "epoch": 0.48369498590167953, "grad_norm": 1.318357568286519, "learning_rate": 5.506120351057729e-06, "loss": 0.6371, "step": 15782 }, { "epoch": 0.48372563442442074, "grad_norm": 1.2593327351127022, "learning_rate": 5.505626579987411e-06, "loss": 0.6737, "step": 15783 }, { "epoch": 0.48375628294716194, "grad_norm": 1.2315443182960175, "learning_rate": 5.505132803935028e-06, "loss": 0.614, "step": 15784 }, { "epoch": 0.48378693146990315, "grad_norm": 1.2851428155234543, "learning_rate": 5.50463902290544e-06, "loss": 0.6918, "step": 15785 }, { "epoch": 0.48381757999264435, "grad_norm": 1.2742942098562382, "learning_rate": 5.504145236903515e-06, "loss": 0.5914, "step": 15786 }, { "epoch": 0.48384822851538556, "grad_norm": 1.2402098151304932, "learning_rate": 5.503651445934119e-06, "loss": 0.6365, "step": 15787 }, { "epoch": 0.48387887703812676, "grad_norm": 1.153623149820394, "learning_rate": 5.5031576500021155e-06, "loss": 0.5315, "step": 15788 }, { "epoch": 0.48390952556086797, "grad_norm": 1.222827514199616, "learning_rate": 5.502663849112371e-06, "loss": 0.6842, "step": 15789 }, { "epoch": 0.4839401740836092, "grad_norm": 1.1909259740280802, "learning_rate": 5.5021700432697515e-06, "loss": 0.6738, "step": 15790 }, { "epoch": 0.4839708226063504, "grad_norm": 1.1865999464090804, "learning_rate": 5.501676232479122e-06, "loss": 0.551, "step": 15791 }, { "epoch": 0.4840014711290916, "grad_norm": 1.4356491520993668, "learning_rate": 5.501182416745347e-06, "loss": 0.618, "step": 15792 }, { "epoch": 0.4840321196518328, "grad_norm": 1.0990801232619487, "learning_rate": 5.500688596073295e-06, "loss": 0.5493, "step": 15793 }, { "epoch": 0.484062768174574, "grad_norm": 1.1619604415560707, "learning_rate": 5.50019477046783e-06, "loss": 0.5357, "step": 15794 }, { "epoch": 0.4840934166973152, "grad_norm": 1.2535634764115449, "learning_rate": 5.4997009399338176e-06, "loss": 0.6731, "step": 15795 }, { "epoch": 0.4841240652200564, "grad_norm": 1.353757336683253, "learning_rate": 5.499207104476123e-06, "loss": 0.6802, "step": 15796 }, { "epoch": 0.4841547137427976, "grad_norm": 0.47359707551430513, "learning_rate": 5.498713264099615e-06, "loss": 0.3899, "step": 15797 }, { "epoch": 0.4841853622655388, "grad_norm": 0.5235657554779476, "learning_rate": 5.4982194188091545e-06, "loss": 0.4229, "step": 15798 }, { "epoch": 0.48421601078828, "grad_norm": 1.382089616659895, "learning_rate": 5.497725568609614e-06, "loss": 0.6629, "step": 15799 }, { "epoch": 0.48424665931102123, "grad_norm": 1.2341211713965525, "learning_rate": 5.497231713505854e-06, "loss": 0.6214, "step": 15800 }, { "epoch": 0.48427730783376244, "grad_norm": 1.3228797585278242, "learning_rate": 5.496737853502744e-06, "loss": 0.6467, "step": 15801 }, { "epoch": 0.48430795635650364, "grad_norm": 1.0762388748293599, "learning_rate": 5.496243988605147e-06, "loss": 0.6301, "step": 15802 }, { "epoch": 0.4843386048792448, "grad_norm": 1.1847287681286889, "learning_rate": 5.4957501188179345e-06, "loss": 0.6211, "step": 15803 }, { "epoch": 0.484369253401986, "grad_norm": 1.2873388143427398, "learning_rate": 5.495256244145966e-06, "loss": 0.6323, "step": 15804 }, { "epoch": 0.4843999019247272, "grad_norm": 1.2457598152418952, "learning_rate": 5.494762364594112e-06, "loss": 0.6086, "step": 15805 }, { "epoch": 0.4844305504474684, "grad_norm": 1.2461081048936122, "learning_rate": 5.494268480167237e-06, "loss": 0.7063, "step": 15806 }, { "epoch": 0.4844611989702096, "grad_norm": 1.2698564191351713, "learning_rate": 5.493774590870209e-06, "loss": 0.7081, "step": 15807 }, { "epoch": 0.4844918474929508, "grad_norm": 1.2046146935969952, "learning_rate": 5.493280696707894e-06, "loss": 0.6754, "step": 15808 }, { "epoch": 0.48452249601569203, "grad_norm": 1.3975774417827502, "learning_rate": 5.492786797685157e-06, "loss": 0.664, "step": 15809 }, { "epoch": 0.48455314453843323, "grad_norm": 1.337929818888964, "learning_rate": 5.492292893806866e-06, "loss": 0.7741, "step": 15810 }, { "epoch": 0.48458379306117444, "grad_norm": 1.1473519868681308, "learning_rate": 5.491798985077889e-06, "loss": 0.5891, "step": 15811 }, { "epoch": 0.48461444158391564, "grad_norm": 1.2920220084686462, "learning_rate": 5.491305071503089e-06, "loss": 0.6481, "step": 15812 }, { "epoch": 0.48464509010665685, "grad_norm": 1.2398595945702393, "learning_rate": 5.490811153087334e-06, "loss": 0.6017, "step": 15813 }, { "epoch": 0.48467573862939806, "grad_norm": 1.3639274148052745, "learning_rate": 5.490317229835493e-06, "loss": 0.529, "step": 15814 }, { "epoch": 0.48470638715213926, "grad_norm": 0.5645701936653812, "learning_rate": 5.48982330175243e-06, "loss": 0.3992, "step": 15815 }, { "epoch": 0.48473703567488047, "grad_norm": 1.332418726020452, "learning_rate": 5.489329368843012e-06, "loss": 0.6214, "step": 15816 }, { "epoch": 0.4847676841976217, "grad_norm": 1.2731422990963146, "learning_rate": 5.488835431112106e-06, "loss": 0.6866, "step": 15817 }, { "epoch": 0.4847983327203629, "grad_norm": 1.358547916869384, "learning_rate": 5.488341488564582e-06, "loss": 0.735, "step": 15818 }, { "epoch": 0.4848289812431041, "grad_norm": 1.2138152363720058, "learning_rate": 5.487847541205302e-06, "loss": 0.5081, "step": 15819 }, { "epoch": 0.4848596297658453, "grad_norm": 1.2881333800006072, "learning_rate": 5.487353589039136e-06, "loss": 0.656, "step": 15820 }, { "epoch": 0.4848902782885865, "grad_norm": 1.3017684063820316, "learning_rate": 5.48685963207095e-06, "loss": 0.653, "step": 15821 }, { "epoch": 0.4849209268113277, "grad_norm": 1.3524739023234924, "learning_rate": 5.486365670305612e-06, "loss": 0.6693, "step": 15822 }, { "epoch": 0.4849515753340689, "grad_norm": 1.3165504196130589, "learning_rate": 5.485871703747989e-06, "loss": 0.6839, "step": 15823 }, { "epoch": 0.4849822238568101, "grad_norm": 1.3346082913724981, "learning_rate": 5.4853777324029464e-06, "loss": 0.6211, "step": 15824 }, { "epoch": 0.4850128723795513, "grad_norm": 1.1060327369090095, "learning_rate": 5.484883756275354e-06, "loss": 0.565, "step": 15825 }, { "epoch": 0.4850435209022925, "grad_norm": 0.4713240802558779, "learning_rate": 5.484389775370078e-06, "loss": 0.3862, "step": 15826 }, { "epoch": 0.48507416942503373, "grad_norm": 1.2391782368586688, "learning_rate": 5.483895789691985e-06, "loss": 0.6489, "step": 15827 }, { "epoch": 0.48510481794777494, "grad_norm": 1.2540604637621515, "learning_rate": 5.483401799245943e-06, "loss": 0.717, "step": 15828 }, { "epoch": 0.48513546647051614, "grad_norm": 1.5521883698937888, "learning_rate": 5.48290780403682e-06, "loss": 0.6928, "step": 15829 }, { "epoch": 0.48516611499325735, "grad_norm": 1.2175844545628915, "learning_rate": 5.482413804069483e-06, "loss": 0.7412, "step": 15830 }, { "epoch": 0.48519676351599855, "grad_norm": 1.2758585681213446, "learning_rate": 5.481919799348799e-06, "loss": 0.6026, "step": 15831 }, { "epoch": 0.48522741203873976, "grad_norm": 1.2608901195446782, "learning_rate": 5.481425789879635e-06, "loss": 0.6617, "step": 15832 }, { "epoch": 0.48525806056148096, "grad_norm": 1.2314937880752699, "learning_rate": 5.48093177566686e-06, "loss": 0.6188, "step": 15833 }, { "epoch": 0.4852887090842221, "grad_norm": 1.2233931055026595, "learning_rate": 5.4804377567153424e-06, "loss": 0.5399, "step": 15834 }, { "epoch": 0.4853193576069633, "grad_norm": 1.194317509077493, "learning_rate": 5.479943733029947e-06, "loss": 0.7111, "step": 15835 }, { "epoch": 0.4853500061297045, "grad_norm": 1.319801699775539, "learning_rate": 5.479449704615543e-06, "loss": 0.6731, "step": 15836 }, { "epoch": 0.48538065465244573, "grad_norm": 1.2200290585319151, "learning_rate": 5.478955671477e-06, "loss": 0.6209, "step": 15837 }, { "epoch": 0.48541130317518694, "grad_norm": 1.2262808772233746, "learning_rate": 5.478461633619185e-06, "loss": 0.6562, "step": 15838 }, { "epoch": 0.48544195169792814, "grad_norm": 0.5282797420339486, "learning_rate": 5.477967591046962e-06, "loss": 0.4232, "step": 15839 }, { "epoch": 0.48547260022066935, "grad_norm": 1.3211697434789016, "learning_rate": 5.4774735437652036e-06, "loss": 0.6618, "step": 15840 }, { "epoch": 0.48550324874341055, "grad_norm": 1.1712312453643936, "learning_rate": 5.476979491778777e-06, "loss": 0.6422, "step": 15841 }, { "epoch": 0.48553389726615176, "grad_norm": 1.3044562432452826, "learning_rate": 5.476485435092549e-06, "loss": 0.6198, "step": 15842 }, { "epoch": 0.48556454578889297, "grad_norm": 1.2729310154977111, "learning_rate": 5.475991373711387e-06, "loss": 0.7087, "step": 15843 }, { "epoch": 0.48559519431163417, "grad_norm": 1.2672160950868931, "learning_rate": 5.47549730764016e-06, "loss": 0.7603, "step": 15844 }, { "epoch": 0.4856258428343754, "grad_norm": 0.47758868316956204, "learning_rate": 5.475003236883738e-06, "loss": 0.4219, "step": 15845 }, { "epoch": 0.4856564913571166, "grad_norm": 1.1477566481456767, "learning_rate": 5.474509161446987e-06, "loss": 0.6432, "step": 15846 }, { "epoch": 0.4856871398798578, "grad_norm": 1.0778344760417171, "learning_rate": 5.474015081334776e-06, "loss": 0.5853, "step": 15847 }, { "epoch": 0.485717788402599, "grad_norm": 0.46209325857608124, "learning_rate": 5.473520996551972e-06, "loss": 0.4251, "step": 15848 }, { "epoch": 0.4857484369253402, "grad_norm": 1.3055964071424377, "learning_rate": 5.473026907103446e-06, "loss": 0.7136, "step": 15849 }, { "epoch": 0.4857790854480814, "grad_norm": 1.1789422725036762, "learning_rate": 5.472532812994063e-06, "loss": 0.5851, "step": 15850 }, { "epoch": 0.4858097339708226, "grad_norm": 0.4522642829493695, "learning_rate": 5.472038714228695e-06, "loss": 0.4229, "step": 15851 }, { "epoch": 0.4858403824935638, "grad_norm": 1.1886957042470465, "learning_rate": 5.471544610812207e-06, "loss": 0.6113, "step": 15852 }, { "epoch": 0.485871031016305, "grad_norm": 1.3712932625946463, "learning_rate": 5.471050502749472e-06, "loss": 0.6442, "step": 15853 }, { "epoch": 0.4859016795390462, "grad_norm": 1.3359878883145417, "learning_rate": 5.470556390045354e-06, "loss": 0.6833, "step": 15854 }, { "epoch": 0.48593232806178743, "grad_norm": 1.3083057977122956, "learning_rate": 5.470062272704724e-06, "loss": 0.6973, "step": 15855 }, { "epoch": 0.48596297658452864, "grad_norm": 1.1126001163121109, "learning_rate": 5.46956815073245e-06, "loss": 0.6235, "step": 15856 }, { "epoch": 0.48599362510726984, "grad_norm": 1.202972132324746, "learning_rate": 5.469074024133401e-06, "loss": 0.6124, "step": 15857 }, { "epoch": 0.48602427363001105, "grad_norm": 1.3725352055557274, "learning_rate": 5.468579892912446e-06, "loss": 0.6499, "step": 15858 }, { "epoch": 0.48605492215275226, "grad_norm": 1.36748066990606, "learning_rate": 5.468085757074453e-06, "loss": 0.7067, "step": 15859 }, { "epoch": 0.48608557067549346, "grad_norm": 1.2462826689090243, "learning_rate": 5.4675916166242904e-06, "loss": 0.726, "step": 15860 }, { "epoch": 0.48611621919823467, "grad_norm": 1.334639569592646, "learning_rate": 5.467097471566829e-06, "loss": 0.6081, "step": 15861 }, { "epoch": 0.4861468677209759, "grad_norm": 1.1297036254688342, "learning_rate": 5.466603321906937e-06, "loss": 0.5603, "step": 15862 }, { "epoch": 0.4861775162437171, "grad_norm": 1.1930442936301797, "learning_rate": 5.466109167649483e-06, "loss": 0.6087, "step": 15863 }, { "epoch": 0.4862081647664583, "grad_norm": 0.4969712739413693, "learning_rate": 5.465615008799336e-06, "loss": 0.4247, "step": 15864 }, { "epoch": 0.48623881328919943, "grad_norm": 1.1274598286958222, "learning_rate": 5.4651208453613634e-06, "loss": 0.5539, "step": 15865 }, { "epoch": 0.48626946181194064, "grad_norm": 1.387596982698512, "learning_rate": 5.464626677340438e-06, "loss": 0.5603, "step": 15866 }, { "epoch": 0.48630011033468185, "grad_norm": 1.108284929049236, "learning_rate": 5.464132504741426e-06, "loss": 0.5602, "step": 15867 }, { "epoch": 0.48633075885742305, "grad_norm": 1.2654551624459018, "learning_rate": 5.4636383275692e-06, "loss": 0.5951, "step": 15868 }, { "epoch": 0.48636140738016426, "grad_norm": 1.221800046798575, "learning_rate": 5.463144145828624e-06, "loss": 0.6791, "step": 15869 }, { "epoch": 0.48639205590290546, "grad_norm": 1.287157314147381, "learning_rate": 5.462649959524572e-06, "loss": 0.6822, "step": 15870 }, { "epoch": 0.48642270442564667, "grad_norm": 1.095522832652683, "learning_rate": 5.46215576866191e-06, "loss": 0.5826, "step": 15871 }, { "epoch": 0.4864533529483879, "grad_norm": 0.44648625701341044, "learning_rate": 5.461661573245512e-06, "loss": 0.3992, "step": 15872 }, { "epoch": 0.4864840014711291, "grad_norm": 1.2121017526329407, "learning_rate": 5.4611673732802405e-06, "loss": 0.6681, "step": 15873 }, { "epoch": 0.4865146499938703, "grad_norm": 1.1584808600314247, "learning_rate": 5.460673168770971e-06, "loss": 0.6851, "step": 15874 }, { "epoch": 0.4865452985166115, "grad_norm": 1.2193167829532867, "learning_rate": 5.460178959722571e-06, "loss": 0.6777, "step": 15875 }, { "epoch": 0.4865759470393527, "grad_norm": 1.281781372232814, "learning_rate": 5.4596847461399095e-06, "loss": 0.6491, "step": 15876 }, { "epoch": 0.4866065955620939, "grad_norm": 0.44896159196748836, "learning_rate": 5.459190528027857e-06, "loss": 0.3946, "step": 15877 }, { "epoch": 0.4866372440848351, "grad_norm": 1.422999405847257, "learning_rate": 5.458696305391281e-06, "loss": 0.5688, "step": 15878 }, { "epoch": 0.4866678926075763, "grad_norm": 0.5258443060206164, "learning_rate": 5.458202078235056e-06, "loss": 0.4231, "step": 15879 }, { "epoch": 0.4866985411303175, "grad_norm": 1.1878798777251007, "learning_rate": 5.457707846564046e-06, "loss": 0.6479, "step": 15880 }, { "epoch": 0.4867291896530587, "grad_norm": 1.280882574395309, "learning_rate": 5.457213610383125e-06, "loss": 0.7179, "step": 15881 }, { "epoch": 0.48675983817579993, "grad_norm": 0.43499900434300554, "learning_rate": 5.456719369697161e-06, "loss": 0.4034, "step": 15882 }, { "epoch": 0.48679048669854114, "grad_norm": 0.41950104994700754, "learning_rate": 5.456225124511024e-06, "loss": 0.4251, "step": 15883 }, { "epoch": 0.48682113522128234, "grad_norm": 1.0491894039623577, "learning_rate": 5.455730874829584e-06, "loss": 0.6611, "step": 15884 }, { "epoch": 0.48685178374402355, "grad_norm": 1.200718575339821, "learning_rate": 5.455236620657712e-06, "loss": 0.6732, "step": 15885 }, { "epoch": 0.48688243226676475, "grad_norm": 1.2559768744782445, "learning_rate": 5.454742362000276e-06, "loss": 0.6702, "step": 15886 }, { "epoch": 0.48691308078950596, "grad_norm": 1.323952605142948, "learning_rate": 5.454248098862147e-06, "loss": 0.6717, "step": 15887 }, { "epoch": 0.48694372931224716, "grad_norm": 1.2754687086821193, "learning_rate": 5.453753831248196e-06, "loss": 0.6412, "step": 15888 }, { "epoch": 0.48697437783498837, "grad_norm": 1.1861082639790286, "learning_rate": 5.453259559163293e-06, "loss": 0.6797, "step": 15889 }, { "epoch": 0.4870050263577296, "grad_norm": 1.3650922460726351, "learning_rate": 5.4527652826123055e-06, "loss": 0.5825, "step": 15890 }, { "epoch": 0.4870356748804708, "grad_norm": 0.48192603084492647, "learning_rate": 5.452271001600108e-06, "loss": 0.4198, "step": 15891 }, { "epoch": 0.487066323403212, "grad_norm": 1.19625214923099, "learning_rate": 5.451776716131569e-06, "loss": 0.6153, "step": 15892 }, { "epoch": 0.4870969719259532, "grad_norm": 1.3116559756746844, "learning_rate": 5.451282426211555e-06, "loss": 0.6541, "step": 15893 }, { "epoch": 0.4871276204486944, "grad_norm": 1.2645239740431655, "learning_rate": 5.450788131844943e-06, "loss": 0.7042, "step": 15894 }, { "epoch": 0.4871582689714356, "grad_norm": 1.2292768216890235, "learning_rate": 5.4502938330365996e-06, "loss": 0.5425, "step": 15895 }, { "epoch": 0.48718891749417675, "grad_norm": 1.3244799123767954, "learning_rate": 5.449799529791395e-06, "loss": 0.5918, "step": 15896 }, { "epoch": 0.48721956601691796, "grad_norm": 1.1352009878303024, "learning_rate": 5.4493052221142005e-06, "loss": 0.6331, "step": 15897 }, { "epoch": 0.48725021453965917, "grad_norm": 1.3001825891461858, "learning_rate": 5.448810910009888e-06, "loss": 0.6473, "step": 15898 }, { "epoch": 0.48728086306240037, "grad_norm": 1.233739607781014, "learning_rate": 5.448316593483325e-06, "loss": 0.5842, "step": 15899 }, { "epoch": 0.4873115115851416, "grad_norm": 0.43179374172556456, "learning_rate": 5.4478222725393856e-06, "loss": 0.4113, "step": 15900 }, { "epoch": 0.4873421601078828, "grad_norm": 0.4472914207013588, "learning_rate": 5.4473279471829364e-06, "loss": 0.4136, "step": 15901 }, { "epoch": 0.487372808630624, "grad_norm": 1.2481703165898441, "learning_rate": 5.446833617418853e-06, "loss": 0.7176, "step": 15902 }, { "epoch": 0.4874034571533652, "grad_norm": 1.3184839069637728, "learning_rate": 5.4463392832520035e-06, "loss": 0.6663, "step": 15903 }, { "epoch": 0.4874341056761064, "grad_norm": 1.1694584391571503, "learning_rate": 5.445844944687256e-06, "loss": 0.559, "step": 15904 }, { "epoch": 0.4874647541988476, "grad_norm": 1.4163563467949989, "learning_rate": 5.445350601729488e-06, "loss": 0.6119, "step": 15905 }, { "epoch": 0.4874954027215888, "grad_norm": 1.1750382268447344, "learning_rate": 5.444856254383564e-06, "loss": 0.6131, "step": 15906 }, { "epoch": 0.48752605124433, "grad_norm": 1.0638995799526763, "learning_rate": 5.444361902654359e-06, "loss": 0.5798, "step": 15907 }, { "epoch": 0.4875566997670712, "grad_norm": 0.45795755792438375, "learning_rate": 5.443867546546741e-06, "loss": 0.4049, "step": 15908 }, { "epoch": 0.48758734828981243, "grad_norm": 1.4381489255306468, "learning_rate": 5.443373186065583e-06, "loss": 0.6437, "step": 15909 }, { "epoch": 0.48761799681255363, "grad_norm": 1.2106600880332947, "learning_rate": 5.4428788212157555e-06, "loss": 0.6337, "step": 15910 }, { "epoch": 0.48764864533529484, "grad_norm": 1.4795400612037828, "learning_rate": 5.442384452002132e-06, "loss": 0.7078, "step": 15911 }, { "epoch": 0.48767929385803604, "grad_norm": 1.1830076491279176, "learning_rate": 5.441890078429578e-06, "loss": 0.6877, "step": 15912 }, { "epoch": 0.48770994238077725, "grad_norm": 1.240510483204508, "learning_rate": 5.441395700502969e-06, "loss": 0.598, "step": 15913 }, { "epoch": 0.48774059090351846, "grad_norm": 1.2823088055399268, "learning_rate": 5.4409013182271766e-06, "loss": 0.6536, "step": 15914 }, { "epoch": 0.48777123942625966, "grad_norm": 1.3335815195751228, "learning_rate": 5.44040693160707e-06, "loss": 0.6364, "step": 15915 }, { "epoch": 0.48780188794900087, "grad_norm": 1.1303894096673865, "learning_rate": 5.43991254064752e-06, "loss": 0.6716, "step": 15916 }, { "epoch": 0.4878325364717421, "grad_norm": 1.189312617006684, "learning_rate": 5.4394181453534e-06, "loss": 0.5963, "step": 15917 }, { "epoch": 0.4878631849944833, "grad_norm": 1.2206413012330193, "learning_rate": 5.438923745729581e-06, "loss": 0.6947, "step": 15918 }, { "epoch": 0.4878938335172245, "grad_norm": 1.2198380803243931, "learning_rate": 5.438429341780932e-06, "loss": 0.6796, "step": 15919 }, { "epoch": 0.4879244820399657, "grad_norm": 1.408266419748705, "learning_rate": 5.437934933512329e-06, "loss": 0.6713, "step": 15920 }, { "epoch": 0.4879551305627069, "grad_norm": 1.3165652196597533, "learning_rate": 5.43744052092864e-06, "loss": 0.6584, "step": 15921 }, { "epoch": 0.4879857790854481, "grad_norm": 1.31352527851792, "learning_rate": 5.4369461040347385e-06, "loss": 0.6668, "step": 15922 }, { "epoch": 0.4880164276081893, "grad_norm": 1.301739028027663, "learning_rate": 5.436451682835494e-06, "loss": 0.6976, "step": 15923 }, { "epoch": 0.4880470761309305, "grad_norm": 0.4706309591872336, "learning_rate": 5.43595725733578e-06, "loss": 0.4019, "step": 15924 }, { "epoch": 0.4880777246536717, "grad_norm": 1.2247301827313326, "learning_rate": 5.435462827540466e-06, "loss": 0.7037, "step": 15925 }, { "epoch": 0.4881083731764129, "grad_norm": 1.1091193597181843, "learning_rate": 5.4349683934544294e-06, "loss": 0.5576, "step": 15926 }, { "epoch": 0.4881390216991541, "grad_norm": 1.2374959267675385, "learning_rate": 5.434473955082534e-06, "loss": 0.683, "step": 15927 }, { "epoch": 0.4881696702218953, "grad_norm": 1.45197741688925, "learning_rate": 5.433979512429658e-06, "loss": 0.7395, "step": 15928 }, { "epoch": 0.4882003187446365, "grad_norm": 1.1965194379142179, "learning_rate": 5.4334850655006686e-06, "loss": 0.6074, "step": 15929 }, { "epoch": 0.4882309672673777, "grad_norm": 1.2649803535791038, "learning_rate": 5.432990614300442e-06, "loss": 0.6762, "step": 15930 }, { "epoch": 0.4882616157901189, "grad_norm": 0.4737032441126137, "learning_rate": 5.432496158833846e-06, "loss": 0.4112, "step": 15931 }, { "epoch": 0.4882922643128601, "grad_norm": 1.1908046205468585, "learning_rate": 5.432001699105756e-06, "loss": 0.6642, "step": 15932 }, { "epoch": 0.4883229128356013, "grad_norm": 1.2616072492128685, "learning_rate": 5.431507235121043e-06, "loss": 0.5606, "step": 15933 }, { "epoch": 0.4883535613583425, "grad_norm": 1.3542529384962048, "learning_rate": 5.4310127668845795e-06, "loss": 0.5549, "step": 15934 }, { "epoch": 0.4883842098810837, "grad_norm": 0.45269020310757657, "learning_rate": 5.430518294401236e-06, "loss": 0.4104, "step": 15935 }, { "epoch": 0.4884148584038249, "grad_norm": 1.2401955909416627, "learning_rate": 5.430023817675883e-06, "loss": 0.7347, "step": 15936 }, { "epoch": 0.48844550692656613, "grad_norm": 1.1916613193986056, "learning_rate": 5.429529336713399e-06, "loss": 0.533, "step": 15937 }, { "epoch": 0.48847615544930734, "grad_norm": 1.2601341528599392, "learning_rate": 5.429034851518652e-06, "loss": 0.6228, "step": 15938 }, { "epoch": 0.48850680397204854, "grad_norm": 1.2567211066559207, "learning_rate": 5.428540362096514e-06, "loss": 0.5588, "step": 15939 }, { "epoch": 0.48853745249478975, "grad_norm": 0.4495935702470606, "learning_rate": 5.428045868451858e-06, "loss": 0.4081, "step": 15940 }, { "epoch": 0.48856810101753095, "grad_norm": 1.1690651963836756, "learning_rate": 5.427551370589558e-06, "loss": 0.6597, "step": 15941 }, { "epoch": 0.48859874954027216, "grad_norm": 1.2619526680909474, "learning_rate": 5.4270568685144835e-06, "loss": 0.756, "step": 15942 }, { "epoch": 0.48862939806301336, "grad_norm": 1.3160070534869186, "learning_rate": 5.426562362231509e-06, "loss": 0.7739, "step": 15943 }, { "epoch": 0.48866004658575457, "grad_norm": 1.3542997677222788, "learning_rate": 5.426067851745504e-06, "loss": 0.6721, "step": 15944 }, { "epoch": 0.4886906951084958, "grad_norm": 1.2564781568036254, "learning_rate": 5.425573337061346e-06, "loss": 0.6693, "step": 15945 }, { "epoch": 0.488721343631237, "grad_norm": 1.3736014811141386, "learning_rate": 5.425078818183905e-06, "loss": 0.7448, "step": 15946 }, { "epoch": 0.4887519921539782, "grad_norm": 1.2754928329052988, "learning_rate": 5.424584295118053e-06, "loss": 0.5463, "step": 15947 }, { "epoch": 0.4887826406767194, "grad_norm": 1.3754519556481992, "learning_rate": 5.424089767868663e-06, "loss": 0.6576, "step": 15948 }, { "epoch": 0.4888132891994606, "grad_norm": 1.1285897845875417, "learning_rate": 5.42359523644061e-06, "loss": 0.6177, "step": 15949 }, { "epoch": 0.4888439377222018, "grad_norm": 0.47074466561308725, "learning_rate": 5.423100700838763e-06, "loss": 0.4206, "step": 15950 }, { "epoch": 0.488874586244943, "grad_norm": 1.250835655341624, "learning_rate": 5.422606161067996e-06, "loss": 0.5699, "step": 15951 }, { "epoch": 0.4889052347676842, "grad_norm": 0.48901646094983403, "learning_rate": 5.4221116171331835e-06, "loss": 0.4301, "step": 15952 }, { "epoch": 0.4889358832904254, "grad_norm": 1.356552639873263, "learning_rate": 5.421617069039198e-06, "loss": 0.6773, "step": 15953 }, { "epoch": 0.4889665318131666, "grad_norm": 1.2736604697366523, "learning_rate": 5.42112251679091e-06, "loss": 0.6694, "step": 15954 }, { "epoch": 0.48899718033590783, "grad_norm": 1.125950386339341, "learning_rate": 5.420627960393194e-06, "loss": 0.641, "step": 15955 }, { "epoch": 0.48902782885864904, "grad_norm": 1.3147230828038707, "learning_rate": 5.420133399850924e-06, "loss": 0.6128, "step": 15956 }, { "epoch": 0.48905847738139024, "grad_norm": 1.263351459232809, "learning_rate": 5.419638835168972e-06, "loss": 0.7334, "step": 15957 }, { "epoch": 0.4890891259041314, "grad_norm": 1.209390030612223, "learning_rate": 5.419144266352211e-06, "loss": 0.6482, "step": 15958 }, { "epoch": 0.4891197744268726, "grad_norm": 1.2188383352695622, "learning_rate": 5.418649693405514e-06, "loss": 0.7012, "step": 15959 }, { "epoch": 0.4891504229496138, "grad_norm": 1.2960163973847862, "learning_rate": 5.418155116333755e-06, "loss": 0.7489, "step": 15960 }, { "epoch": 0.489181071472355, "grad_norm": 1.3341115061170805, "learning_rate": 5.417660535141806e-06, "loss": 0.6748, "step": 15961 }, { "epoch": 0.4892117199950962, "grad_norm": 1.1481428917310175, "learning_rate": 5.417165949834542e-06, "loss": 0.6703, "step": 15962 }, { "epoch": 0.4892423685178374, "grad_norm": 1.3432970332061214, "learning_rate": 5.416671360416834e-06, "loss": 0.6838, "step": 15963 }, { "epoch": 0.48927301704057863, "grad_norm": 1.2403356106505214, "learning_rate": 5.416176766893556e-06, "loss": 0.6078, "step": 15964 }, { "epoch": 0.48930366556331983, "grad_norm": 0.5260602436138919, "learning_rate": 5.415682169269585e-06, "loss": 0.4244, "step": 15965 }, { "epoch": 0.48933431408606104, "grad_norm": 1.2396561163290578, "learning_rate": 5.415187567549788e-06, "loss": 0.62, "step": 15966 }, { "epoch": 0.48936496260880225, "grad_norm": 1.2341264856694705, "learning_rate": 5.414692961739043e-06, "loss": 0.5298, "step": 15967 }, { "epoch": 0.48939561113154345, "grad_norm": 1.26854759985442, "learning_rate": 5.414198351842223e-06, "loss": 0.6789, "step": 15968 }, { "epoch": 0.48942625965428466, "grad_norm": 1.2985575292412022, "learning_rate": 5.413703737864199e-06, "loss": 0.4744, "step": 15969 }, { "epoch": 0.48945690817702586, "grad_norm": 1.131073795403025, "learning_rate": 5.4132091198098455e-06, "loss": 0.62, "step": 15970 }, { "epoch": 0.48948755669976707, "grad_norm": 0.47763848543523524, "learning_rate": 5.412714497684039e-06, "loss": 0.4093, "step": 15971 }, { "epoch": 0.4895182052225083, "grad_norm": 1.0510299694979892, "learning_rate": 5.4122198714916495e-06, "loss": 0.647, "step": 15972 }, { "epoch": 0.4895488537452495, "grad_norm": 1.2268279355412977, "learning_rate": 5.411725241237552e-06, "loss": 0.7209, "step": 15973 }, { "epoch": 0.4895795022679907, "grad_norm": 1.3429734471307764, "learning_rate": 5.411230606926622e-06, "loss": 0.5636, "step": 15974 }, { "epoch": 0.4896101507907319, "grad_norm": 1.2337937408093114, "learning_rate": 5.41073596856373e-06, "loss": 0.6641, "step": 15975 }, { "epoch": 0.4896407993134731, "grad_norm": 0.48336879818057477, "learning_rate": 5.410241326153753e-06, "loss": 0.4222, "step": 15976 }, { "epoch": 0.4896714478362143, "grad_norm": 0.48036631939219776, "learning_rate": 5.4097466797015615e-06, "loss": 0.4346, "step": 15977 }, { "epoch": 0.4897020963589555, "grad_norm": 1.0826969528910002, "learning_rate": 5.409252029212032e-06, "loss": 0.578, "step": 15978 }, { "epoch": 0.4897327448816967, "grad_norm": 1.2960723648703907, "learning_rate": 5.408757374690037e-06, "loss": 0.6379, "step": 15979 }, { "epoch": 0.4897633934044379, "grad_norm": 1.25428410649848, "learning_rate": 5.408262716140452e-06, "loss": 0.6054, "step": 15980 }, { "epoch": 0.4897940419271791, "grad_norm": 0.4496338602011746, "learning_rate": 5.407768053568148e-06, "loss": 0.4125, "step": 15981 }, { "epoch": 0.48982469044992033, "grad_norm": 1.4254571164875665, "learning_rate": 5.407273386978003e-06, "loss": 0.6154, "step": 15982 }, { "epoch": 0.48985533897266154, "grad_norm": 1.2905024122725235, "learning_rate": 5.406778716374888e-06, "loss": 0.6489, "step": 15983 }, { "epoch": 0.48988598749540274, "grad_norm": 1.2988539104714465, "learning_rate": 5.40628404176368e-06, "loss": 0.7262, "step": 15984 }, { "epoch": 0.48991663601814395, "grad_norm": 0.4565901572135249, "learning_rate": 5.405789363149251e-06, "loss": 0.4015, "step": 15985 }, { "epoch": 0.48994728454088515, "grad_norm": 1.3122151283865997, "learning_rate": 5.405294680536475e-06, "loss": 0.746, "step": 15986 }, { "epoch": 0.48997793306362636, "grad_norm": 1.4177000495216483, "learning_rate": 5.404799993930226e-06, "loss": 0.6102, "step": 15987 }, { "epoch": 0.49000858158636756, "grad_norm": 1.1577969143210087, "learning_rate": 5.404305303335379e-06, "loss": 0.5312, "step": 15988 }, { "epoch": 0.4900392301091087, "grad_norm": 1.2502969861394997, "learning_rate": 5.40381060875681e-06, "loss": 0.6847, "step": 15989 }, { "epoch": 0.4900698786318499, "grad_norm": 1.1851569923998295, "learning_rate": 5.403315910199389e-06, "loss": 0.6418, "step": 15990 }, { "epoch": 0.4901005271545911, "grad_norm": 1.2567141217037938, "learning_rate": 5.402821207667998e-06, "loss": 0.656, "step": 15991 }, { "epoch": 0.49013117567733233, "grad_norm": 0.45133409256373314, "learning_rate": 5.402326501167502e-06, "loss": 0.4008, "step": 15992 }, { "epoch": 0.49016182420007354, "grad_norm": 1.24856692042237, "learning_rate": 5.4018317907027816e-06, "loss": 0.6254, "step": 15993 }, { "epoch": 0.49019247272281474, "grad_norm": 1.208535315590901, "learning_rate": 5.401337076278709e-06, "loss": 0.6572, "step": 15994 }, { "epoch": 0.49022312124555595, "grad_norm": 0.46502258879512814, "learning_rate": 5.400842357900161e-06, "loss": 0.4339, "step": 15995 }, { "epoch": 0.49025376976829715, "grad_norm": 1.3486447140434756, "learning_rate": 5.40034763557201e-06, "loss": 0.7307, "step": 15996 }, { "epoch": 0.49028441829103836, "grad_norm": 1.326727661919417, "learning_rate": 5.399852909299131e-06, "loss": 0.5801, "step": 15997 }, { "epoch": 0.49031506681377957, "grad_norm": 1.3387044305948754, "learning_rate": 5.399358179086399e-06, "loss": 0.7319, "step": 15998 }, { "epoch": 0.49034571533652077, "grad_norm": 0.42862129126857285, "learning_rate": 5.398863444938689e-06, "loss": 0.3941, "step": 15999 }, { "epoch": 0.490376363859262, "grad_norm": 1.3268624571375858, "learning_rate": 5.398368706860876e-06, "loss": 0.6513, "step": 16000 }, { "epoch": 0.4904070123820032, "grad_norm": 1.1967728444111725, "learning_rate": 5.397873964857833e-06, "loss": 0.6687, "step": 16001 }, { "epoch": 0.4904376609047444, "grad_norm": 1.0747577246783155, "learning_rate": 5.3973792189344366e-06, "loss": 0.6199, "step": 16002 }, { "epoch": 0.4904683094274856, "grad_norm": 1.2382573675777122, "learning_rate": 5.396884469095562e-06, "loss": 0.5993, "step": 16003 }, { "epoch": 0.4904989579502268, "grad_norm": 1.2727062915379679, "learning_rate": 5.396389715346082e-06, "loss": 0.7041, "step": 16004 }, { "epoch": 0.490529606472968, "grad_norm": 1.3523893993850782, "learning_rate": 5.395894957690871e-06, "loss": 0.654, "step": 16005 }, { "epoch": 0.4905602549957092, "grad_norm": 1.2253407119293735, "learning_rate": 5.395400196134809e-06, "loss": 0.6052, "step": 16006 }, { "epoch": 0.4905909035184504, "grad_norm": 1.2665758404446628, "learning_rate": 5.394905430682766e-06, "loss": 0.6502, "step": 16007 }, { "epoch": 0.4906215520411916, "grad_norm": 1.1708833160386114, "learning_rate": 5.3944106613396196e-06, "loss": 0.629, "step": 16008 }, { "epoch": 0.4906522005639328, "grad_norm": 1.1921038713833498, "learning_rate": 5.393915888110242e-06, "loss": 0.6552, "step": 16009 }, { "epoch": 0.49068284908667403, "grad_norm": 1.1830328337090208, "learning_rate": 5.393421110999513e-06, "loss": 0.5085, "step": 16010 }, { "epoch": 0.49071349760941524, "grad_norm": 1.2450185249478285, "learning_rate": 5.392926330012305e-06, "loss": 0.652, "step": 16011 }, { "epoch": 0.49074414613215644, "grad_norm": 0.4645557415731363, "learning_rate": 5.3924315451534915e-06, "loss": 0.3947, "step": 16012 }, { "epoch": 0.49077479465489765, "grad_norm": 1.2940340521846607, "learning_rate": 5.39193675642795e-06, "loss": 0.6673, "step": 16013 }, { "epoch": 0.49080544317763886, "grad_norm": 1.372337985421881, "learning_rate": 5.391441963840556e-06, "loss": 0.5468, "step": 16014 }, { "epoch": 0.49083609170038006, "grad_norm": 1.3491027886067413, "learning_rate": 5.3909471673961844e-06, "loss": 0.7125, "step": 16015 }, { "epoch": 0.49086674022312127, "grad_norm": 1.2049927821242616, "learning_rate": 5.3904523670997085e-06, "loss": 0.6644, "step": 16016 }, { "epoch": 0.4908973887458625, "grad_norm": 0.4468189619121554, "learning_rate": 5.389957562956007e-06, "loss": 0.4025, "step": 16017 }, { "epoch": 0.4909280372686037, "grad_norm": 1.2098509659837775, "learning_rate": 5.389462754969955e-06, "loss": 0.6299, "step": 16018 }, { "epoch": 0.4909586857913449, "grad_norm": 0.464785193552192, "learning_rate": 5.388967943146426e-06, "loss": 0.4124, "step": 16019 }, { "epoch": 0.49098933431408603, "grad_norm": 1.3712631471808137, "learning_rate": 5.388473127490295e-06, "loss": 0.6935, "step": 16020 }, { "epoch": 0.49101998283682724, "grad_norm": 1.2225247782238857, "learning_rate": 5.3879783080064396e-06, "loss": 0.6139, "step": 16021 }, { "epoch": 0.49105063135956845, "grad_norm": 1.4591651693480292, "learning_rate": 5.387483484699736e-06, "loss": 0.7101, "step": 16022 }, { "epoch": 0.49108127988230965, "grad_norm": 0.4428137767342585, "learning_rate": 5.3869886575750575e-06, "loss": 0.3968, "step": 16023 }, { "epoch": 0.49111192840505086, "grad_norm": 1.3874444783246869, "learning_rate": 5.386493826637279e-06, "loss": 0.6963, "step": 16024 }, { "epoch": 0.49114257692779206, "grad_norm": 0.4419188672891243, "learning_rate": 5.38599899189128e-06, "loss": 0.3993, "step": 16025 }, { "epoch": 0.49117322545053327, "grad_norm": 0.4240747954956876, "learning_rate": 5.385504153341934e-06, "loss": 0.4046, "step": 16026 }, { "epoch": 0.4912038739732745, "grad_norm": 1.181134746353423, "learning_rate": 5.385009310994116e-06, "loss": 0.5456, "step": 16027 }, { "epoch": 0.4912345224960157, "grad_norm": 1.1151791543195886, "learning_rate": 5.384514464852704e-06, "loss": 0.6277, "step": 16028 }, { "epoch": 0.4912651710187569, "grad_norm": 1.133421344395637, "learning_rate": 5.384019614922572e-06, "loss": 0.6666, "step": 16029 }, { "epoch": 0.4912958195414981, "grad_norm": 1.325619066404707, "learning_rate": 5.383524761208597e-06, "loss": 0.7087, "step": 16030 }, { "epoch": 0.4913264680642393, "grad_norm": 1.1466336845647271, "learning_rate": 5.383029903715653e-06, "loss": 0.6624, "step": 16031 }, { "epoch": 0.4913571165869805, "grad_norm": 1.346284410951564, "learning_rate": 5.382535042448619e-06, "loss": 0.5971, "step": 16032 }, { "epoch": 0.4913877651097217, "grad_norm": 1.7250241448272714, "learning_rate": 5.382040177412368e-06, "loss": 0.6839, "step": 16033 }, { "epoch": 0.4914184136324629, "grad_norm": 1.4549147943717762, "learning_rate": 5.3815453086117785e-06, "loss": 0.6917, "step": 16034 }, { "epoch": 0.4914490621552041, "grad_norm": 0.45305207290277816, "learning_rate": 5.381050436051724e-06, "loss": 0.4025, "step": 16035 }, { "epoch": 0.4914797106779453, "grad_norm": 1.2459274746371325, "learning_rate": 5.380555559737084e-06, "loss": 0.6992, "step": 16036 }, { "epoch": 0.49151035920068653, "grad_norm": 0.47446335424161307, "learning_rate": 5.38006067967273e-06, "loss": 0.4038, "step": 16037 }, { "epoch": 0.49154100772342774, "grad_norm": 1.1622669386176079, "learning_rate": 5.379565795863545e-06, "loss": 0.6459, "step": 16038 }, { "epoch": 0.49157165624616894, "grad_norm": 1.2047852309535507, "learning_rate": 5.379070908314398e-06, "loss": 0.6301, "step": 16039 }, { "epoch": 0.49160230476891015, "grad_norm": 0.44266657787104774, "learning_rate": 5.378576017030168e-06, "loss": 0.4108, "step": 16040 }, { "epoch": 0.49163295329165135, "grad_norm": 0.431035999092536, "learning_rate": 5.378081122015733e-06, "loss": 0.3966, "step": 16041 }, { "epoch": 0.49166360181439256, "grad_norm": 0.4614766952642994, "learning_rate": 5.377586223275968e-06, "loss": 0.401, "step": 16042 }, { "epoch": 0.49169425033713376, "grad_norm": 1.2942794715930297, "learning_rate": 5.377091320815748e-06, "loss": 0.6361, "step": 16043 }, { "epoch": 0.49172489885987497, "grad_norm": 1.2131854764162846, "learning_rate": 5.376596414639952e-06, "loss": 0.6564, "step": 16044 }, { "epoch": 0.4917555473826162, "grad_norm": 0.4633162623288715, "learning_rate": 5.376101504753456e-06, "loss": 0.4235, "step": 16045 }, { "epoch": 0.4917861959053574, "grad_norm": 0.4513262538875228, "learning_rate": 5.375606591161133e-06, "loss": 0.3956, "step": 16046 }, { "epoch": 0.4918168444280986, "grad_norm": 1.2381657480875, "learning_rate": 5.375111673867865e-06, "loss": 0.6647, "step": 16047 }, { "epoch": 0.4918474929508398, "grad_norm": 1.2033077212723522, "learning_rate": 5.374616752878523e-06, "loss": 0.6269, "step": 16048 }, { "epoch": 0.491878141473581, "grad_norm": 0.44368434855014377, "learning_rate": 5.374121828197989e-06, "loss": 0.4252, "step": 16049 }, { "epoch": 0.4919087899963222, "grad_norm": 0.4533388270658875, "learning_rate": 5.373626899831135e-06, "loss": 0.4179, "step": 16050 }, { "epoch": 0.49193943851906335, "grad_norm": 0.4198276876454124, "learning_rate": 5.37313196778284e-06, "loss": 0.4401, "step": 16051 }, { "epoch": 0.49197008704180456, "grad_norm": 1.2798925257640335, "learning_rate": 5.37263703205798e-06, "loss": 0.6166, "step": 16052 }, { "epoch": 0.49200073556454577, "grad_norm": 1.48363022474988, "learning_rate": 5.372142092661432e-06, "loss": 0.7675, "step": 16053 }, { "epoch": 0.49203138408728697, "grad_norm": 1.1800050353277567, "learning_rate": 5.371647149598074e-06, "loss": 0.6095, "step": 16054 }, { "epoch": 0.4920620326100282, "grad_norm": 0.4540305574727316, "learning_rate": 5.371152202872781e-06, "loss": 0.4266, "step": 16055 }, { "epoch": 0.4920926811327694, "grad_norm": 1.3063999305017173, "learning_rate": 5.370657252490429e-06, "loss": 0.6187, "step": 16056 }, { "epoch": 0.4921233296555106, "grad_norm": 1.313918919496139, "learning_rate": 5.370162298455898e-06, "loss": 0.6531, "step": 16057 }, { "epoch": 0.4921539781782518, "grad_norm": 1.1131500525722722, "learning_rate": 5.369667340774062e-06, "loss": 0.6132, "step": 16058 }, { "epoch": 0.492184626700993, "grad_norm": 1.2226769901824934, "learning_rate": 5.369172379449798e-06, "loss": 0.7198, "step": 16059 }, { "epoch": 0.4922152752237342, "grad_norm": 1.2102132725116281, "learning_rate": 5.368677414487987e-06, "loss": 0.7098, "step": 16060 }, { "epoch": 0.4922459237464754, "grad_norm": 1.3624421316560962, "learning_rate": 5.3681824458935015e-06, "loss": 0.6815, "step": 16061 }, { "epoch": 0.4922765722692166, "grad_norm": 1.2714736537944846, "learning_rate": 5.367687473671221e-06, "loss": 0.6501, "step": 16062 }, { "epoch": 0.4923072207919578, "grad_norm": 1.1365849492167472, "learning_rate": 5.36719249782602e-06, "loss": 0.6584, "step": 16063 }, { "epoch": 0.49233786931469903, "grad_norm": 1.1215499687138752, "learning_rate": 5.366697518362779e-06, "loss": 0.6394, "step": 16064 }, { "epoch": 0.49236851783744023, "grad_norm": 1.329549380865472, "learning_rate": 5.366202535286373e-06, "loss": 0.6653, "step": 16065 }, { "epoch": 0.49239916636018144, "grad_norm": 1.2049150222825868, "learning_rate": 5.3657075486016805e-06, "loss": 0.521, "step": 16066 }, { "epoch": 0.49242981488292265, "grad_norm": 0.46509021009565676, "learning_rate": 5.365212558313576e-06, "loss": 0.3978, "step": 16067 }, { "epoch": 0.49246046340566385, "grad_norm": 1.4015752003126072, "learning_rate": 5.3647175644269404e-06, "loss": 0.6686, "step": 16068 }, { "epoch": 0.49249111192840506, "grad_norm": 1.2848031827810116, "learning_rate": 5.364222566946649e-06, "loss": 0.6712, "step": 16069 }, { "epoch": 0.49252176045114626, "grad_norm": 1.2584654724845294, "learning_rate": 5.363727565877579e-06, "loss": 0.6774, "step": 16070 }, { "epoch": 0.49255240897388747, "grad_norm": 0.44810960772091324, "learning_rate": 5.363232561224608e-06, "loss": 0.4049, "step": 16071 }, { "epoch": 0.4925830574966287, "grad_norm": 1.3563598372187327, "learning_rate": 5.362737552992615e-06, "loss": 0.5899, "step": 16072 }, { "epoch": 0.4926137060193699, "grad_norm": 1.2926077712333035, "learning_rate": 5.362242541186475e-06, "loss": 0.6178, "step": 16073 }, { "epoch": 0.4926443545421111, "grad_norm": 1.124748350876844, "learning_rate": 5.361747525811066e-06, "loss": 0.6158, "step": 16074 }, { "epoch": 0.4926750030648523, "grad_norm": 1.1534872742056912, "learning_rate": 5.3612525068712675e-06, "loss": 0.5418, "step": 16075 }, { "epoch": 0.4927056515875935, "grad_norm": 1.3042277925534609, "learning_rate": 5.360757484371956e-06, "loss": 0.7309, "step": 16076 }, { "epoch": 0.4927363001103347, "grad_norm": 1.3208745955012298, "learning_rate": 5.360262458318008e-06, "loss": 0.6289, "step": 16077 }, { "epoch": 0.4927669486330759, "grad_norm": 1.2232687227836458, "learning_rate": 5.359767428714299e-06, "loss": 0.6981, "step": 16078 }, { "epoch": 0.4927975971558171, "grad_norm": 1.2840078396576484, "learning_rate": 5.359272395565713e-06, "loss": 0.7657, "step": 16079 }, { "epoch": 0.4928282456785583, "grad_norm": 1.2164255922426455, "learning_rate": 5.358777358877124e-06, "loss": 0.6698, "step": 16080 }, { "epoch": 0.4928588942012995, "grad_norm": 1.1797152673613172, "learning_rate": 5.358282318653409e-06, "loss": 0.5317, "step": 16081 }, { "epoch": 0.4928895427240407, "grad_norm": 1.3227293759050944, "learning_rate": 5.3577872748994465e-06, "loss": 0.6102, "step": 16082 }, { "epoch": 0.4929201912467819, "grad_norm": 1.1423323492888797, "learning_rate": 5.357292227620115e-06, "loss": 0.5747, "step": 16083 }, { "epoch": 0.4929508397695231, "grad_norm": 0.5140890253710346, "learning_rate": 5.356797176820291e-06, "loss": 0.4037, "step": 16084 }, { "epoch": 0.4929814882922643, "grad_norm": 1.2775281288399485, "learning_rate": 5.3563021225048525e-06, "loss": 0.6512, "step": 16085 }, { "epoch": 0.4930121368150055, "grad_norm": 1.2933525406591262, "learning_rate": 5.35580706467868e-06, "loss": 0.6963, "step": 16086 }, { "epoch": 0.4930427853377467, "grad_norm": 1.2463221696862965, "learning_rate": 5.355312003346648e-06, "loss": 0.6753, "step": 16087 }, { "epoch": 0.4930734338604879, "grad_norm": 1.1945434341947738, "learning_rate": 5.354816938513638e-06, "loss": 0.599, "step": 16088 }, { "epoch": 0.4931040823832291, "grad_norm": 0.478881804102715, "learning_rate": 5.354321870184522e-06, "loss": 0.4167, "step": 16089 }, { "epoch": 0.4931347309059703, "grad_norm": 1.1975178257846295, "learning_rate": 5.3538267983641855e-06, "loss": 0.6053, "step": 16090 }, { "epoch": 0.4931653794287115, "grad_norm": 1.1394852276137677, "learning_rate": 5.353331723057501e-06, "loss": 0.6255, "step": 16091 }, { "epoch": 0.49319602795145273, "grad_norm": 1.2721742570484769, "learning_rate": 5.35283664426935e-06, "loss": 0.5522, "step": 16092 }, { "epoch": 0.49322667647419394, "grad_norm": 1.4519768005843612, "learning_rate": 5.352341562004608e-06, "loss": 0.5632, "step": 16093 }, { "epoch": 0.49325732499693514, "grad_norm": 1.2683205938833846, "learning_rate": 5.351846476268157e-06, "loss": 0.7126, "step": 16094 }, { "epoch": 0.49328797351967635, "grad_norm": 1.318644652036036, "learning_rate": 5.35135138706487e-06, "loss": 0.6449, "step": 16095 }, { "epoch": 0.49331862204241755, "grad_norm": 1.1522445157192114, "learning_rate": 5.3508562943996275e-06, "loss": 0.6184, "step": 16096 }, { "epoch": 0.49334927056515876, "grad_norm": 1.1851839799608561, "learning_rate": 5.35036119827731e-06, "loss": 0.7524, "step": 16097 }, { "epoch": 0.49337991908789997, "grad_norm": 1.3290358704275125, "learning_rate": 5.349866098702792e-06, "loss": 0.6941, "step": 16098 }, { "epoch": 0.49341056761064117, "grad_norm": 1.229142053478196, "learning_rate": 5.349370995680957e-06, "loss": 0.6557, "step": 16099 }, { "epoch": 0.4934412161333824, "grad_norm": 1.3514007119172602, "learning_rate": 5.3488758892166785e-06, "loss": 0.6315, "step": 16100 }, { "epoch": 0.4934718646561236, "grad_norm": 1.2950588201497752, "learning_rate": 5.3483807793148355e-06, "loss": 0.5958, "step": 16101 }, { "epoch": 0.4935025131788648, "grad_norm": 0.474393443135653, "learning_rate": 5.347885665980308e-06, "loss": 0.4218, "step": 16102 }, { "epoch": 0.493533161701606, "grad_norm": 1.4173162012946894, "learning_rate": 5.347390549217976e-06, "loss": 0.6207, "step": 16103 }, { "epoch": 0.4935638102243472, "grad_norm": 1.2615880992378692, "learning_rate": 5.346895429032714e-06, "loss": 0.5593, "step": 16104 }, { "epoch": 0.4935944587470884, "grad_norm": 0.46940341884766695, "learning_rate": 5.346400305429403e-06, "loss": 0.4213, "step": 16105 }, { "epoch": 0.4936251072698296, "grad_norm": 0.4449600995519053, "learning_rate": 5.34590517841292e-06, "loss": 0.4074, "step": 16106 }, { "epoch": 0.4936557557925708, "grad_norm": 1.1123582759393273, "learning_rate": 5.345410047988148e-06, "loss": 0.5092, "step": 16107 }, { "epoch": 0.493686404315312, "grad_norm": 1.2798435661889167, "learning_rate": 5.34491491415996e-06, "loss": 0.5965, "step": 16108 }, { "epoch": 0.4937170528380532, "grad_norm": 1.4249307001362286, "learning_rate": 5.344419776933237e-06, "loss": 0.5824, "step": 16109 }, { "epoch": 0.49374770136079443, "grad_norm": 1.316966684334518, "learning_rate": 5.343924636312858e-06, "loss": 0.5848, "step": 16110 }, { "epoch": 0.49377834988353564, "grad_norm": 1.300057112979959, "learning_rate": 5.343429492303702e-06, "loss": 0.6889, "step": 16111 }, { "epoch": 0.49380899840627684, "grad_norm": 1.1093693975540002, "learning_rate": 5.342934344910648e-06, "loss": 0.6356, "step": 16112 }, { "epoch": 0.493839646929018, "grad_norm": 1.2920486572330563, "learning_rate": 5.3424391941385724e-06, "loss": 0.6107, "step": 16113 }, { "epoch": 0.4938702954517592, "grad_norm": 0.49014031699450933, "learning_rate": 5.341944039992357e-06, "loss": 0.428, "step": 16114 }, { "epoch": 0.4939009439745004, "grad_norm": 0.4929470565408547, "learning_rate": 5.34144888247688e-06, "loss": 0.4273, "step": 16115 }, { "epoch": 0.4939315924972416, "grad_norm": 1.2069470648754868, "learning_rate": 5.340953721597019e-06, "loss": 0.7081, "step": 16116 }, { "epoch": 0.4939622410199828, "grad_norm": 1.4078376585583268, "learning_rate": 5.340458557357653e-06, "loss": 0.5914, "step": 16117 }, { "epoch": 0.493992889542724, "grad_norm": 1.176246503409468, "learning_rate": 5.339963389763663e-06, "loss": 0.5803, "step": 16118 }, { "epoch": 0.49402353806546523, "grad_norm": 1.1556671435846795, "learning_rate": 5.339468218819926e-06, "loss": 0.6103, "step": 16119 }, { "epoch": 0.49405418658820643, "grad_norm": 1.1583596450451035, "learning_rate": 5.338973044531323e-06, "loss": 0.5771, "step": 16120 }, { "epoch": 0.49408483511094764, "grad_norm": 1.1517228315962207, "learning_rate": 5.33847786690273e-06, "loss": 0.573, "step": 16121 }, { "epoch": 0.49411548363368885, "grad_norm": 1.1944477694347444, "learning_rate": 5.337982685939029e-06, "loss": 0.621, "step": 16122 }, { "epoch": 0.49414613215643005, "grad_norm": 1.2311699102859943, "learning_rate": 5.337487501645099e-06, "loss": 0.5339, "step": 16123 }, { "epoch": 0.49417678067917126, "grad_norm": 1.2113398717438775, "learning_rate": 5.3369923140258165e-06, "loss": 0.6795, "step": 16124 }, { "epoch": 0.49420742920191246, "grad_norm": 1.1838523265556904, "learning_rate": 5.336497123086063e-06, "loss": 0.6502, "step": 16125 }, { "epoch": 0.49423807772465367, "grad_norm": 1.420780208331047, "learning_rate": 5.336001928830719e-06, "loss": 0.634, "step": 16126 }, { "epoch": 0.4942687262473949, "grad_norm": 1.306558412322651, "learning_rate": 5.3355067312646605e-06, "loss": 0.6002, "step": 16127 }, { "epoch": 0.4942993747701361, "grad_norm": 1.3624642508710652, "learning_rate": 5.335011530392767e-06, "loss": 0.6559, "step": 16128 }, { "epoch": 0.4943300232928773, "grad_norm": 1.2904554365002543, "learning_rate": 5.334516326219921e-06, "loss": 0.7193, "step": 16129 }, { "epoch": 0.4943606718156185, "grad_norm": 1.2992433251404123, "learning_rate": 5.334021118751e-06, "loss": 0.7292, "step": 16130 }, { "epoch": 0.4943913203383597, "grad_norm": 1.126850902724334, "learning_rate": 5.3335259079908845e-06, "loss": 0.5218, "step": 16131 }, { "epoch": 0.4944219688611009, "grad_norm": 1.224111907978385, "learning_rate": 5.33303069394445e-06, "loss": 0.6236, "step": 16132 }, { "epoch": 0.4944526173838421, "grad_norm": 1.4724902136842333, "learning_rate": 5.33253547661658e-06, "loss": 0.6718, "step": 16133 }, { "epoch": 0.4944832659065833, "grad_norm": 1.2232859674012706, "learning_rate": 5.332040256012154e-06, "loss": 0.6453, "step": 16134 }, { "epoch": 0.4945139144293245, "grad_norm": 1.1635946548420377, "learning_rate": 5.331545032136049e-06, "loss": 0.6203, "step": 16135 }, { "epoch": 0.4945445629520657, "grad_norm": 1.2389322677384014, "learning_rate": 5.331049804993147e-06, "loss": 0.6961, "step": 16136 }, { "epoch": 0.49457521147480693, "grad_norm": 1.3448934492262412, "learning_rate": 5.330554574588327e-06, "loss": 0.7023, "step": 16137 }, { "epoch": 0.49460585999754814, "grad_norm": 1.3216268574294772, "learning_rate": 5.330059340926466e-06, "loss": 0.7087, "step": 16138 }, { "epoch": 0.49463650852028934, "grad_norm": 1.2129604517526986, "learning_rate": 5.329564104012448e-06, "loss": 0.6282, "step": 16139 }, { "epoch": 0.49466715704303055, "grad_norm": 0.520659239656474, "learning_rate": 5.32906886385115e-06, "loss": 0.4246, "step": 16140 }, { "epoch": 0.49469780556577175, "grad_norm": 1.1867472486332225, "learning_rate": 5.328573620447452e-06, "loss": 0.6044, "step": 16141 }, { "epoch": 0.49472845408851296, "grad_norm": 1.2958669800822389, "learning_rate": 5.328078373806235e-06, "loss": 0.5375, "step": 16142 }, { "epoch": 0.49475910261125416, "grad_norm": 1.2907126969189684, "learning_rate": 5.327583123932376e-06, "loss": 0.5902, "step": 16143 }, { "epoch": 0.4947897511339953, "grad_norm": 0.47689861314683446, "learning_rate": 5.327087870830757e-06, "loss": 0.4189, "step": 16144 }, { "epoch": 0.4948203996567365, "grad_norm": 1.2486603889233727, "learning_rate": 5.3265926145062585e-06, "loss": 0.614, "step": 16145 }, { "epoch": 0.4948510481794777, "grad_norm": 1.1170433916156792, "learning_rate": 5.326097354963759e-06, "loss": 0.6478, "step": 16146 }, { "epoch": 0.49488169670221893, "grad_norm": 1.3169325460253263, "learning_rate": 5.325602092208139e-06, "loss": 0.6931, "step": 16147 }, { "epoch": 0.49491234522496014, "grad_norm": 1.2931435546775174, "learning_rate": 5.325106826244278e-06, "loss": 0.5523, "step": 16148 }, { "epoch": 0.49494299374770134, "grad_norm": 1.1022268609095747, "learning_rate": 5.324611557077057e-06, "loss": 0.5587, "step": 16149 }, { "epoch": 0.49497364227044255, "grad_norm": 1.1852173266548984, "learning_rate": 5.324116284711355e-06, "loss": 0.6294, "step": 16150 }, { "epoch": 0.49500429079318375, "grad_norm": 1.1369032149987164, "learning_rate": 5.323621009152051e-06, "loss": 0.5812, "step": 16151 }, { "epoch": 0.49503493931592496, "grad_norm": 1.1460223812392936, "learning_rate": 5.323125730404029e-06, "loss": 0.5981, "step": 16152 }, { "epoch": 0.49506558783866617, "grad_norm": 1.2560958724218323, "learning_rate": 5.322630448472165e-06, "loss": 0.6069, "step": 16153 }, { "epoch": 0.49509623636140737, "grad_norm": 1.0871788202488646, "learning_rate": 5.322135163361339e-06, "loss": 0.6584, "step": 16154 }, { "epoch": 0.4951268848841486, "grad_norm": 1.1105421382071057, "learning_rate": 5.321639875076435e-06, "loss": 0.6468, "step": 16155 }, { "epoch": 0.4951575334068898, "grad_norm": 0.5481850890032949, "learning_rate": 5.32114458362233e-06, "loss": 0.39, "step": 16156 }, { "epoch": 0.495188181929631, "grad_norm": 1.351537340356075, "learning_rate": 5.320649289003906e-06, "loss": 0.668, "step": 16157 }, { "epoch": 0.4952188304523722, "grad_norm": 1.2551775301296597, "learning_rate": 5.3201539912260426e-06, "loss": 0.6683, "step": 16158 }, { "epoch": 0.4952494789751134, "grad_norm": 1.2366677216195598, "learning_rate": 5.31965869029362e-06, "loss": 0.639, "step": 16159 }, { "epoch": 0.4952801274978546, "grad_norm": 0.4803375931491924, "learning_rate": 5.319163386211517e-06, "loss": 0.3995, "step": 16160 }, { "epoch": 0.4953107760205958, "grad_norm": 1.3573096602955645, "learning_rate": 5.318668078984618e-06, "loss": 0.686, "step": 16161 }, { "epoch": 0.495341424543337, "grad_norm": 1.3008657951442912, "learning_rate": 5.318172768617798e-06, "loss": 0.6751, "step": 16162 }, { "epoch": 0.4953720730660782, "grad_norm": 1.3573201341832846, "learning_rate": 5.317677455115943e-06, "loss": 0.6406, "step": 16163 }, { "epoch": 0.49540272158881943, "grad_norm": 1.2534458342458008, "learning_rate": 5.3171821384839284e-06, "loss": 0.6672, "step": 16164 }, { "epoch": 0.49543337011156063, "grad_norm": 1.2245398944916748, "learning_rate": 5.316686818726639e-06, "loss": 0.6085, "step": 16165 }, { "epoch": 0.49546401863430184, "grad_norm": 1.2194240044463482, "learning_rate": 5.3161914958489525e-06, "loss": 0.6494, "step": 16166 }, { "epoch": 0.49549466715704304, "grad_norm": 1.301780327946272, "learning_rate": 5.3156961698557495e-06, "loss": 0.6277, "step": 16167 }, { "epoch": 0.49552531567978425, "grad_norm": 1.3019261977820638, "learning_rate": 5.315200840751912e-06, "loss": 0.6224, "step": 16168 }, { "epoch": 0.49555596420252546, "grad_norm": 1.2239507220065713, "learning_rate": 5.314705508542321e-06, "loss": 0.648, "step": 16169 }, { "epoch": 0.49558661272526666, "grad_norm": 0.5554122422851777, "learning_rate": 5.314210173231855e-06, "loss": 0.4166, "step": 16170 }, { "epoch": 0.49561726124800787, "grad_norm": 1.2128190552490328, "learning_rate": 5.313714834825395e-06, "loss": 0.6791, "step": 16171 }, { "epoch": 0.4956479097707491, "grad_norm": 1.3123004915102707, "learning_rate": 5.313219493327826e-06, "loss": 0.6694, "step": 16172 }, { "epoch": 0.4956785582934903, "grad_norm": 1.3664940744115812, "learning_rate": 5.312724148744022e-06, "loss": 0.702, "step": 16173 }, { "epoch": 0.4957092068162315, "grad_norm": 1.1373134889965866, "learning_rate": 5.312228801078867e-06, "loss": 0.6455, "step": 16174 }, { "epoch": 0.49573985533897263, "grad_norm": 1.1687915825188187, "learning_rate": 5.311733450337242e-06, "loss": 0.7334, "step": 16175 }, { "epoch": 0.49577050386171384, "grad_norm": 1.0877712054532542, "learning_rate": 5.311238096524027e-06, "loss": 0.7056, "step": 16176 }, { "epoch": 0.49580115238445505, "grad_norm": 1.175814477445147, "learning_rate": 5.310742739644106e-06, "loss": 0.6401, "step": 16177 }, { "epoch": 0.49583180090719625, "grad_norm": 1.2146109557740223, "learning_rate": 5.310247379702356e-06, "loss": 0.5391, "step": 16178 }, { "epoch": 0.49586244942993746, "grad_norm": 1.301328892469921, "learning_rate": 5.309752016703657e-06, "loss": 0.63, "step": 16179 }, { "epoch": 0.49589309795267866, "grad_norm": 1.2473687011033465, "learning_rate": 5.309256650652894e-06, "loss": 0.6285, "step": 16180 }, { "epoch": 0.49592374647541987, "grad_norm": 1.2430376654282411, "learning_rate": 5.3087612815549476e-06, "loss": 0.7064, "step": 16181 }, { "epoch": 0.4959543949981611, "grad_norm": 1.1700936342250248, "learning_rate": 5.308265909414694e-06, "loss": 0.63, "step": 16182 }, { "epoch": 0.4959850435209023, "grad_norm": 1.3256422835153223, "learning_rate": 5.3077705342370204e-06, "loss": 0.7215, "step": 16183 }, { "epoch": 0.4960156920436435, "grad_norm": 1.261846891036048, "learning_rate": 5.307275156026804e-06, "loss": 0.6714, "step": 16184 }, { "epoch": 0.4960463405663847, "grad_norm": 1.281277480568419, "learning_rate": 5.306779774788926e-06, "loss": 0.6821, "step": 16185 }, { "epoch": 0.4960769890891259, "grad_norm": 0.46186422946052164, "learning_rate": 5.306284390528269e-06, "loss": 0.4149, "step": 16186 }, { "epoch": 0.4961076376118671, "grad_norm": 1.1848644190943511, "learning_rate": 5.305789003249714e-06, "loss": 0.6662, "step": 16187 }, { "epoch": 0.4961382861346083, "grad_norm": 1.379833751873045, "learning_rate": 5.305293612958141e-06, "loss": 0.7095, "step": 16188 }, { "epoch": 0.4961689346573495, "grad_norm": 1.3278891336446677, "learning_rate": 5.304798219658433e-06, "loss": 0.6945, "step": 16189 }, { "epoch": 0.4961995831800907, "grad_norm": 1.2923592984693137, "learning_rate": 5.304302823355468e-06, "loss": 0.7192, "step": 16190 }, { "epoch": 0.4962302317028319, "grad_norm": 1.2883846365541505, "learning_rate": 5.303807424054131e-06, "loss": 0.6423, "step": 16191 }, { "epoch": 0.49626088022557313, "grad_norm": 1.4135074339149676, "learning_rate": 5.303312021759302e-06, "loss": 0.7726, "step": 16192 }, { "epoch": 0.49629152874831434, "grad_norm": 1.3416645430523466, "learning_rate": 5.302816616475861e-06, "loss": 0.6179, "step": 16193 }, { "epoch": 0.49632217727105554, "grad_norm": 1.3055770671870863, "learning_rate": 5.302321208208692e-06, "loss": 0.6647, "step": 16194 }, { "epoch": 0.49635282579379675, "grad_norm": 0.4837446717680484, "learning_rate": 5.301825796962672e-06, "loss": 0.4383, "step": 16195 }, { "epoch": 0.49638347431653795, "grad_norm": 1.3027456471429406, "learning_rate": 5.3013303827426885e-06, "loss": 0.6517, "step": 16196 }, { "epoch": 0.49641412283927916, "grad_norm": 0.44082659966612714, "learning_rate": 5.300834965553617e-06, "loss": 0.4116, "step": 16197 }, { "epoch": 0.49644477136202037, "grad_norm": 0.4357478637413419, "learning_rate": 5.300339545400344e-06, "loss": 0.4116, "step": 16198 }, { "epoch": 0.49647541988476157, "grad_norm": 1.2695279600496712, "learning_rate": 5.2998441222877475e-06, "loss": 0.7133, "step": 16199 }, { "epoch": 0.4965060684075028, "grad_norm": 0.45146736037928686, "learning_rate": 5.2993486962207095e-06, "loss": 0.44, "step": 16200 }, { "epoch": 0.496536716930244, "grad_norm": 1.2966454898729358, "learning_rate": 5.298853267204111e-06, "loss": 0.7036, "step": 16201 }, { "epoch": 0.4965673654529852, "grad_norm": 1.4685368500368972, "learning_rate": 5.298357835242838e-06, "loss": 0.6607, "step": 16202 }, { "epoch": 0.4965980139757264, "grad_norm": 1.128452978171795, "learning_rate": 5.297862400341768e-06, "loss": 0.58, "step": 16203 }, { "epoch": 0.4966286624984676, "grad_norm": 1.0083630647103692, "learning_rate": 5.2973669625057825e-06, "loss": 0.5048, "step": 16204 }, { "epoch": 0.4966593110212088, "grad_norm": 1.2934070431508986, "learning_rate": 5.296871521739763e-06, "loss": 0.6445, "step": 16205 }, { "epoch": 0.49668995954394995, "grad_norm": 1.2640905487158898, "learning_rate": 5.296376078048595e-06, "loss": 0.5892, "step": 16206 }, { "epoch": 0.49672060806669116, "grad_norm": 1.390512913547861, "learning_rate": 5.295880631437158e-06, "loss": 0.6598, "step": 16207 }, { "epoch": 0.49675125658943237, "grad_norm": 1.3913382192874446, "learning_rate": 5.2953851819103305e-06, "loss": 0.6089, "step": 16208 }, { "epoch": 0.49678190511217357, "grad_norm": 1.1745730799648064, "learning_rate": 5.2948897294729995e-06, "loss": 0.6625, "step": 16209 }, { "epoch": 0.4968125536349148, "grad_norm": 1.428114448620153, "learning_rate": 5.294394274130044e-06, "loss": 0.6241, "step": 16210 }, { "epoch": 0.496843202157656, "grad_norm": 1.2991643572618021, "learning_rate": 5.293898815886347e-06, "loss": 0.6691, "step": 16211 }, { "epoch": 0.4968738506803972, "grad_norm": 1.3664666712674347, "learning_rate": 5.293403354746789e-06, "loss": 0.7103, "step": 16212 }, { "epoch": 0.4969044992031384, "grad_norm": 1.4933297951553288, "learning_rate": 5.292907890716252e-06, "loss": 0.6937, "step": 16213 }, { "epoch": 0.4969351477258796, "grad_norm": 1.3250323813740963, "learning_rate": 5.292412423799619e-06, "loss": 0.7358, "step": 16214 }, { "epoch": 0.4969657962486208, "grad_norm": 0.4839183054057031, "learning_rate": 5.291916954001773e-06, "loss": 0.4208, "step": 16215 }, { "epoch": 0.496996444771362, "grad_norm": 1.2751947955591325, "learning_rate": 5.2914214813275935e-06, "loss": 0.7328, "step": 16216 }, { "epoch": 0.4970270932941032, "grad_norm": 0.49372770939124583, "learning_rate": 5.290926005781964e-06, "loss": 0.3903, "step": 16217 }, { "epoch": 0.4970577418168444, "grad_norm": 1.4242424010338923, "learning_rate": 5.290430527369764e-06, "loss": 0.603, "step": 16218 }, { "epoch": 0.49708839033958563, "grad_norm": 1.2383678952872597, "learning_rate": 5.28993504609588e-06, "loss": 0.6994, "step": 16219 }, { "epoch": 0.49711903886232683, "grad_norm": 1.2857760317440132, "learning_rate": 5.289439561965192e-06, "loss": 0.6628, "step": 16220 }, { "epoch": 0.49714968738506804, "grad_norm": 1.2317609707162278, "learning_rate": 5.28894407498258e-06, "loss": 0.647, "step": 16221 }, { "epoch": 0.49718033590780925, "grad_norm": 1.1653292374678057, "learning_rate": 5.28844858515293e-06, "loss": 0.6327, "step": 16222 }, { "epoch": 0.49721098443055045, "grad_norm": 0.46499018235534384, "learning_rate": 5.287953092481122e-06, "loss": 0.408, "step": 16223 }, { "epoch": 0.49724163295329166, "grad_norm": 1.2693331650401476, "learning_rate": 5.287457596972039e-06, "loss": 0.6318, "step": 16224 }, { "epoch": 0.49727228147603286, "grad_norm": 1.333562813639525, "learning_rate": 5.28696209863056e-06, "loss": 0.6129, "step": 16225 }, { "epoch": 0.49730292999877407, "grad_norm": 1.3274316142866693, "learning_rate": 5.286466597461574e-06, "loss": 0.6657, "step": 16226 }, { "epoch": 0.4973335785215153, "grad_norm": 1.35988391778033, "learning_rate": 5.285971093469956e-06, "loss": 0.6243, "step": 16227 }, { "epoch": 0.4973642270442565, "grad_norm": 0.4524715900347935, "learning_rate": 5.285475586660593e-06, "loss": 0.391, "step": 16228 }, { "epoch": 0.4973948755669977, "grad_norm": 1.3486720527612837, "learning_rate": 5.284980077038365e-06, "loss": 0.5507, "step": 16229 }, { "epoch": 0.4974255240897389, "grad_norm": 1.2888906431977825, "learning_rate": 5.284484564608158e-06, "loss": 0.6065, "step": 16230 }, { "epoch": 0.4974561726124801, "grad_norm": 1.2721560977980715, "learning_rate": 5.2839890493748495e-06, "loss": 0.684, "step": 16231 }, { "epoch": 0.4974868211352213, "grad_norm": 1.2343915384290536, "learning_rate": 5.283493531343324e-06, "loss": 0.5489, "step": 16232 }, { "epoch": 0.4975174696579625, "grad_norm": 1.2221216244903834, "learning_rate": 5.282998010518465e-06, "loss": 0.6188, "step": 16233 }, { "epoch": 0.4975481181807037, "grad_norm": 1.2612492347709485, "learning_rate": 5.282502486905154e-06, "loss": 0.6716, "step": 16234 }, { "epoch": 0.4975787667034449, "grad_norm": 1.3416590833825068, "learning_rate": 5.282006960508275e-06, "loss": 0.7569, "step": 16235 }, { "epoch": 0.4976094152261861, "grad_norm": 0.4720137842377252, "learning_rate": 5.281511431332707e-06, "loss": 0.4025, "step": 16236 }, { "epoch": 0.4976400637489273, "grad_norm": 1.414179385952334, "learning_rate": 5.281015899383336e-06, "loss": 0.733, "step": 16237 }, { "epoch": 0.4976707122716685, "grad_norm": 1.1557774037552884, "learning_rate": 5.280520364665044e-06, "loss": 0.6607, "step": 16238 }, { "epoch": 0.4977013607944097, "grad_norm": 1.27251880699591, "learning_rate": 5.2800248271827124e-06, "loss": 0.6054, "step": 16239 }, { "epoch": 0.4977320093171509, "grad_norm": 1.2634080365473888, "learning_rate": 5.279529286941224e-06, "loss": 0.6611, "step": 16240 }, { "epoch": 0.4977626578398921, "grad_norm": 1.2429920626201922, "learning_rate": 5.279033743945463e-06, "loss": 0.6119, "step": 16241 }, { "epoch": 0.4977933063626333, "grad_norm": 1.3048976338825256, "learning_rate": 5.27853819820031e-06, "loss": 0.6298, "step": 16242 }, { "epoch": 0.4978239548853745, "grad_norm": 1.2938532812764563, "learning_rate": 5.278042649710651e-06, "loss": 0.6097, "step": 16243 }, { "epoch": 0.4978546034081157, "grad_norm": 1.216471959286279, "learning_rate": 5.277547098481364e-06, "loss": 0.6322, "step": 16244 }, { "epoch": 0.4978852519308569, "grad_norm": 1.2106499743551005, "learning_rate": 5.277051544517337e-06, "loss": 0.583, "step": 16245 }, { "epoch": 0.4979159004535981, "grad_norm": 1.3401555357576187, "learning_rate": 5.276555987823448e-06, "loss": 0.6921, "step": 16246 }, { "epoch": 0.49794654897633933, "grad_norm": 1.3310303799177519, "learning_rate": 5.276060428404582e-06, "loss": 0.6576, "step": 16247 }, { "epoch": 0.49797719749908054, "grad_norm": 1.1745385298672926, "learning_rate": 5.275564866265624e-06, "loss": 0.6917, "step": 16248 }, { "epoch": 0.49800784602182174, "grad_norm": 1.2091965539451304, "learning_rate": 5.275069301411454e-06, "loss": 0.6361, "step": 16249 }, { "epoch": 0.49803849454456295, "grad_norm": 1.285968942539638, "learning_rate": 5.274573733846956e-06, "loss": 0.612, "step": 16250 }, { "epoch": 0.49806914306730415, "grad_norm": 0.4578654495244242, "learning_rate": 5.274078163577011e-06, "loss": 0.4001, "step": 16251 }, { "epoch": 0.49809979159004536, "grad_norm": 1.1491626548391927, "learning_rate": 5.2735825906065065e-06, "loss": 0.6167, "step": 16252 }, { "epoch": 0.49813044011278657, "grad_norm": 1.2849331982198804, "learning_rate": 5.273087014940321e-06, "loss": 0.6766, "step": 16253 }, { "epoch": 0.49816108863552777, "grad_norm": 0.452222947409017, "learning_rate": 5.27259143658334e-06, "loss": 0.4096, "step": 16254 }, { "epoch": 0.498191737158269, "grad_norm": 1.1476898763225092, "learning_rate": 5.272095855540444e-06, "loss": 0.652, "step": 16255 }, { "epoch": 0.4982223856810102, "grad_norm": 1.312966325516951, "learning_rate": 5.271600271816521e-06, "loss": 0.5989, "step": 16256 }, { "epoch": 0.4982530342037514, "grad_norm": 0.4398704328869395, "learning_rate": 5.271104685416449e-06, "loss": 0.3997, "step": 16257 }, { "epoch": 0.4982836827264926, "grad_norm": 1.3836486089697797, "learning_rate": 5.270609096345114e-06, "loss": 0.6681, "step": 16258 }, { "epoch": 0.4983143312492338, "grad_norm": 1.3283845348447954, "learning_rate": 5.270113504607397e-06, "loss": 0.6757, "step": 16259 }, { "epoch": 0.498344979771975, "grad_norm": 1.2571816363283999, "learning_rate": 5.269617910208183e-06, "loss": 0.6106, "step": 16260 }, { "epoch": 0.4983756282947162, "grad_norm": 1.2496626291288813, "learning_rate": 5.269122313152356e-06, "loss": 0.6707, "step": 16261 }, { "epoch": 0.4984062768174574, "grad_norm": 1.7089564702444706, "learning_rate": 5.268626713444797e-06, "loss": 0.6312, "step": 16262 }, { "epoch": 0.4984369253401986, "grad_norm": 1.257796962021739, "learning_rate": 5.26813111109039e-06, "loss": 0.5874, "step": 16263 }, { "epoch": 0.49846757386293983, "grad_norm": 0.43995526088059256, "learning_rate": 5.267635506094019e-06, "loss": 0.4015, "step": 16264 }, { "epoch": 0.49849822238568103, "grad_norm": 0.4997316368030012, "learning_rate": 5.267139898460568e-06, "loss": 0.4068, "step": 16265 }, { "epoch": 0.49852887090842224, "grad_norm": 1.4400053414143883, "learning_rate": 5.266644288194918e-06, "loss": 0.743, "step": 16266 }, { "epoch": 0.49855951943116344, "grad_norm": 1.2495832884228015, "learning_rate": 5.266148675301953e-06, "loss": 0.6445, "step": 16267 }, { "epoch": 0.4985901679539046, "grad_norm": 0.4393315756042696, "learning_rate": 5.265653059786558e-06, "loss": 0.4012, "step": 16268 }, { "epoch": 0.4986208164766458, "grad_norm": 1.2347611240358642, "learning_rate": 5.265157441653616e-06, "loss": 0.7242, "step": 16269 }, { "epoch": 0.498651464999387, "grad_norm": 1.1449967024975665, "learning_rate": 5.264661820908008e-06, "loss": 0.6286, "step": 16270 }, { "epoch": 0.4986821135221282, "grad_norm": 1.3385268810278026, "learning_rate": 5.264166197554621e-06, "loss": 0.6482, "step": 16271 }, { "epoch": 0.4987127620448694, "grad_norm": 1.4059665895572673, "learning_rate": 5.263670571598335e-06, "loss": 0.6175, "step": 16272 }, { "epoch": 0.4987434105676106, "grad_norm": 1.2890303766507674, "learning_rate": 5.263174943044037e-06, "loss": 0.7053, "step": 16273 }, { "epoch": 0.49877405909035183, "grad_norm": 1.1714549779602448, "learning_rate": 5.262679311896609e-06, "loss": 0.5668, "step": 16274 }, { "epoch": 0.49880470761309303, "grad_norm": 1.2563071915479476, "learning_rate": 5.262183678160935e-06, "loss": 0.6902, "step": 16275 }, { "epoch": 0.49883535613583424, "grad_norm": 1.2678994834751574, "learning_rate": 5.261688041841897e-06, "loss": 0.6065, "step": 16276 }, { "epoch": 0.49886600465857545, "grad_norm": 0.4761802118632617, "learning_rate": 5.26119240294438e-06, "loss": 0.4488, "step": 16277 }, { "epoch": 0.49889665318131665, "grad_norm": 1.3163259665419755, "learning_rate": 5.260696761473268e-06, "loss": 0.6254, "step": 16278 }, { "epoch": 0.49892730170405786, "grad_norm": 1.2171936535520589, "learning_rate": 5.260201117433441e-06, "loss": 0.5397, "step": 16279 }, { "epoch": 0.49895795022679906, "grad_norm": 1.1178340677886063, "learning_rate": 5.25970547082979e-06, "loss": 0.6143, "step": 16280 }, { "epoch": 0.49898859874954027, "grad_norm": 1.363536503901948, "learning_rate": 5.259209821667193e-06, "loss": 0.5844, "step": 16281 }, { "epoch": 0.4990192472722815, "grad_norm": 1.3600346605497011, "learning_rate": 5.2587141699505355e-06, "loss": 0.7456, "step": 16282 }, { "epoch": 0.4990498957950227, "grad_norm": 1.2772188754324425, "learning_rate": 5.2582185156847e-06, "loss": 0.5725, "step": 16283 }, { "epoch": 0.4990805443177639, "grad_norm": 1.3027255776266566, "learning_rate": 5.2577228588745736e-06, "loss": 0.6247, "step": 16284 }, { "epoch": 0.4991111928405051, "grad_norm": 1.3382102755708205, "learning_rate": 5.257227199525035e-06, "loss": 0.6912, "step": 16285 }, { "epoch": 0.4991418413632463, "grad_norm": 1.182507832555225, "learning_rate": 5.256731537640973e-06, "loss": 0.6561, "step": 16286 }, { "epoch": 0.4991724898859875, "grad_norm": 1.1223385663181715, "learning_rate": 5.256235873227268e-06, "loss": 0.6074, "step": 16287 }, { "epoch": 0.4992031384087287, "grad_norm": 1.2656744123091173, "learning_rate": 5.255740206288808e-06, "loss": 0.7003, "step": 16288 }, { "epoch": 0.4992337869314699, "grad_norm": 1.412860895143926, "learning_rate": 5.255244536830472e-06, "loss": 0.6503, "step": 16289 }, { "epoch": 0.4992644354542111, "grad_norm": 1.30167831125442, "learning_rate": 5.254748864857147e-06, "loss": 0.6185, "step": 16290 }, { "epoch": 0.4992950839769523, "grad_norm": 1.3367351843502684, "learning_rate": 5.254253190373716e-06, "loss": 0.6158, "step": 16291 }, { "epoch": 0.49932573249969353, "grad_norm": 1.319092419118883, "learning_rate": 5.253757513385064e-06, "loss": 0.6724, "step": 16292 }, { "epoch": 0.49935638102243474, "grad_norm": 1.3306123654040478, "learning_rate": 5.253261833896074e-06, "loss": 0.6523, "step": 16293 }, { "epoch": 0.49938702954517594, "grad_norm": 0.4843209998833403, "learning_rate": 5.252766151911629e-06, "loss": 0.4217, "step": 16294 }, { "epoch": 0.49941767806791715, "grad_norm": 1.316933901293172, "learning_rate": 5.252270467436615e-06, "loss": 0.7081, "step": 16295 }, { "epoch": 0.49944832659065835, "grad_norm": 1.142073580775706, "learning_rate": 5.251774780475916e-06, "loss": 0.5943, "step": 16296 }, { "epoch": 0.49947897511339956, "grad_norm": 1.3452353623678335, "learning_rate": 5.251279091034417e-06, "loss": 0.6678, "step": 16297 }, { "epoch": 0.49950962363614076, "grad_norm": 1.2350534098213595, "learning_rate": 5.250783399116998e-06, "loss": 0.5652, "step": 16298 }, { "epoch": 0.4995402721588819, "grad_norm": 1.108326871739366, "learning_rate": 5.25028770472855e-06, "loss": 0.6043, "step": 16299 }, { "epoch": 0.4995709206816231, "grad_norm": 1.2413085375492148, "learning_rate": 5.24979200787395e-06, "loss": 0.6093, "step": 16300 }, { "epoch": 0.4996015692043643, "grad_norm": 1.2444661008002504, "learning_rate": 5.249296308558086e-06, "loss": 0.6291, "step": 16301 }, { "epoch": 0.49963221772710553, "grad_norm": 1.2591002728249125, "learning_rate": 5.248800606785842e-06, "loss": 0.6433, "step": 16302 }, { "epoch": 0.49966286624984674, "grad_norm": 1.2647144496504998, "learning_rate": 5.2483049025621025e-06, "loss": 0.6429, "step": 16303 }, { "epoch": 0.49969351477258794, "grad_norm": 1.1943105177177922, "learning_rate": 5.24780919589175e-06, "loss": 0.587, "step": 16304 }, { "epoch": 0.49972416329532915, "grad_norm": 1.3399853056076216, "learning_rate": 5.247313486779671e-06, "loss": 0.56, "step": 16305 }, { "epoch": 0.49975481181807035, "grad_norm": 1.3582875694784429, "learning_rate": 5.246817775230748e-06, "loss": 0.6147, "step": 16306 }, { "epoch": 0.49978546034081156, "grad_norm": 1.2902850442326201, "learning_rate": 5.2463220612498675e-06, "loss": 0.6585, "step": 16307 }, { "epoch": 0.49981610886355277, "grad_norm": 1.148317056820015, "learning_rate": 5.245826344841912e-06, "loss": 0.609, "step": 16308 }, { "epoch": 0.49984675738629397, "grad_norm": 1.4564298002767884, "learning_rate": 5.2453306260117665e-06, "loss": 0.7052, "step": 16309 }, { "epoch": 0.4998774059090352, "grad_norm": 1.3000785868285487, "learning_rate": 5.2448349047643165e-06, "loss": 0.5938, "step": 16310 }, { "epoch": 0.4999080544317764, "grad_norm": 1.3458912968953478, "learning_rate": 5.244339181104446e-06, "loss": 0.6495, "step": 16311 }, { "epoch": 0.4999387029545176, "grad_norm": 1.1603490867371649, "learning_rate": 5.243843455037038e-06, "loss": 0.5065, "step": 16312 }, { "epoch": 0.4999693514772588, "grad_norm": 1.2133303959065378, "learning_rate": 5.243347726566977e-06, "loss": 0.6751, "step": 16313 }, { "epoch": 0.5, "grad_norm": 1.6804120688594222, "learning_rate": 5.242851995699149e-06, "loss": 0.6966, "step": 16314 }, { "epoch": 0.5000306485227412, "grad_norm": 1.0750259263823145, "learning_rate": 5.2423562624384394e-06, "loss": 0.5408, "step": 16315 }, { "epoch": 0.5000612970454824, "grad_norm": 0.4565353543671099, "learning_rate": 5.24186052678973e-06, "loss": 0.3967, "step": 16316 }, { "epoch": 0.5000919455682236, "grad_norm": 0.45569702498227826, "learning_rate": 5.241364788757907e-06, "loss": 0.4232, "step": 16317 }, { "epoch": 0.5001225940909648, "grad_norm": 1.2491413851450515, "learning_rate": 5.240869048347857e-06, "loss": 0.8172, "step": 16318 }, { "epoch": 0.500153242613706, "grad_norm": 0.46893955396244147, "learning_rate": 5.240373305564463e-06, "loss": 0.436, "step": 16319 }, { "epoch": 0.5001838911364472, "grad_norm": 0.4845747884884303, "learning_rate": 5.239877560412606e-06, "loss": 0.4358, "step": 16320 }, { "epoch": 0.5002145396591884, "grad_norm": 1.2824730135957134, "learning_rate": 5.239381812897176e-06, "loss": 0.6434, "step": 16321 }, { "epoch": 0.5002451881819296, "grad_norm": 1.3775126843549814, "learning_rate": 5.238886063023055e-06, "loss": 0.6182, "step": 16322 }, { "epoch": 0.5002758367046708, "grad_norm": 0.4693256036040255, "learning_rate": 5.2383903107951305e-06, "loss": 0.4263, "step": 16323 }, { "epoch": 0.5003064852274121, "grad_norm": 1.3371448321392532, "learning_rate": 5.2378945562182825e-06, "loss": 0.6628, "step": 16324 }, { "epoch": 0.5003371337501532, "grad_norm": 1.3358700547401339, "learning_rate": 5.2373987992974005e-06, "loss": 0.6152, "step": 16325 }, { "epoch": 0.5003677822728945, "grad_norm": 0.4796676247629007, "learning_rate": 5.236903040037366e-06, "loss": 0.4189, "step": 16326 }, { "epoch": 0.5003984307956356, "grad_norm": 0.42229464975457864, "learning_rate": 5.236407278443068e-06, "loss": 0.4087, "step": 16327 }, { "epoch": 0.5004290793183769, "grad_norm": 1.5143584335420701, "learning_rate": 5.235911514519385e-06, "loss": 0.604, "step": 16328 }, { "epoch": 0.500459727841118, "grad_norm": 1.4258856481020592, "learning_rate": 5.235415748271208e-06, "loss": 0.6624, "step": 16329 }, { "epoch": 0.5004903763638593, "grad_norm": 1.2387170137113748, "learning_rate": 5.234919979703419e-06, "loss": 0.6565, "step": 16330 }, { "epoch": 0.5005210248866004, "grad_norm": 1.2714681100185194, "learning_rate": 5.234424208820902e-06, "loss": 0.6757, "step": 16331 }, { "epoch": 0.5005516734093417, "grad_norm": 1.298128553396898, "learning_rate": 5.233928435628543e-06, "loss": 0.5547, "step": 16332 }, { "epoch": 0.5005823219320829, "grad_norm": 1.176618779034955, "learning_rate": 5.233432660131228e-06, "loss": 0.6985, "step": 16333 }, { "epoch": 0.5006129704548241, "grad_norm": 0.48077252765852024, "learning_rate": 5.232936882333844e-06, "loss": 0.4305, "step": 16334 }, { "epoch": 0.5006436189775653, "grad_norm": 1.4508030659878628, "learning_rate": 5.232441102241269e-06, "loss": 0.6596, "step": 16335 }, { "epoch": 0.5006742675003065, "grad_norm": 1.505532715135321, "learning_rate": 5.231945319858395e-06, "loss": 0.6919, "step": 16336 }, { "epoch": 0.5007049160230477, "grad_norm": 1.1537489470470006, "learning_rate": 5.231449535190103e-06, "loss": 0.5183, "step": 16337 }, { "epoch": 0.5007355645457889, "grad_norm": 1.1268771114624243, "learning_rate": 5.230953748241282e-06, "loss": 0.6159, "step": 16338 }, { "epoch": 0.5007662130685301, "grad_norm": 1.5857480150843017, "learning_rate": 5.230457959016812e-06, "loss": 0.7163, "step": 16339 }, { "epoch": 0.5007968615912713, "grad_norm": 1.2007670075169063, "learning_rate": 5.229962167521582e-06, "loss": 0.6499, "step": 16340 }, { "epoch": 0.5008275101140125, "grad_norm": 0.46420623394252203, "learning_rate": 5.229466373760474e-06, "loss": 0.4151, "step": 16341 }, { "epoch": 0.5008581586367538, "grad_norm": 1.2737035165485593, "learning_rate": 5.228970577738377e-06, "loss": 0.6588, "step": 16342 }, { "epoch": 0.5008888071594949, "grad_norm": 1.3642293259479465, "learning_rate": 5.2284747794601745e-06, "loss": 0.6151, "step": 16343 }, { "epoch": 0.5009194556822362, "grad_norm": 1.4458492124441984, "learning_rate": 5.2279789789307515e-06, "loss": 0.5996, "step": 16344 }, { "epoch": 0.5009501042049773, "grad_norm": 0.4707656358588906, "learning_rate": 5.227483176154991e-06, "loss": 0.4222, "step": 16345 }, { "epoch": 0.5009807527277185, "grad_norm": 1.257176547600419, "learning_rate": 5.226987371137784e-06, "loss": 0.5973, "step": 16346 }, { "epoch": 0.5010114012504597, "grad_norm": 1.9443084331904197, "learning_rate": 5.226491563884011e-06, "loss": 0.5182, "step": 16347 }, { "epoch": 0.5010420497732009, "grad_norm": 1.412861968665947, "learning_rate": 5.225995754398557e-06, "loss": 0.6632, "step": 16348 }, { "epoch": 0.5010726982959421, "grad_norm": 1.4149838375745039, "learning_rate": 5.22549994268631e-06, "loss": 0.8154, "step": 16349 }, { "epoch": 0.5011033468186833, "grad_norm": 0.4539069282549431, "learning_rate": 5.225004128752156e-06, "loss": 0.3961, "step": 16350 }, { "epoch": 0.5011339953414246, "grad_norm": 1.2489070130506301, "learning_rate": 5.224508312600978e-06, "loss": 0.5706, "step": 16351 }, { "epoch": 0.5011646438641657, "grad_norm": 1.2722187818033739, "learning_rate": 5.224012494237661e-06, "loss": 0.6353, "step": 16352 }, { "epoch": 0.501195292386907, "grad_norm": 1.2895535440384165, "learning_rate": 5.2235166736670925e-06, "loss": 0.6537, "step": 16353 }, { "epoch": 0.5012259409096481, "grad_norm": 1.3098214711553537, "learning_rate": 5.2230208508941575e-06, "loss": 0.6282, "step": 16354 }, { "epoch": 0.5012565894323894, "grad_norm": 1.4081008482507247, "learning_rate": 5.22252502592374e-06, "loss": 0.6367, "step": 16355 }, { "epoch": 0.5012872379551305, "grad_norm": 1.0146489955347764, "learning_rate": 5.222029198760725e-06, "loss": 0.6358, "step": 16356 }, { "epoch": 0.5013178864778718, "grad_norm": 1.2476846113949636, "learning_rate": 5.221533369410002e-06, "loss": 0.6191, "step": 16357 }, { "epoch": 0.5013485350006129, "grad_norm": 1.263967175536018, "learning_rate": 5.221037537876454e-06, "loss": 0.6581, "step": 16358 }, { "epoch": 0.5013791835233542, "grad_norm": 1.1750055849129704, "learning_rate": 5.2205417041649655e-06, "loss": 0.655, "step": 16359 }, { "epoch": 0.5014098320460953, "grad_norm": 1.3450988805471964, "learning_rate": 5.220045868280424e-06, "loss": 0.6769, "step": 16360 }, { "epoch": 0.5014404805688366, "grad_norm": 1.4101907046759277, "learning_rate": 5.219550030227714e-06, "loss": 0.6692, "step": 16361 }, { "epoch": 0.5014711290915778, "grad_norm": 0.48163702244401696, "learning_rate": 5.219054190011721e-06, "loss": 0.4153, "step": 16362 }, { "epoch": 0.501501777614319, "grad_norm": 1.1946722952260762, "learning_rate": 5.2185583476373306e-06, "loss": 0.5881, "step": 16363 }, { "epoch": 0.5015324261370602, "grad_norm": 1.2180423926026662, "learning_rate": 5.218062503109429e-06, "loss": 0.675, "step": 16364 }, { "epoch": 0.5015630746598014, "grad_norm": 1.3738167391535057, "learning_rate": 5.217566656432903e-06, "loss": 0.6761, "step": 16365 }, { "epoch": 0.5015937231825426, "grad_norm": 1.2323992448132584, "learning_rate": 5.217070807612636e-06, "loss": 0.5778, "step": 16366 }, { "epoch": 0.5016243717052838, "grad_norm": 1.3349508918131503, "learning_rate": 5.216574956653515e-06, "loss": 0.6183, "step": 16367 }, { "epoch": 0.501655020228025, "grad_norm": 1.2921883052397054, "learning_rate": 5.216079103560425e-06, "loss": 0.6071, "step": 16368 }, { "epoch": 0.5016856687507663, "grad_norm": 1.1997334108869953, "learning_rate": 5.215583248338254e-06, "loss": 0.6119, "step": 16369 }, { "epoch": 0.5017163172735074, "grad_norm": 1.6065754948999837, "learning_rate": 5.215087390991885e-06, "loss": 0.7405, "step": 16370 }, { "epoch": 0.5017469657962487, "grad_norm": 1.2913675313891282, "learning_rate": 5.214591531526204e-06, "loss": 0.6413, "step": 16371 }, { "epoch": 0.5017776143189898, "grad_norm": 1.232655140811936, "learning_rate": 5.2140956699460986e-06, "loss": 0.6386, "step": 16372 }, { "epoch": 0.5018082628417311, "grad_norm": 1.2859046815539263, "learning_rate": 5.213599806256455e-06, "loss": 0.6427, "step": 16373 }, { "epoch": 0.5018389113644722, "grad_norm": 1.4709853087301556, "learning_rate": 5.213103940462155e-06, "loss": 0.7075, "step": 16374 }, { "epoch": 0.5018695598872135, "grad_norm": 1.148777814759927, "learning_rate": 5.212608072568089e-06, "loss": 0.6572, "step": 16375 }, { "epoch": 0.5019002084099546, "grad_norm": 0.459944760189189, "learning_rate": 5.2121122025791415e-06, "loss": 0.4085, "step": 16376 }, { "epoch": 0.5019308569326958, "grad_norm": 1.4741502965747497, "learning_rate": 5.2116163305002e-06, "loss": 0.5774, "step": 16377 }, { "epoch": 0.501961505455437, "grad_norm": 1.2624398160908723, "learning_rate": 5.211120456336145e-06, "loss": 0.6738, "step": 16378 }, { "epoch": 0.5019921539781782, "grad_norm": 1.3509968264693173, "learning_rate": 5.210624580091869e-06, "loss": 0.6259, "step": 16379 }, { "epoch": 0.5020228025009195, "grad_norm": 1.4493954485118543, "learning_rate": 5.210128701772254e-06, "loss": 0.6047, "step": 16380 }, { "epoch": 0.5020534510236606, "grad_norm": 0.42817324478062563, "learning_rate": 5.209632821382187e-06, "loss": 0.4094, "step": 16381 }, { "epoch": 0.5020840995464019, "grad_norm": 1.197931191026926, "learning_rate": 5.209136938926553e-06, "loss": 0.7136, "step": 16382 }, { "epoch": 0.502114748069143, "grad_norm": 1.324438642080808, "learning_rate": 5.2086410544102405e-06, "loss": 0.6492, "step": 16383 }, { "epoch": 0.5021453965918843, "grad_norm": 1.1970397805164177, "learning_rate": 5.208145167838134e-06, "loss": 0.6425, "step": 16384 }, { "epoch": 0.5021760451146254, "grad_norm": 1.2897547805387821, "learning_rate": 5.20764927921512e-06, "loss": 0.6256, "step": 16385 }, { "epoch": 0.5022066936373667, "grad_norm": 0.46046627641049803, "learning_rate": 5.207153388546085e-06, "loss": 0.4261, "step": 16386 }, { "epoch": 0.5022373421601078, "grad_norm": 1.2530424450238762, "learning_rate": 5.206657495835914e-06, "loss": 0.6197, "step": 16387 }, { "epoch": 0.5022679906828491, "grad_norm": 0.46821108174369713, "learning_rate": 5.206161601089495e-06, "loss": 0.424, "step": 16388 }, { "epoch": 0.5022986392055903, "grad_norm": 1.1774981411391654, "learning_rate": 5.2056657043117124e-06, "loss": 0.6329, "step": 16389 }, { "epoch": 0.5023292877283315, "grad_norm": 1.3025192036385471, "learning_rate": 5.2051698055074526e-06, "loss": 0.6611, "step": 16390 }, { "epoch": 0.5023599362510727, "grad_norm": 1.1709126449070004, "learning_rate": 5.204673904681601e-06, "loss": 0.5908, "step": 16391 }, { "epoch": 0.5023905847738139, "grad_norm": 1.2828033653529678, "learning_rate": 5.204178001839049e-06, "loss": 0.669, "step": 16392 }, { "epoch": 0.5024212332965551, "grad_norm": 1.2808392994552362, "learning_rate": 5.203682096984674e-06, "loss": 0.648, "step": 16393 }, { "epoch": 0.5024518818192963, "grad_norm": 1.2437982659740336, "learning_rate": 5.203186190123371e-06, "loss": 0.5756, "step": 16394 }, { "epoch": 0.5024825303420375, "grad_norm": 1.3231110019367138, "learning_rate": 5.20269028126002e-06, "loss": 0.7647, "step": 16395 }, { "epoch": 0.5025131788647788, "grad_norm": 0.4637321054507274, "learning_rate": 5.202194370399511e-06, "loss": 0.3968, "step": 16396 }, { "epoch": 0.5025438273875199, "grad_norm": 1.2411813552228192, "learning_rate": 5.201698457546729e-06, "loss": 0.7499, "step": 16397 }, { "epoch": 0.5025744759102612, "grad_norm": 1.2674002577737091, "learning_rate": 5.2012025427065606e-06, "loss": 0.7174, "step": 16398 }, { "epoch": 0.5026051244330023, "grad_norm": 0.46669671621445036, "learning_rate": 5.200706625883891e-06, "loss": 0.4153, "step": 16399 }, { "epoch": 0.5026357729557436, "grad_norm": 0.44610869683211335, "learning_rate": 5.2002107070836095e-06, "loss": 0.4028, "step": 16400 }, { "epoch": 0.5026664214784847, "grad_norm": 1.452591036084373, "learning_rate": 5.199714786310599e-06, "loss": 0.6412, "step": 16401 }, { "epoch": 0.502697070001226, "grad_norm": 0.47203786215064203, "learning_rate": 5.199218863569748e-06, "loss": 0.3947, "step": 16402 }, { "epoch": 0.5027277185239671, "grad_norm": 1.2525752228042664, "learning_rate": 5.198722938865944e-06, "loss": 0.6409, "step": 16403 }, { "epoch": 0.5027583670467084, "grad_norm": 1.1972813055283993, "learning_rate": 5.19822701220407e-06, "loss": 0.6571, "step": 16404 }, { "epoch": 0.5027890155694495, "grad_norm": 1.4523545937942703, "learning_rate": 5.197731083589016e-06, "loss": 0.7038, "step": 16405 }, { "epoch": 0.5028196640921908, "grad_norm": 1.1914479678499477, "learning_rate": 5.197235153025666e-06, "loss": 0.6195, "step": 16406 }, { "epoch": 0.502850312614932, "grad_norm": 1.196222422709875, "learning_rate": 5.1967392205189094e-06, "loss": 0.6556, "step": 16407 }, { "epoch": 0.5028809611376731, "grad_norm": 1.1524249212491968, "learning_rate": 5.196243286073629e-06, "loss": 0.6849, "step": 16408 }, { "epoch": 0.5029116096604144, "grad_norm": 1.1784339694079644, "learning_rate": 5.195747349694714e-06, "loss": 0.6021, "step": 16409 }, { "epoch": 0.5029422581831555, "grad_norm": 1.4147616454707896, "learning_rate": 5.195251411387049e-06, "loss": 0.6756, "step": 16410 }, { "epoch": 0.5029729067058968, "grad_norm": 1.2648138341046198, "learning_rate": 5.1947554711555235e-06, "loss": 0.6722, "step": 16411 }, { "epoch": 0.5030035552286379, "grad_norm": 1.20611524285682, "learning_rate": 5.1942595290050225e-06, "loss": 0.6432, "step": 16412 }, { "epoch": 0.5030342037513792, "grad_norm": 0.5582218456004877, "learning_rate": 5.193763584940431e-06, "loss": 0.4097, "step": 16413 }, { "epoch": 0.5030648522741203, "grad_norm": 1.4803013734976058, "learning_rate": 5.1932676389666395e-06, "loss": 0.64, "step": 16414 }, { "epoch": 0.5030955007968616, "grad_norm": 1.3511449377226894, "learning_rate": 5.1927716910885314e-06, "loss": 0.7242, "step": 16415 }, { "epoch": 0.5031261493196028, "grad_norm": 0.4718236971904893, "learning_rate": 5.192275741310995e-06, "loss": 0.404, "step": 16416 }, { "epoch": 0.503156797842344, "grad_norm": 1.4790967742305825, "learning_rate": 5.1917797896389155e-06, "loss": 0.6557, "step": 16417 }, { "epoch": 0.5031874463650852, "grad_norm": 1.2107831784108485, "learning_rate": 5.191283836077181e-06, "loss": 0.6106, "step": 16418 }, { "epoch": 0.5032180948878264, "grad_norm": 0.4405365955421112, "learning_rate": 5.190787880630679e-06, "loss": 0.4163, "step": 16419 }, { "epoch": 0.5032487434105676, "grad_norm": 0.4489003319075521, "learning_rate": 5.190291923304295e-06, "loss": 0.4303, "step": 16420 }, { "epoch": 0.5032793919333088, "grad_norm": 0.46264941030229256, "learning_rate": 5.189795964102915e-06, "loss": 0.4395, "step": 16421 }, { "epoch": 0.50331004045605, "grad_norm": 1.2137832084097582, "learning_rate": 5.189300003031426e-06, "loss": 0.599, "step": 16422 }, { "epoch": 0.5033406889787913, "grad_norm": 1.4342826750121895, "learning_rate": 5.188804040094718e-06, "loss": 0.7967, "step": 16423 }, { "epoch": 0.5033713375015324, "grad_norm": 0.44417943671361965, "learning_rate": 5.188308075297674e-06, "loss": 0.4217, "step": 16424 }, { "epoch": 0.5034019860242737, "grad_norm": 1.2393731472617169, "learning_rate": 5.1878121086451824e-06, "loss": 0.7305, "step": 16425 }, { "epoch": 0.5034326345470148, "grad_norm": 1.1302601393861473, "learning_rate": 5.187316140142131e-06, "loss": 0.6836, "step": 16426 }, { "epoch": 0.5034632830697561, "grad_norm": 1.2509715069134886, "learning_rate": 5.1868201697934054e-06, "loss": 0.5996, "step": 16427 }, { "epoch": 0.5034939315924972, "grad_norm": 0.4624831422872901, "learning_rate": 5.1863241976038915e-06, "loss": 0.4078, "step": 16428 }, { "epoch": 0.5035245801152385, "grad_norm": 1.131509876160133, "learning_rate": 5.185828223578479e-06, "loss": 0.6185, "step": 16429 }, { "epoch": 0.5035552286379796, "grad_norm": 0.44226751286751753, "learning_rate": 5.185332247722053e-06, "loss": 0.404, "step": 16430 }, { "epoch": 0.5035858771607209, "grad_norm": 1.1688185721462951, "learning_rate": 5.184836270039503e-06, "loss": 0.5786, "step": 16431 }, { "epoch": 0.503616525683462, "grad_norm": 1.1461630555236961, "learning_rate": 5.184340290535711e-06, "loss": 0.6233, "step": 16432 }, { "epoch": 0.5036471742062033, "grad_norm": 1.2217264009881859, "learning_rate": 5.183844309215567e-06, "loss": 0.6882, "step": 16433 }, { "epoch": 0.5036778227289445, "grad_norm": 0.46469133210113145, "learning_rate": 5.18334832608396e-06, "loss": 0.4473, "step": 16434 }, { "epoch": 0.5037084712516857, "grad_norm": 1.3259672166572654, "learning_rate": 5.182852341145774e-06, "loss": 0.613, "step": 16435 }, { "epoch": 0.5037391197744269, "grad_norm": 1.5001819658168867, "learning_rate": 5.182356354405896e-06, "loss": 0.6247, "step": 16436 }, { "epoch": 0.5037697682971681, "grad_norm": 1.2456287886540616, "learning_rate": 5.1818603658692155e-06, "loss": 0.5477, "step": 16437 }, { "epoch": 0.5038004168199093, "grad_norm": 1.1784697598562732, "learning_rate": 5.18136437554062e-06, "loss": 0.6359, "step": 16438 }, { "epoch": 0.5038310653426504, "grad_norm": 1.2430988787344233, "learning_rate": 5.18086838342499e-06, "loss": 0.6289, "step": 16439 }, { "epoch": 0.5038617138653917, "grad_norm": 1.3595475492524376, "learning_rate": 5.180372389527221e-06, "loss": 0.6469, "step": 16440 }, { "epoch": 0.5038923623881328, "grad_norm": 1.1068387080738593, "learning_rate": 5.179876393852198e-06, "loss": 0.5834, "step": 16441 }, { "epoch": 0.5039230109108741, "grad_norm": 1.2618709032912971, "learning_rate": 5.179380396404805e-06, "loss": 0.6423, "step": 16442 }, { "epoch": 0.5039536594336153, "grad_norm": 1.3360355276545597, "learning_rate": 5.178884397189931e-06, "loss": 0.701, "step": 16443 }, { "epoch": 0.5039843079563565, "grad_norm": 1.4569082145244183, "learning_rate": 5.178388396212462e-06, "loss": 0.7152, "step": 16444 }, { "epoch": 0.5040149564790977, "grad_norm": 1.255881443716551, "learning_rate": 5.1778923934772885e-06, "loss": 0.6496, "step": 16445 }, { "epoch": 0.5040456050018389, "grad_norm": 1.438096744321343, "learning_rate": 5.177396388989296e-06, "loss": 0.685, "step": 16446 }, { "epoch": 0.5040762535245801, "grad_norm": 1.5451615648397423, "learning_rate": 5.176900382753369e-06, "loss": 0.6879, "step": 16447 }, { "epoch": 0.5041069020473213, "grad_norm": 0.4615068850763838, "learning_rate": 5.1764043747744e-06, "loss": 0.3992, "step": 16448 }, { "epoch": 0.5041375505700625, "grad_norm": 1.1954557645087254, "learning_rate": 5.175908365057272e-06, "loss": 0.6131, "step": 16449 }, { "epoch": 0.5041681990928037, "grad_norm": 2.5938273297856993, "learning_rate": 5.175412353606876e-06, "loss": 0.6621, "step": 16450 }, { "epoch": 0.5041988476155449, "grad_norm": 1.467812804246358, "learning_rate": 5.1749163404280945e-06, "loss": 0.671, "step": 16451 }, { "epoch": 0.5042294961382862, "grad_norm": 1.3153457045566028, "learning_rate": 5.1744203255258185e-06, "loss": 0.6024, "step": 16452 }, { "epoch": 0.5042601446610273, "grad_norm": 1.299537298732558, "learning_rate": 5.173924308904934e-06, "loss": 0.5776, "step": 16453 }, { "epoch": 0.5042907931837686, "grad_norm": 0.4305049554526781, "learning_rate": 5.1734282905703295e-06, "loss": 0.3887, "step": 16454 }, { "epoch": 0.5043214417065097, "grad_norm": 1.3338296343256666, "learning_rate": 5.172932270526891e-06, "loss": 0.6234, "step": 16455 }, { "epoch": 0.504352090229251, "grad_norm": 1.2971133245437312, "learning_rate": 5.172436248779507e-06, "loss": 0.7487, "step": 16456 }, { "epoch": 0.5043827387519921, "grad_norm": 1.3186165978291435, "learning_rate": 5.171940225333065e-06, "loss": 0.6231, "step": 16457 }, { "epoch": 0.5044133872747334, "grad_norm": 1.3556524986442235, "learning_rate": 5.171444200192451e-06, "loss": 0.6956, "step": 16458 }, { "epoch": 0.5044440357974745, "grad_norm": 1.3169135114085606, "learning_rate": 5.170948173362555e-06, "loss": 0.5457, "step": 16459 }, { "epoch": 0.5044746843202158, "grad_norm": 1.3622388827012346, "learning_rate": 5.17045214484826e-06, "loss": 0.609, "step": 16460 }, { "epoch": 0.504505332842957, "grad_norm": 1.2834617407611486, "learning_rate": 5.1699561146544595e-06, "loss": 0.607, "step": 16461 }, { "epoch": 0.5045359813656982, "grad_norm": 1.2504114976642502, "learning_rate": 5.1694600827860365e-06, "loss": 0.6597, "step": 16462 }, { "epoch": 0.5045666298884394, "grad_norm": 1.1199290112409053, "learning_rate": 5.16896404924788e-06, "loss": 0.5628, "step": 16463 }, { "epoch": 0.5045972784111806, "grad_norm": 1.3816531186172767, "learning_rate": 5.1684680140448775e-06, "loss": 0.6046, "step": 16464 }, { "epoch": 0.5046279269339218, "grad_norm": 1.4204827616491953, "learning_rate": 5.167971977181916e-06, "loss": 0.6431, "step": 16465 }, { "epoch": 0.504658575456663, "grad_norm": 1.1830024899377574, "learning_rate": 5.167475938663885e-06, "loss": 0.6193, "step": 16466 }, { "epoch": 0.5046892239794042, "grad_norm": 1.2222135968159253, "learning_rate": 5.16697989849567e-06, "loss": 0.5882, "step": 16467 }, { "epoch": 0.5047198725021455, "grad_norm": 1.2691021959215474, "learning_rate": 5.166483856682158e-06, "loss": 0.6443, "step": 16468 }, { "epoch": 0.5047505210248866, "grad_norm": 0.5312634792829792, "learning_rate": 5.1659878132282406e-06, "loss": 0.4284, "step": 16469 }, { "epoch": 0.5047811695476278, "grad_norm": 1.3538714529963793, "learning_rate": 5.165491768138801e-06, "loss": 0.6437, "step": 16470 }, { "epoch": 0.504811818070369, "grad_norm": 1.377658281821687, "learning_rate": 5.164995721418729e-06, "loss": 0.5941, "step": 16471 }, { "epoch": 0.5048424665931102, "grad_norm": 1.3123497387303875, "learning_rate": 5.164499673072913e-06, "loss": 0.6732, "step": 16472 }, { "epoch": 0.5048731151158514, "grad_norm": 1.4754901899298698, "learning_rate": 5.164003623106238e-06, "loss": 0.6723, "step": 16473 }, { "epoch": 0.5049037636385926, "grad_norm": 10.645472814783174, "learning_rate": 5.163507571523595e-06, "loss": 0.6246, "step": 16474 }, { "epoch": 0.5049344121613338, "grad_norm": 1.4130527626512415, "learning_rate": 5.163011518329868e-06, "loss": 0.6499, "step": 16475 }, { "epoch": 0.504965060684075, "grad_norm": 1.2034420866302462, "learning_rate": 5.162515463529949e-06, "loss": 0.6702, "step": 16476 }, { "epoch": 0.5049957092068162, "grad_norm": 1.2074674910054393, "learning_rate": 5.162019407128722e-06, "loss": 0.6453, "step": 16477 }, { "epoch": 0.5050263577295574, "grad_norm": 1.201804030467858, "learning_rate": 5.161523349131078e-06, "loss": 0.676, "step": 16478 }, { "epoch": 0.5050570062522987, "grad_norm": 1.4078850611063976, "learning_rate": 5.1610272895419e-06, "loss": 0.6738, "step": 16479 }, { "epoch": 0.5050876547750398, "grad_norm": 1.239969014184185, "learning_rate": 5.160531228366081e-06, "loss": 0.6956, "step": 16480 }, { "epoch": 0.5051183032977811, "grad_norm": 1.1725254198677053, "learning_rate": 5.160035165608508e-06, "loss": 0.6067, "step": 16481 }, { "epoch": 0.5051489518205222, "grad_norm": 0.4715301799192444, "learning_rate": 5.159539101274065e-06, "loss": 0.4004, "step": 16482 }, { "epoch": 0.5051796003432635, "grad_norm": 1.1565405854351505, "learning_rate": 5.159043035367643e-06, "loss": 0.6127, "step": 16483 }, { "epoch": 0.5052102488660046, "grad_norm": 1.4905092997299498, "learning_rate": 5.158546967894131e-06, "loss": 0.6735, "step": 16484 }, { "epoch": 0.5052408973887459, "grad_norm": 1.428245619420614, "learning_rate": 5.158050898858415e-06, "loss": 0.6766, "step": 16485 }, { "epoch": 0.505271545911487, "grad_norm": 1.1102069425449037, "learning_rate": 5.157554828265381e-06, "loss": 0.6932, "step": 16486 }, { "epoch": 0.5053021944342283, "grad_norm": 1.2088756863812937, "learning_rate": 5.15705875611992e-06, "loss": 0.6537, "step": 16487 }, { "epoch": 0.5053328429569695, "grad_norm": 1.2527556785409493, "learning_rate": 5.156562682426919e-06, "loss": 0.5872, "step": 16488 }, { "epoch": 0.5053634914797107, "grad_norm": 0.4586370038162908, "learning_rate": 5.156066607191266e-06, "loss": 0.4212, "step": 16489 }, { "epoch": 0.5053941400024519, "grad_norm": 1.2334699810307939, "learning_rate": 5.155570530417848e-06, "loss": 0.6621, "step": 16490 }, { "epoch": 0.5054247885251931, "grad_norm": 1.2722682222875494, "learning_rate": 5.155074452111555e-06, "loss": 0.6568, "step": 16491 }, { "epoch": 0.5054554370479343, "grad_norm": 2.5117464264823703, "learning_rate": 5.1545783722772725e-06, "loss": 0.6275, "step": 16492 }, { "epoch": 0.5054860855706755, "grad_norm": 1.173964077427964, "learning_rate": 5.154082290919891e-06, "loss": 0.5528, "step": 16493 }, { "epoch": 0.5055167340934167, "grad_norm": 1.2015487609598463, "learning_rate": 5.153586208044296e-06, "loss": 0.6921, "step": 16494 }, { "epoch": 0.505547382616158, "grad_norm": 1.225190138266385, "learning_rate": 5.153090123655378e-06, "loss": 0.6884, "step": 16495 }, { "epoch": 0.5055780311388991, "grad_norm": 1.3639848629734075, "learning_rate": 5.152594037758023e-06, "loss": 0.6114, "step": 16496 }, { "epoch": 0.5056086796616404, "grad_norm": 1.3781052732320351, "learning_rate": 5.152097950357119e-06, "loss": 0.6081, "step": 16497 }, { "epoch": 0.5056393281843815, "grad_norm": 1.3598390608803461, "learning_rate": 5.151601861457557e-06, "loss": 0.6211, "step": 16498 }, { "epoch": 0.5056699767071228, "grad_norm": 1.3880237573106748, "learning_rate": 5.151105771064221e-06, "loss": 0.6606, "step": 16499 }, { "epoch": 0.5057006252298639, "grad_norm": 1.4737382704262005, "learning_rate": 5.150609679182004e-06, "loss": 0.6432, "step": 16500 }, { "epoch": 0.5057312737526051, "grad_norm": 1.35618182880159, "learning_rate": 5.150113585815788e-06, "loss": 0.672, "step": 16501 }, { "epoch": 0.5057619222753463, "grad_norm": 1.2886152947437364, "learning_rate": 5.149617490970466e-06, "loss": 0.6571, "step": 16502 }, { "epoch": 0.5057925707980875, "grad_norm": 0.49386284208331754, "learning_rate": 5.149121394650924e-06, "loss": 0.441, "step": 16503 }, { "epoch": 0.5058232193208287, "grad_norm": 0.4697195668473969, "learning_rate": 5.148625296862053e-06, "loss": 0.4211, "step": 16504 }, { "epoch": 0.5058538678435699, "grad_norm": 1.2663616680711824, "learning_rate": 5.148129197608737e-06, "loss": 0.6047, "step": 16505 }, { "epoch": 0.5058845163663112, "grad_norm": 1.405624158084176, "learning_rate": 5.147633096895866e-06, "loss": 0.6488, "step": 16506 }, { "epoch": 0.5059151648890523, "grad_norm": 1.4449368567193894, "learning_rate": 5.14713699472833e-06, "loss": 0.7375, "step": 16507 }, { "epoch": 0.5059458134117936, "grad_norm": 1.4560472166374696, "learning_rate": 5.146640891111013e-06, "loss": 0.7427, "step": 16508 }, { "epoch": 0.5059764619345347, "grad_norm": 0.44795489641785063, "learning_rate": 5.146144786048808e-06, "loss": 0.4334, "step": 16509 }, { "epoch": 0.506007110457276, "grad_norm": 1.4668800243920381, "learning_rate": 5.145648679546598e-06, "loss": 0.6161, "step": 16510 }, { "epoch": 0.5060377589800171, "grad_norm": 1.222226679720693, "learning_rate": 5.145152571609279e-06, "loss": 0.6169, "step": 16511 }, { "epoch": 0.5060684075027584, "grad_norm": 1.2083863544228899, "learning_rate": 5.14465646224173e-06, "loss": 0.6403, "step": 16512 }, { "epoch": 0.5060990560254995, "grad_norm": 1.2341393131616867, "learning_rate": 5.144160351448847e-06, "loss": 0.5705, "step": 16513 }, { "epoch": 0.5061297045482408, "grad_norm": 1.3854204520270033, "learning_rate": 5.143664239235513e-06, "loss": 0.7182, "step": 16514 }, { "epoch": 0.506160353070982, "grad_norm": 1.2318275912942025, "learning_rate": 5.143168125606621e-06, "loss": 0.6886, "step": 16515 }, { "epoch": 0.5061910015937232, "grad_norm": 1.132173601379188, "learning_rate": 5.1426720105670545e-06, "loss": 0.5595, "step": 16516 }, { "epoch": 0.5062216501164644, "grad_norm": 1.2716410365804702, "learning_rate": 5.142175894121706e-06, "loss": 0.6388, "step": 16517 }, { "epoch": 0.5062522986392056, "grad_norm": 1.280728656769705, "learning_rate": 5.14167977627546e-06, "loss": 0.6087, "step": 16518 }, { "epoch": 0.5062829471619468, "grad_norm": 1.2190083253662929, "learning_rate": 5.141183657033208e-06, "loss": 0.6464, "step": 16519 }, { "epoch": 0.506313595684688, "grad_norm": 1.2049107508984003, "learning_rate": 5.140687536399838e-06, "loss": 0.509, "step": 16520 }, { "epoch": 0.5063442442074292, "grad_norm": 1.228458263163571, "learning_rate": 5.140191414380236e-06, "loss": 0.6385, "step": 16521 }, { "epoch": 0.5063748927301704, "grad_norm": 1.2127747289458972, "learning_rate": 5.139695290979293e-06, "loss": 0.6413, "step": 16522 }, { "epoch": 0.5064055412529116, "grad_norm": 1.2831875688932008, "learning_rate": 5.139199166201897e-06, "loss": 0.679, "step": 16523 }, { "epoch": 0.5064361897756529, "grad_norm": 1.2227444821190638, "learning_rate": 5.138703040052936e-06, "loss": 0.6978, "step": 16524 }, { "epoch": 0.506466838298394, "grad_norm": 1.3801436371543279, "learning_rate": 5.138206912537297e-06, "loss": 0.6684, "step": 16525 }, { "epoch": 0.5064974868211353, "grad_norm": 1.5307442515673253, "learning_rate": 5.1377107836598715e-06, "loss": 0.6872, "step": 16526 }, { "epoch": 0.5065281353438764, "grad_norm": 1.1313381501830824, "learning_rate": 5.137214653425546e-06, "loss": 0.5883, "step": 16527 }, { "epoch": 0.5065587838666177, "grad_norm": 1.4427669523290771, "learning_rate": 5.13671852183921e-06, "loss": 0.7117, "step": 16528 }, { "epoch": 0.5065894323893588, "grad_norm": 1.1827682188609023, "learning_rate": 5.13622238890575e-06, "loss": 0.5966, "step": 16529 }, { "epoch": 0.5066200809121001, "grad_norm": 1.2549868994385307, "learning_rate": 5.1357262546300565e-06, "loss": 0.6256, "step": 16530 }, { "epoch": 0.5066507294348412, "grad_norm": 1.1177863417216882, "learning_rate": 5.135230119017019e-06, "loss": 0.5896, "step": 16531 }, { "epoch": 0.5066813779575824, "grad_norm": 1.1973958450925786, "learning_rate": 5.134733982071523e-06, "loss": 0.6495, "step": 16532 }, { "epoch": 0.5067120264803237, "grad_norm": 1.2504845352399725, "learning_rate": 5.134237843798457e-06, "loss": 0.6482, "step": 16533 }, { "epoch": 0.5067426750030648, "grad_norm": 1.3109214856896925, "learning_rate": 5.133741704202714e-06, "loss": 0.6049, "step": 16534 }, { "epoch": 0.5067733235258061, "grad_norm": 1.2991004710418121, "learning_rate": 5.13324556328918e-06, "loss": 0.7174, "step": 16535 }, { "epoch": 0.5068039720485472, "grad_norm": 1.293663070547537, "learning_rate": 5.13274942106274e-06, "loss": 0.6836, "step": 16536 }, { "epoch": 0.5068346205712885, "grad_norm": 1.3031347634474335, "learning_rate": 5.13225327752829e-06, "loss": 0.5312, "step": 16537 }, { "epoch": 0.5068652690940296, "grad_norm": 0.5546188788445078, "learning_rate": 5.131757132690713e-06, "loss": 0.4204, "step": 16538 }, { "epoch": 0.5068959176167709, "grad_norm": 1.262453415076598, "learning_rate": 5.131260986554899e-06, "loss": 0.6249, "step": 16539 }, { "epoch": 0.506926566139512, "grad_norm": 1.2474673889330017, "learning_rate": 5.130764839125736e-06, "loss": 0.6091, "step": 16540 }, { "epoch": 0.5069572146622533, "grad_norm": 0.46482212581425253, "learning_rate": 5.130268690408114e-06, "loss": 0.4265, "step": 16541 }, { "epoch": 0.5069878631849944, "grad_norm": 1.243062982261131, "learning_rate": 5.1297725404069234e-06, "loss": 0.6184, "step": 16542 }, { "epoch": 0.5070185117077357, "grad_norm": 1.2466235737555245, "learning_rate": 5.129276389127049e-06, "loss": 0.6178, "step": 16543 }, { "epoch": 0.5070491602304769, "grad_norm": 0.4700789756120135, "learning_rate": 5.128780236573381e-06, "loss": 0.4133, "step": 16544 }, { "epoch": 0.5070798087532181, "grad_norm": 1.2998482101140536, "learning_rate": 5.1282840827508085e-06, "loss": 0.5725, "step": 16545 }, { "epoch": 0.5071104572759593, "grad_norm": 0.5011956304187032, "learning_rate": 5.1277879276642206e-06, "loss": 0.4024, "step": 16546 }, { "epoch": 0.5071411057987005, "grad_norm": 1.4485970906470398, "learning_rate": 5.1272917713185055e-06, "loss": 0.7156, "step": 16547 }, { "epoch": 0.5071717543214417, "grad_norm": 1.0855499776668283, "learning_rate": 5.1267956137185514e-06, "loss": 0.6716, "step": 16548 }, { "epoch": 0.5072024028441829, "grad_norm": 0.476996840201467, "learning_rate": 5.126299454869248e-06, "loss": 0.4276, "step": 16549 }, { "epoch": 0.5072330513669241, "grad_norm": 1.3250006049342362, "learning_rate": 5.1258032947754845e-06, "loss": 0.6179, "step": 16550 }, { "epoch": 0.5072636998896654, "grad_norm": 1.3015967209045518, "learning_rate": 5.125307133442148e-06, "loss": 0.5409, "step": 16551 }, { "epoch": 0.5072943484124065, "grad_norm": 1.2240059770200944, "learning_rate": 5.124810970874129e-06, "loss": 0.6308, "step": 16552 }, { "epoch": 0.5073249969351478, "grad_norm": 1.2775755650322775, "learning_rate": 5.124314807076314e-06, "loss": 0.6115, "step": 16553 }, { "epoch": 0.5073556454578889, "grad_norm": 1.2908091019593857, "learning_rate": 5.1238186420535965e-06, "loss": 0.6801, "step": 16554 }, { "epoch": 0.5073862939806302, "grad_norm": 1.1817260644128693, "learning_rate": 5.123322475810859e-06, "loss": 0.6534, "step": 16555 }, { "epoch": 0.5074169425033713, "grad_norm": 1.420456076703619, "learning_rate": 5.122826308352995e-06, "loss": 0.695, "step": 16556 }, { "epoch": 0.5074475910261126, "grad_norm": 1.0951312474379802, "learning_rate": 5.122330139684892e-06, "loss": 0.6215, "step": 16557 }, { "epoch": 0.5074782395488537, "grad_norm": 0.4497275751714693, "learning_rate": 5.1218339698114395e-06, "loss": 0.4117, "step": 16558 }, { "epoch": 0.507508888071595, "grad_norm": 0.45834954972316694, "learning_rate": 5.121337798737523e-06, "loss": 0.4034, "step": 16559 }, { "epoch": 0.5075395365943361, "grad_norm": 1.1768929951014526, "learning_rate": 5.1208416264680376e-06, "loss": 0.5681, "step": 16560 }, { "epoch": 0.5075701851170774, "grad_norm": 1.26105248218208, "learning_rate": 5.120345453007867e-06, "loss": 0.6045, "step": 16561 }, { "epoch": 0.5076008336398186, "grad_norm": 1.1886969953622553, "learning_rate": 5.1198492783619015e-06, "loss": 0.6415, "step": 16562 }, { "epoch": 0.5076314821625597, "grad_norm": 0.4904341880048201, "learning_rate": 5.119353102535031e-06, "loss": 0.3981, "step": 16563 }, { "epoch": 0.507662130685301, "grad_norm": 1.3106781992960599, "learning_rate": 5.118856925532144e-06, "loss": 0.7204, "step": 16564 }, { "epoch": 0.5076927792080421, "grad_norm": 1.0980085061501672, "learning_rate": 5.11836074735813e-06, "loss": 0.6467, "step": 16565 }, { "epoch": 0.5077234277307834, "grad_norm": 1.3577326550778794, "learning_rate": 5.117864568017875e-06, "loss": 0.6722, "step": 16566 }, { "epoch": 0.5077540762535245, "grad_norm": 1.5697804692071022, "learning_rate": 5.117368387516272e-06, "loss": 0.6853, "step": 16567 }, { "epoch": 0.5077847247762658, "grad_norm": 1.1998910563118652, "learning_rate": 5.116872205858207e-06, "loss": 0.6153, "step": 16568 }, { "epoch": 0.507815373299007, "grad_norm": 1.1545629123580385, "learning_rate": 5.116376023048573e-06, "loss": 0.6741, "step": 16569 }, { "epoch": 0.5078460218217482, "grad_norm": 0.47477760869002456, "learning_rate": 5.115879839092253e-06, "loss": 0.4289, "step": 16570 }, { "epoch": 0.5078766703444894, "grad_norm": 1.2759567769342004, "learning_rate": 5.115383653994141e-06, "loss": 0.5304, "step": 16571 }, { "epoch": 0.5079073188672306, "grad_norm": 1.1407276817614562, "learning_rate": 5.114887467759123e-06, "loss": 0.5845, "step": 16572 }, { "epoch": 0.5079379673899718, "grad_norm": 1.1551581917990505, "learning_rate": 5.114391280392092e-06, "loss": 0.6764, "step": 16573 }, { "epoch": 0.507968615912713, "grad_norm": 0.4593240089003235, "learning_rate": 5.1138950918979315e-06, "loss": 0.4116, "step": 16574 }, { "epoch": 0.5079992644354542, "grad_norm": 0.4324359542702389, "learning_rate": 5.113398902281536e-06, "loss": 0.4089, "step": 16575 }, { "epoch": 0.5080299129581954, "grad_norm": 0.4477951239353209, "learning_rate": 5.112902711547789e-06, "loss": 0.417, "step": 16576 }, { "epoch": 0.5080605614809366, "grad_norm": 1.122715122924344, "learning_rate": 5.112406519701586e-06, "loss": 0.5833, "step": 16577 }, { "epoch": 0.5080912100036779, "grad_norm": 1.3559356114799552, "learning_rate": 5.111910326747811e-06, "loss": 0.7047, "step": 16578 }, { "epoch": 0.508121858526419, "grad_norm": 1.1690114192834336, "learning_rate": 5.111414132691355e-06, "loss": 0.5936, "step": 16579 }, { "epoch": 0.5081525070491603, "grad_norm": 1.17840315419219, "learning_rate": 5.110917937537108e-06, "loss": 0.618, "step": 16580 }, { "epoch": 0.5081831555719014, "grad_norm": 1.2011284120417527, "learning_rate": 5.110421741289957e-06, "loss": 0.7058, "step": 16581 }, { "epoch": 0.5082138040946427, "grad_norm": 1.2172399529882645, "learning_rate": 5.109925543954793e-06, "loss": 0.6214, "step": 16582 }, { "epoch": 0.5082444526173838, "grad_norm": 1.2859403928524873, "learning_rate": 5.109429345536504e-06, "loss": 0.6394, "step": 16583 }, { "epoch": 0.5082751011401251, "grad_norm": 1.2209594687039085, "learning_rate": 5.108933146039981e-06, "loss": 0.6677, "step": 16584 }, { "epoch": 0.5083057496628662, "grad_norm": 1.3479690402055085, "learning_rate": 5.108436945470111e-06, "loss": 0.7086, "step": 16585 }, { "epoch": 0.5083363981856075, "grad_norm": 1.2963056638857748, "learning_rate": 5.107940743831784e-06, "loss": 0.6857, "step": 16586 }, { "epoch": 0.5083670467083486, "grad_norm": 1.3753630944433155, "learning_rate": 5.107444541129889e-06, "loss": 0.7098, "step": 16587 }, { "epoch": 0.5083976952310899, "grad_norm": 0.49059707090053273, "learning_rate": 5.106948337369315e-06, "loss": 0.406, "step": 16588 }, { "epoch": 0.5084283437538311, "grad_norm": 1.2856305181498668, "learning_rate": 5.106452132554953e-06, "loss": 0.7154, "step": 16589 }, { "epoch": 0.5084589922765723, "grad_norm": 1.2515452674928718, "learning_rate": 5.10595592669169e-06, "loss": 0.6378, "step": 16590 }, { "epoch": 0.5084896407993135, "grad_norm": 1.344992951500686, "learning_rate": 5.105459719784416e-06, "loss": 0.608, "step": 16591 }, { "epoch": 0.5085202893220547, "grad_norm": 1.3447309896067836, "learning_rate": 5.104963511838021e-06, "loss": 0.6227, "step": 16592 }, { "epoch": 0.5085509378447959, "grad_norm": 1.42962918342897, "learning_rate": 5.104467302857393e-06, "loss": 0.668, "step": 16593 }, { "epoch": 0.508581586367537, "grad_norm": 1.181334369613172, "learning_rate": 5.103971092847422e-06, "loss": 0.7602, "step": 16594 }, { "epoch": 0.5086122348902783, "grad_norm": 1.5185384768042565, "learning_rate": 5.103474881812998e-06, "loss": 0.7186, "step": 16595 }, { "epoch": 0.5086428834130194, "grad_norm": 1.19784496622589, "learning_rate": 5.102978669759009e-06, "loss": 0.6129, "step": 16596 }, { "epoch": 0.5086735319357607, "grad_norm": 1.2960500280174942, "learning_rate": 5.102482456690345e-06, "loss": 0.6674, "step": 16597 }, { "epoch": 0.5087041804585019, "grad_norm": 1.147530021741446, "learning_rate": 5.101986242611895e-06, "loss": 0.6408, "step": 16598 }, { "epoch": 0.5087348289812431, "grad_norm": 1.23409292481666, "learning_rate": 5.101490027528548e-06, "loss": 0.6466, "step": 16599 }, { "epoch": 0.5087654775039843, "grad_norm": 1.3963590511725192, "learning_rate": 5.100993811445195e-06, "loss": 0.61, "step": 16600 }, { "epoch": 0.5087961260267255, "grad_norm": 1.3335808813130787, "learning_rate": 5.100497594366724e-06, "loss": 0.7759, "step": 16601 }, { "epoch": 0.5088267745494667, "grad_norm": 1.2936600655679933, "learning_rate": 5.100001376298023e-06, "loss": 0.6379, "step": 16602 }, { "epoch": 0.5088574230722079, "grad_norm": 0.47776343726356174, "learning_rate": 5.099505157243984e-06, "loss": 0.4045, "step": 16603 }, { "epoch": 0.5088880715949491, "grad_norm": 1.4404768426708077, "learning_rate": 5.099008937209495e-06, "loss": 0.652, "step": 16604 }, { "epoch": 0.5089187201176903, "grad_norm": 1.1945641996397751, "learning_rate": 5.098512716199445e-06, "loss": 0.6177, "step": 16605 }, { "epoch": 0.5089493686404315, "grad_norm": 0.46042121697767974, "learning_rate": 5.098016494218725e-06, "loss": 0.4172, "step": 16606 }, { "epoch": 0.5089800171631728, "grad_norm": 1.2027410897825053, "learning_rate": 5.097520271272223e-06, "loss": 0.6954, "step": 16607 }, { "epoch": 0.5090106656859139, "grad_norm": 1.3562819074844776, "learning_rate": 5.097024047364829e-06, "loss": 0.6663, "step": 16608 }, { "epoch": 0.5090413142086552, "grad_norm": 1.17749145081629, "learning_rate": 5.096527822501431e-06, "loss": 0.6683, "step": 16609 }, { "epoch": 0.5090719627313963, "grad_norm": 1.4169830970702455, "learning_rate": 5.0960315966869215e-06, "loss": 0.7204, "step": 16610 }, { "epoch": 0.5091026112541376, "grad_norm": 1.1772567446304294, "learning_rate": 5.095535369926188e-06, "loss": 0.6361, "step": 16611 }, { "epoch": 0.5091332597768787, "grad_norm": 0.4469644248213176, "learning_rate": 5.09503914222412e-06, "loss": 0.4062, "step": 16612 }, { "epoch": 0.50916390829962, "grad_norm": 1.0948236150572535, "learning_rate": 5.094542913585605e-06, "loss": 0.5935, "step": 16613 }, { "epoch": 0.5091945568223611, "grad_norm": 1.40660939550203, "learning_rate": 5.094046684015536e-06, "loss": 0.6805, "step": 16614 }, { "epoch": 0.5092252053451024, "grad_norm": 1.531910287424808, "learning_rate": 5.0935504535188005e-06, "loss": 0.6631, "step": 16615 }, { "epoch": 0.5092558538678436, "grad_norm": 1.18923748315763, "learning_rate": 5.09305422210029e-06, "loss": 0.6692, "step": 16616 }, { "epoch": 0.5092865023905848, "grad_norm": 1.204431302531003, "learning_rate": 5.09255798976489e-06, "loss": 0.644, "step": 16617 }, { "epoch": 0.509317150913326, "grad_norm": 1.1036031099902275, "learning_rate": 5.092061756517494e-06, "loss": 0.6549, "step": 16618 }, { "epoch": 0.5093477994360672, "grad_norm": 1.2674686749215516, "learning_rate": 5.09156552236299e-06, "loss": 0.6475, "step": 16619 }, { "epoch": 0.5093784479588084, "grad_norm": 1.2974317135142808, "learning_rate": 5.091069287306266e-06, "loss": 0.6911, "step": 16620 }, { "epoch": 0.5094090964815496, "grad_norm": 1.392946392154442, "learning_rate": 5.090573051352215e-06, "loss": 0.6896, "step": 16621 }, { "epoch": 0.5094397450042908, "grad_norm": 1.2675172940545987, "learning_rate": 5.0900768145057224e-06, "loss": 0.6367, "step": 16622 }, { "epoch": 0.509470393527032, "grad_norm": 0.4579756606038044, "learning_rate": 5.089580576771682e-06, "loss": 0.4242, "step": 16623 }, { "epoch": 0.5095010420497732, "grad_norm": 1.4395614303331532, "learning_rate": 5.089084338154981e-06, "loss": 0.6651, "step": 16624 }, { "epoch": 0.5095316905725144, "grad_norm": 1.3705937608958039, "learning_rate": 5.088588098660508e-06, "loss": 0.7141, "step": 16625 }, { "epoch": 0.5095623390952556, "grad_norm": 1.2572087024240501, "learning_rate": 5.088091858293153e-06, "loss": 0.6364, "step": 16626 }, { "epoch": 0.5095929876179968, "grad_norm": 1.38610081502105, "learning_rate": 5.08759561705781e-06, "loss": 0.6226, "step": 16627 }, { "epoch": 0.509623636140738, "grad_norm": 0.46590868634931704, "learning_rate": 5.087099374959362e-06, "loss": 0.413, "step": 16628 }, { "epoch": 0.5096542846634792, "grad_norm": 0.46845998583651327, "learning_rate": 5.086603132002702e-06, "loss": 0.414, "step": 16629 }, { "epoch": 0.5096849331862204, "grad_norm": 0.4724826217391211, "learning_rate": 5.0861068881927185e-06, "loss": 0.4093, "step": 16630 }, { "epoch": 0.5097155817089616, "grad_norm": 1.2815782456251608, "learning_rate": 5.085610643534305e-06, "loss": 0.597, "step": 16631 }, { "epoch": 0.5097462302317028, "grad_norm": 1.3196959339972127, "learning_rate": 5.0851143980323445e-06, "loss": 0.6107, "step": 16632 }, { "epoch": 0.509776878754444, "grad_norm": 1.4764929401762508, "learning_rate": 5.084618151691733e-06, "loss": 0.6546, "step": 16633 }, { "epoch": 0.5098075272771853, "grad_norm": 1.2399224361206767, "learning_rate": 5.084121904517354e-06, "loss": 0.6745, "step": 16634 }, { "epoch": 0.5098381757999264, "grad_norm": 1.2768084597770075, "learning_rate": 5.083625656514101e-06, "loss": 0.5973, "step": 16635 }, { "epoch": 0.5098688243226677, "grad_norm": 0.4282346030833315, "learning_rate": 5.083129407686865e-06, "loss": 0.4113, "step": 16636 }, { "epoch": 0.5098994728454088, "grad_norm": 1.1981279239714544, "learning_rate": 5.082633158040532e-06, "loss": 0.5952, "step": 16637 }, { "epoch": 0.5099301213681501, "grad_norm": 1.3980490841828097, "learning_rate": 5.082136907579995e-06, "loss": 0.7388, "step": 16638 }, { "epoch": 0.5099607698908912, "grad_norm": 1.2181012153952822, "learning_rate": 5.081640656310141e-06, "loss": 0.6785, "step": 16639 }, { "epoch": 0.5099914184136325, "grad_norm": 1.5431410803724055, "learning_rate": 5.081144404235861e-06, "loss": 0.603, "step": 16640 }, { "epoch": 0.5100220669363736, "grad_norm": 1.1953381980672786, "learning_rate": 5.080648151362043e-06, "loss": 0.6257, "step": 16641 }, { "epoch": 0.5100527154591149, "grad_norm": 1.1509819701102093, "learning_rate": 5.080151897693581e-06, "loss": 0.5852, "step": 16642 }, { "epoch": 0.510083363981856, "grad_norm": 0.48924292358560456, "learning_rate": 5.079655643235358e-06, "loss": 0.4157, "step": 16643 }, { "epoch": 0.5101140125045973, "grad_norm": 1.468308852321121, "learning_rate": 5.079159387992271e-06, "loss": 0.6806, "step": 16644 }, { "epoch": 0.5101446610273385, "grad_norm": 1.1620024915389966, "learning_rate": 5.0786631319692034e-06, "loss": 0.586, "step": 16645 }, { "epoch": 0.5101753095500797, "grad_norm": 1.1878450944322194, "learning_rate": 5.07816687517105e-06, "loss": 0.6795, "step": 16646 }, { "epoch": 0.5102059580728209, "grad_norm": 1.2403487261585298, "learning_rate": 5.077670617602698e-06, "loss": 0.6376, "step": 16647 }, { "epoch": 0.5102366065955621, "grad_norm": 1.2987979344885603, "learning_rate": 5.0771743592690356e-06, "loss": 0.6813, "step": 16648 }, { "epoch": 0.5102672551183033, "grad_norm": 1.3216250268385625, "learning_rate": 5.076678100174958e-06, "loss": 0.6852, "step": 16649 }, { "epoch": 0.5102979036410445, "grad_norm": 0.4425613771368102, "learning_rate": 5.0761818403253496e-06, "loss": 0.4078, "step": 16650 }, { "epoch": 0.5103285521637857, "grad_norm": 1.2355154264027646, "learning_rate": 5.0756855797251015e-06, "loss": 0.6066, "step": 16651 }, { "epoch": 0.510359200686527, "grad_norm": 1.2997608296027332, "learning_rate": 5.0751893183791046e-06, "loss": 0.7087, "step": 16652 }, { "epoch": 0.5103898492092681, "grad_norm": 1.242085067901712, "learning_rate": 5.074693056292248e-06, "loss": 0.6194, "step": 16653 }, { "epoch": 0.5104204977320094, "grad_norm": 1.3959873469248831, "learning_rate": 5.0741967934694224e-06, "loss": 0.705, "step": 16654 }, { "epoch": 0.5104511462547505, "grad_norm": 0.4456479820030556, "learning_rate": 5.073700529915516e-06, "loss": 0.3972, "step": 16655 }, { "epoch": 0.5104817947774917, "grad_norm": 1.2851022153528227, "learning_rate": 5.073204265635418e-06, "loss": 0.6019, "step": 16656 }, { "epoch": 0.5105124433002329, "grad_norm": 1.3370085015524575, "learning_rate": 5.072708000634023e-06, "loss": 0.6217, "step": 16657 }, { "epoch": 0.5105430918229741, "grad_norm": 1.3452380650198525, "learning_rate": 5.072211734916215e-06, "loss": 0.8175, "step": 16658 }, { "epoch": 0.5105737403457153, "grad_norm": 1.3212360581706764, "learning_rate": 5.071715468486887e-06, "loss": 0.5773, "step": 16659 }, { "epoch": 0.5106043888684565, "grad_norm": 1.3932077601367043, "learning_rate": 5.071219201350928e-06, "loss": 0.6864, "step": 16660 }, { "epoch": 0.5106350373911978, "grad_norm": 1.3154013207852682, "learning_rate": 5.070722933513228e-06, "loss": 0.6802, "step": 16661 }, { "epoch": 0.5106656859139389, "grad_norm": 1.3672918574980273, "learning_rate": 5.070226664978677e-06, "loss": 0.6788, "step": 16662 }, { "epoch": 0.5106963344366802, "grad_norm": 1.407698263582817, "learning_rate": 5.069730395752164e-06, "loss": 0.7026, "step": 16663 }, { "epoch": 0.5107269829594213, "grad_norm": 1.4038505842780198, "learning_rate": 5.06923412583858e-06, "loss": 0.5673, "step": 16664 }, { "epoch": 0.5107576314821626, "grad_norm": 1.3985231830184932, "learning_rate": 5.068737855242816e-06, "loss": 0.7647, "step": 16665 }, { "epoch": 0.5107882800049037, "grad_norm": 1.2197825148991825, "learning_rate": 5.0682415839697585e-06, "loss": 0.6131, "step": 16666 }, { "epoch": 0.510818928527645, "grad_norm": 1.226575211445134, "learning_rate": 5.0677453120242995e-06, "loss": 0.6909, "step": 16667 }, { "epoch": 0.5108495770503861, "grad_norm": 0.49561807976269573, "learning_rate": 5.067249039411329e-06, "loss": 0.404, "step": 16668 }, { "epoch": 0.5108802255731274, "grad_norm": 1.1300248416327692, "learning_rate": 5.0667527661357365e-06, "loss": 0.6001, "step": 16669 }, { "epoch": 0.5109108740958686, "grad_norm": 1.2956877637573085, "learning_rate": 5.0662564922024115e-06, "loss": 0.6917, "step": 16670 }, { "epoch": 0.5109415226186098, "grad_norm": 0.488004078027959, "learning_rate": 5.065760217616243e-06, "loss": 0.4314, "step": 16671 }, { "epoch": 0.510972171141351, "grad_norm": 1.2509640306885794, "learning_rate": 5.065263942382125e-06, "loss": 0.538, "step": 16672 }, { "epoch": 0.5110028196640922, "grad_norm": 1.198280734701915, "learning_rate": 5.064767666504944e-06, "loss": 0.6571, "step": 16673 }, { "epoch": 0.5110334681868334, "grad_norm": 1.1465021340452355, "learning_rate": 5.064271389989589e-06, "loss": 0.6645, "step": 16674 }, { "epoch": 0.5110641167095746, "grad_norm": 1.4058456357688265, "learning_rate": 5.063775112840953e-06, "loss": 0.716, "step": 16675 }, { "epoch": 0.5110947652323158, "grad_norm": 0.44574977751625194, "learning_rate": 5.063278835063923e-06, "loss": 0.4287, "step": 16676 }, { "epoch": 0.511125413755057, "grad_norm": 1.390699524884588, "learning_rate": 5.062782556663393e-06, "loss": 0.671, "step": 16677 }, { "epoch": 0.5111560622777982, "grad_norm": 1.697914954132034, "learning_rate": 5.062286277644248e-06, "loss": 0.6617, "step": 16678 }, { "epoch": 0.5111867108005395, "grad_norm": 1.1962538996837537, "learning_rate": 5.0617899980113815e-06, "loss": 0.7113, "step": 16679 }, { "epoch": 0.5112173593232806, "grad_norm": 1.216105543137198, "learning_rate": 5.061293717769682e-06, "loss": 0.5656, "step": 16680 }, { "epoch": 0.5112480078460219, "grad_norm": 1.2788848992648243, "learning_rate": 5.060797436924041e-06, "loss": 0.6736, "step": 16681 }, { "epoch": 0.511278656368763, "grad_norm": 1.2180701924550277, "learning_rate": 5.060301155479346e-06, "loss": 0.6396, "step": 16682 }, { "epoch": 0.5113093048915043, "grad_norm": 0.46547776955943415, "learning_rate": 5.059804873440488e-06, "loss": 0.4081, "step": 16683 }, { "epoch": 0.5113399534142454, "grad_norm": 0.4785321810209654, "learning_rate": 5.059308590812357e-06, "loss": 0.4308, "step": 16684 }, { "epoch": 0.5113706019369867, "grad_norm": 1.228362103909252, "learning_rate": 5.058812307599846e-06, "loss": 0.6913, "step": 16685 }, { "epoch": 0.5114012504597278, "grad_norm": 1.2027584265454438, "learning_rate": 5.05831602380784e-06, "loss": 0.7209, "step": 16686 }, { "epoch": 0.511431898982469, "grad_norm": 1.523558396090875, "learning_rate": 5.057819739441231e-06, "loss": 0.5973, "step": 16687 }, { "epoch": 0.5114625475052103, "grad_norm": 1.2579214758657582, "learning_rate": 5.057323454504911e-06, "loss": 0.5768, "step": 16688 }, { "epoch": 0.5114931960279514, "grad_norm": 0.4768477600428436, "learning_rate": 5.056827169003766e-06, "loss": 0.4017, "step": 16689 }, { "epoch": 0.5115238445506927, "grad_norm": 0.43915880582453815, "learning_rate": 5.05633088294269e-06, "loss": 0.3851, "step": 16690 }, { "epoch": 0.5115544930734338, "grad_norm": 1.278468832606294, "learning_rate": 5.055834596326571e-06, "loss": 0.6186, "step": 16691 }, { "epoch": 0.5115851415961751, "grad_norm": 1.2473599273402902, "learning_rate": 5.055338309160301e-06, "loss": 0.626, "step": 16692 }, { "epoch": 0.5116157901189162, "grad_norm": 1.0962545139903077, "learning_rate": 5.054842021448766e-06, "loss": 0.6082, "step": 16693 }, { "epoch": 0.5116464386416575, "grad_norm": 1.0824215726440103, "learning_rate": 5.05434573319686e-06, "loss": 0.5828, "step": 16694 }, { "epoch": 0.5116770871643986, "grad_norm": 1.3033581155334923, "learning_rate": 5.05384944440947e-06, "loss": 0.6386, "step": 16695 }, { "epoch": 0.5117077356871399, "grad_norm": 0.4368462103499998, "learning_rate": 5.053353155091491e-06, "loss": 0.3994, "step": 16696 }, { "epoch": 0.511738384209881, "grad_norm": 1.2204096329311984, "learning_rate": 5.052856865247806e-06, "loss": 0.6113, "step": 16697 }, { "epoch": 0.5117690327326223, "grad_norm": 1.3794069190999767, "learning_rate": 5.05236057488331e-06, "loss": 0.5943, "step": 16698 }, { "epoch": 0.5117996812553635, "grad_norm": 1.242372048205169, "learning_rate": 5.051864284002892e-06, "loss": 0.6509, "step": 16699 }, { "epoch": 0.5118303297781047, "grad_norm": 1.3575042084457114, "learning_rate": 5.051367992611442e-06, "loss": 0.689, "step": 16700 }, { "epoch": 0.5118609783008459, "grad_norm": 1.361637672575538, "learning_rate": 5.050871700713851e-06, "loss": 0.6514, "step": 16701 }, { "epoch": 0.5118916268235871, "grad_norm": 1.2888892297524277, "learning_rate": 5.050375408315006e-06, "loss": 0.6528, "step": 16702 }, { "epoch": 0.5119222753463283, "grad_norm": 1.3198460892487014, "learning_rate": 5.049879115419801e-06, "loss": 0.6199, "step": 16703 }, { "epoch": 0.5119529238690695, "grad_norm": 1.2142601778361535, "learning_rate": 5.049382822033124e-06, "loss": 0.6407, "step": 16704 }, { "epoch": 0.5119835723918107, "grad_norm": 1.2813874951913946, "learning_rate": 5.048886528159865e-06, "loss": 0.6454, "step": 16705 }, { "epoch": 0.512014220914552, "grad_norm": 1.3551151623021778, "learning_rate": 5.048390233804914e-06, "loss": 0.6173, "step": 16706 }, { "epoch": 0.5120448694372931, "grad_norm": 1.3675426482938517, "learning_rate": 5.047893938973163e-06, "loss": 0.6897, "step": 16707 }, { "epoch": 0.5120755179600344, "grad_norm": 1.3494606023053228, "learning_rate": 5.0473976436695e-06, "loss": 0.6912, "step": 16708 }, { "epoch": 0.5121061664827755, "grad_norm": 0.5023965832594773, "learning_rate": 5.046901347898816e-06, "loss": 0.414, "step": 16709 }, { "epoch": 0.5121368150055168, "grad_norm": 1.3207485692675514, "learning_rate": 5.046405051666001e-06, "loss": 0.6896, "step": 16710 }, { "epoch": 0.5121674635282579, "grad_norm": 1.3065095120026893, "learning_rate": 5.045908754975946e-06, "loss": 0.5743, "step": 16711 }, { "epoch": 0.5121981120509992, "grad_norm": 1.4706726542107593, "learning_rate": 5.0454124578335395e-06, "loss": 0.6655, "step": 16712 }, { "epoch": 0.5122287605737403, "grad_norm": 1.5070405816860877, "learning_rate": 5.044916160243673e-06, "loss": 0.6747, "step": 16713 }, { "epoch": 0.5122594090964816, "grad_norm": 1.3825312008506987, "learning_rate": 5.044419862211234e-06, "loss": 0.7233, "step": 16714 }, { "epoch": 0.5122900576192227, "grad_norm": 1.0534633323351708, "learning_rate": 5.043923563741117e-06, "loss": 0.7159, "step": 16715 }, { "epoch": 0.512320706141964, "grad_norm": 1.3272772645790045, "learning_rate": 5.043427264838209e-06, "loss": 0.6716, "step": 16716 }, { "epoch": 0.5123513546647052, "grad_norm": 1.1049949369256262, "learning_rate": 5.0429309655074e-06, "loss": 0.5784, "step": 16717 }, { "epoch": 0.5123820031874463, "grad_norm": 1.3300142371792851, "learning_rate": 5.042434665753582e-06, "loss": 0.7253, "step": 16718 }, { "epoch": 0.5124126517101876, "grad_norm": 1.1792160911488823, "learning_rate": 5.041938365581646e-06, "loss": 0.5609, "step": 16719 }, { "epoch": 0.5124433002329287, "grad_norm": 0.9873434859170812, "learning_rate": 5.041442064996479e-06, "loss": 0.5753, "step": 16720 }, { "epoch": 0.51247394875567, "grad_norm": 1.294407775006182, "learning_rate": 5.040945764002972e-06, "loss": 0.6729, "step": 16721 }, { "epoch": 0.5125045972784111, "grad_norm": 1.2328722135499972, "learning_rate": 5.0404494626060175e-06, "loss": 0.6274, "step": 16722 }, { "epoch": 0.5125352458011524, "grad_norm": 1.2971970289430084, "learning_rate": 5.039953160810504e-06, "loss": 0.6462, "step": 16723 }, { "epoch": 0.5125658943238935, "grad_norm": 1.2387970122455, "learning_rate": 5.039456858621322e-06, "loss": 0.6353, "step": 16724 }, { "epoch": 0.5125965428466348, "grad_norm": 1.2943080638688809, "learning_rate": 5.03896055604336e-06, "loss": 0.6038, "step": 16725 }, { "epoch": 0.512627191369376, "grad_norm": 1.4558622999411825, "learning_rate": 5.038464253081511e-06, "loss": 0.6986, "step": 16726 }, { "epoch": 0.5126578398921172, "grad_norm": 1.210465582570233, "learning_rate": 5.037967949740663e-06, "loss": 0.6007, "step": 16727 }, { "epoch": 0.5126884884148584, "grad_norm": 1.1820220379246427, "learning_rate": 5.037471646025707e-06, "loss": 0.5544, "step": 16728 }, { "epoch": 0.5127191369375996, "grad_norm": 0.4820845264355772, "learning_rate": 5.0369753419415335e-06, "loss": 0.3958, "step": 16729 }, { "epoch": 0.5127497854603408, "grad_norm": 1.3251230063271868, "learning_rate": 5.036479037493034e-06, "loss": 0.59, "step": 16730 }, { "epoch": 0.512780433983082, "grad_norm": 0.46892085894903657, "learning_rate": 5.035982732685095e-06, "loss": 0.4093, "step": 16731 }, { "epoch": 0.5128110825058232, "grad_norm": 1.378610109894953, "learning_rate": 5.03548642752261e-06, "loss": 0.6356, "step": 16732 }, { "epoch": 0.5128417310285645, "grad_norm": 1.314217270316051, "learning_rate": 5.034990122010468e-06, "loss": 0.5903, "step": 16733 }, { "epoch": 0.5128723795513056, "grad_norm": 1.1462328567266749, "learning_rate": 5.034493816153558e-06, "loss": 0.6198, "step": 16734 }, { "epoch": 0.5129030280740469, "grad_norm": 0.4460072586013435, "learning_rate": 5.033997509956775e-06, "loss": 0.4053, "step": 16735 }, { "epoch": 0.512933676596788, "grad_norm": 1.464212625569288, "learning_rate": 5.0335012034250034e-06, "loss": 0.6627, "step": 16736 }, { "epoch": 0.5129643251195293, "grad_norm": 1.2922244685495698, "learning_rate": 5.033004896563136e-06, "loss": 0.6996, "step": 16737 }, { "epoch": 0.5129949736422704, "grad_norm": 1.3229276776670365, "learning_rate": 5.0325085893760625e-06, "loss": 0.7359, "step": 16738 }, { "epoch": 0.5130256221650117, "grad_norm": 0.4607597002945122, "learning_rate": 5.032012281868676e-06, "loss": 0.4202, "step": 16739 }, { "epoch": 0.5130562706877528, "grad_norm": 1.3685719876974216, "learning_rate": 5.031515974045861e-06, "loss": 0.6296, "step": 16740 }, { "epoch": 0.5130869192104941, "grad_norm": 1.3126477079700336, "learning_rate": 5.031019665912513e-06, "loss": 0.6288, "step": 16741 }, { "epoch": 0.5131175677332352, "grad_norm": 1.2475737478535958, "learning_rate": 5.0305233574735205e-06, "loss": 0.7052, "step": 16742 }, { "epoch": 0.5131482162559765, "grad_norm": 0.4486843110665941, "learning_rate": 5.030027048733772e-06, "loss": 0.417, "step": 16743 }, { "epoch": 0.5131788647787177, "grad_norm": 1.4058932538046762, "learning_rate": 5.029530739698161e-06, "loss": 0.5883, "step": 16744 }, { "epoch": 0.5132095133014589, "grad_norm": 0.4528382022714537, "learning_rate": 5.029034430371574e-06, "loss": 0.429, "step": 16745 }, { "epoch": 0.5132401618242001, "grad_norm": 1.3562970903893563, "learning_rate": 5.0285381207589055e-06, "loss": 0.6481, "step": 16746 }, { "epoch": 0.5132708103469413, "grad_norm": 1.3572093424544218, "learning_rate": 5.028041810865042e-06, "loss": 0.5836, "step": 16747 }, { "epoch": 0.5133014588696825, "grad_norm": 1.250463204695014, "learning_rate": 5.027545500694877e-06, "loss": 0.5782, "step": 16748 }, { "epoch": 0.5133321073924236, "grad_norm": 1.2658389719095495, "learning_rate": 5.027049190253297e-06, "loss": 0.5748, "step": 16749 }, { "epoch": 0.5133627559151649, "grad_norm": 1.2954673644641546, "learning_rate": 5.026552879545197e-06, "loss": 0.6546, "step": 16750 }, { "epoch": 0.513393404437906, "grad_norm": 0.4705873887659697, "learning_rate": 5.026056568575462e-06, "loss": 0.4186, "step": 16751 }, { "epoch": 0.5134240529606473, "grad_norm": 0.4794712782977858, "learning_rate": 5.025560257348987e-06, "loss": 0.4366, "step": 16752 }, { "epoch": 0.5134547014833885, "grad_norm": 1.2999990917298927, "learning_rate": 5.025063945870659e-06, "loss": 0.631, "step": 16753 }, { "epoch": 0.5134853500061297, "grad_norm": 1.2271836578554915, "learning_rate": 5.02456763414537e-06, "loss": 0.6321, "step": 16754 }, { "epoch": 0.5135159985288709, "grad_norm": 1.1867704529270486, "learning_rate": 5.024071322178011e-06, "loss": 0.6385, "step": 16755 }, { "epoch": 0.5135466470516121, "grad_norm": 1.1943892150295625, "learning_rate": 5.0235750099734695e-06, "loss": 0.594, "step": 16756 }, { "epoch": 0.5135772955743533, "grad_norm": 1.3159889398164584, "learning_rate": 5.023078697536637e-06, "loss": 0.6748, "step": 16757 }, { "epoch": 0.5136079440970945, "grad_norm": 1.279300570847324, "learning_rate": 5.0225823848724054e-06, "loss": 0.6669, "step": 16758 }, { "epoch": 0.5136385926198357, "grad_norm": 1.2783958088288236, "learning_rate": 5.022086071985663e-06, "loss": 0.6336, "step": 16759 }, { "epoch": 0.513669241142577, "grad_norm": 1.0585707526831343, "learning_rate": 5.021589758881301e-06, "loss": 0.5975, "step": 16760 }, { "epoch": 0.5136998896653181, "grad_norm": 1.3871618663367238, "learning_rate": 5.02109344556421e-06, "loss": 0.668, "step": 16761 }, { "epoch": 0.5137305381880594, "grad_norm": 1.317378308822234, "learning_rate": 5.020597132039281e-06, "loss": 0.6598, "step": 16762 }, { "epoch": 0.5137611867108005, "grad_norm": 1.4524193030983674, "learning_rate": 5.020100818311402e-06, "loss": 0.5969, "step": 16763 }, { "epoch": 0.5137918352335418, "grad_norm": 1.3745117535503544, "learning_rate": 5.019604504385464e-06, "loss": 0.7526, "step": 16764 }, { "epoch": 0.5138224837562829, "grad_norm": 1.3477044093820272, "learning_rate": 5.019108190266358e-06, "loss": 0.6473, "step": 16765 }, { "epoch": 0.5138531322790242, "grad_norm": 1.3095515479652409, "learning_rate": 5.018611875958974e-06, "loss": 0.6451, "step": 16766 }, { "epoch": 0.5138837808017653, "grad_norm": 1.1549128350396518, "learning_rate": 5.0181155614682045e-06, "loss": 0.6033, "step": 16767 }, { "epoch": 0.5139144293245066, "grad_norm": 1.4417501367288494, "learning_rate": 5.017619246798935e-06, "loss": 0.6319, "step": 16768 }, { "epoch": 0.5139450778472477, "grad_norm": 1.1299198408772482, "learning_rate": 5.017122931956059e-06, "loss": 0.54, "step": 16769 }, { "epoch": 0.513975726369989, "grad_norm": 1.1944047860965898, "learning_rate": 5.0166266169444675e-06, "loss": 0.6392, "step": 16770 }, { "epoch": 0.5140063748927302, "grad_norm": 1.1986081065712104, "learning_rate": 5.016130301769049e-06, "loss": 0.6209, "step": 16771 }, { "epoch": 0.5140370234154714, "grad_norm": 1.217431080213836, "learning_rate": 5.015633986434695e-06, "loss": 0.7152, "step": 16772 }, { "epoch": 0.5140676719382126, "grad_norm": 1.4387670282706635, "learning_rate": 5.015137670946295e-06, "loss": 0.6432, "step": 16773 }, { "epoch": 0.5140983204609538, "grad_norm": 1.2168684627756952, "learning_rate": 5.014641355308739e-06, "loss": 0.6736, "step": 16774 }, { "epoch": 0.514128968983695, "grad_norm": 1.3269111524474366, "learning_rate": 5.0141450395269184e-06, "loss": 0.6603, "step": 16775 }, { "epoch": 0.5141596175064362, "grad_norm": 1.2051234512595492, "learning_rate": 5.013648723605724e-06, "loss": 0.6019, "step": 16776 }, { "epoch": 0.5141902660291774, "grad_norm": 1.177752707601175, "learning_rate": 5.013152407550045e-06, "loss": 0.5152, "step": 16777 }, { "epoch": 0.5142209145519187, "grad_norm": 1.1154637229766424, "learning_rate": 5.012656091364772e-06, "loss": 0.5795, "step": 16778 }, { "epoch": 0.5142515630746598, "grad_norm": 1.355918387928336, "learning_rate": 5.012159775054793e-06, "loss": 0.5701, "step": 16779 }, { "epoch": 0.514282211597401, "grad_norm": 1.1337321159151192, "learning_rate": 5.011663458625004e-06, "loss": 0.5009, "step": 16780 }, { "epoch": 0.5143128601201422, "grad_norm": 1.217950016192886, "learning_rate": 5.01116714208029e-06, "loss": 0.5954, "step": 16781 }, { "epoch": 0.5143435086428834, "grad_norm": 1.2848276477695912, "learning_rate": 5.010670825425543e-06, "loss": 0.5919, "step": 16782 }, { "epoch": 0.5143741571656246, "grad_norm": 0.5310328396653673, "learning_rate": 5.010174508665654e-06, "loss": 0.4218, "step": 16783 }, { "epoch": 0.5144048056883658, "grad_norm": 1.1697329294645302, "learning_rate": 5.009678191805514e-06, "loss": 0.616, "step": 16784 }, { "epoch": 0.514435454211107, "grad_norm": 1.198087240338803, "learning_rate": 5.009181874850012e-06, "loss": 0.604, "step": 16785 }, { "epoch": 0.5144661027338482, "grad_norm": 1.3928416018527294, "learning_rate": 5.008685557804038e-06, "loss": 0.7586, "step": 16786 }, { "epoch": 0.5144967512565894, "grad_norm": 1.3003205834247735, "learning_rate": 5.008189240672483e-06, "loss": 0.6648, "step": 16787 }, { "epoch": 0.5145273997793306, "grad_norm": 1.3979922775746596, "learning_rate": 5.007692923460236e-06, "loss": 0.6922, "step": 16788 }, { "epoch": 0.5145580483020719, "grad_norm": 1.3351118347235447, "learning_rate": 5.007196606172192e-06, "loss": 0.6758, "step": 16789 }, { "epoch": 0.514588696824813, "grad_norm": 1.2224457748751794, "learning_rate": 5.0067002888132355e-06, "loss": 0.6491, "step": 16790 }, { "epoch": 0.5146193453475543, "grad_norm": 1.3233002813338064, "learning_rate": 5.00620397138826e-06, "loss": 0.7365, "step": 16791 }, { "epoch": 0.5146499938702954, "grad_norm": 1.2738631742688218, "learning_rate": 5.005707653902155e-06, "loss": 0.5257, "step": 16792 }, { "epoch": 0.5146806423930367, "grad_norm": 1.149334053369962, "learning_rate": 5.005211336359812e-06, "loss": 0.5697, "step": 16793 }, { "epoch": 0.5147112909157778, "grad_norm": 1.228975524981019, "learning_rate": 5.004715018766119e-06, "loss": 0.6435, "step": 16794 }, { "epoch": 0.5147419394385191, "grad_norm": 1.1182949983924326, "learning_rate": 5.004218701125968e-06, "loss": 0.5943, "step": 16795 }, { "epoch": 0.5147725879612602, "grad_norm": 1.2848105273012835, "learning_rate": 5.00372238344425e-06, "loss": 0.5575, "step": 16796 }, { "epoch": 0.5148032364840015, "grad_norm": 1.233054746957283, "learning_rate": 5.003226065725853e-06, "loss": 0.6842, "step": 16797 }, { "epoch": 0.5148338850067427, "grad_norm": 1.1774303245754663, "learning_rate": 5.00272974797567e-06, "loss": 0.549, "step": 16798 }, { "epoch": 0.5148645335294839, "grad_norm": 1.2365441635242689, "learning_rate": 5.002233430198588e-06, "loss": 0.6903, "step": 16799 }, { "epoch": 0.5148951820522251, "grad_norm": 1.3947021633148622, "learning_rate": 5.001737112399503e-06, "loss": 0.6476, "step": 16800 }, { "epoch": 0.5149258305749663, "grad_norm": 1.3435527790706996, "learning_rate": 5.001240794583298e-06, "loss": 0.6005, "step": 16801 }, { "epoch": 0.5149564790977075, "grad_norm": 1.2665630970497763, "learning_rate": 5.000744476754871e-06, "loss": 0.7588, "step": 16802 }, { "epoch": 0.5149871276204487, "grad_norm": 1.2493296318242229, "learning_rate": 5.000248158919106e-06, "loss": 0.59, "step": 16803 }, { "epoch": 0.5150177761431899, "grad_norm": 1.3582356008060592, "learning_rate": 4.999751841080895e-06, "loss": 0.678, "step": 16804 }, { "epoch": 0.5150484246659311, "grad_norm": 1.344106076686554, "learning_rate": 4.999255523245132e-06, "loss": 0.7299, "step": 16805 }, { "epoch": 0.5150790731886723, "grad_norm": 1.234467119875469, "learning_rate": 4.9987592054167026e-06, "loss": 0.7381, "step": 16806 }, { "epoch": 0.5151097217114136, "grad_norm": 1.3092325061904302, "learning_rate": 4.998262887600499e-06, "loss": 0.5471, "step": 16807 }, { "epoch": 0.5151403702341547, "grad_norm": 1.3014145786931428, "learning_rate": 4.997766569801413e-06, "loss": 0.6655, "step": 16808 }, { "epoch": 0.515171018756896, "grad_norm": 1.2893387079063576, "learning_rate": 4.997270252024331e-06, "loss": 0.6899, "step": 16809 }, { "epoch": 0.5152016672796371, "grad_norm": 1.305396067989957, "learning_rate": 4.996773934274148e-06, "loss": 0.5812, "step": 16810 }, { "epoch": 0.5152323158023783, "grad_norm": 1.3564811616737853, "learning_rate": 4.996277616555753e-06, "loss": 0.6939, "step": 16811 }, { "epoch": 0.5152629643251195, "grad_norm": 0.5214939796411903, "learning_rate": 4.995781298874033e-06, "loss": 0.411, "step": 16812 }, { "epoch": 0.5152936128478607, "grad_norm": 1.1886484216064672, "learning_rate": 4.995284981233883e-06, "loss": 0.6487, "step": 16813 }, { "epoch": 0.5153242613706019, "grad_norm": 1.2869240799053538, "learning_rate": 4.99478866364019e-06, "loss": 0.6239, "step": 16814 }, { "epoch": 0.5153549098933431, "grad_norm": 1.3658308537053943, "learning_rate": 4.994292346097846e-06, "loss": 0.6123, "step": 16815 }, { "epoch": 0.5153855584160844, "grad_norm": 1.1370307114897622, "learning_rate": 4.9937960286117415e-06, "loss": 0.5947, "step": 16816 }, { "epoch": 0.5154162069388255, "grad_norm": 1.2653274464483264, "learning_rate": 4.993299711186768e-06, "loss": 0.5544, "step": 16817 }, { "epoch": 0.5154468554615668, "grad_norm": 1.3702862893492214, "learning_rate": 4.99280339382781e-06, "loss": 0.5976, "step": 16818 }, { "epoch": 0.5154775039843079, "grad_norm": 1.2924586483048386, "learning_rate": 4.992307076539765e-06, "loss": 0.6184, "step": 16819 }, { "epoch": 0.5155081525070492, "grad_norm": 1.2996191613903911, "learning_rate": 4.991810759327518e-06, "loss": 0.6813, "step": 16820 }, { "epoch": 0.5155388010297903, "grad_norm": 0.46372325143434745, "learning_rate": 4.991314442195964e-06, "loss": 0.4177, "step": 16821 }, { "epoch": 0.5155694495525316, "grad_norm": 1.1781999296998658, "learning_rate": 4.990818125149991e-06, "loss": 0.5812, "step": 16822 }, { "epoch": 0.5156000980752727, "grad_norm": 0.47131252213679437, "learning_rate": 4.990321808194488e-06, "loss": 0.4125, "step": 16823 }, { "epoch": 0.515630746598014, "grad_norm": 1.108253340896225, "learning_rate": 4.989825491334347e-06, "loss": 0.6193, "step": 16824 }, { "epoch": 0.5156613951207552, "grad_norm": 0.44870119179250695, "learning_rate": 4.9893291745744586e-06, "loss": 0.4428, "step": 16825 }, { "epoch": 0.5156920436434964, "grad_norm": 1.4130786014241659, "learning_rate": 4.988832857919711e-06, "loss": 0.6749, "step": 16826 }, { "epoch": 0.5157226921662376, "grad_norm": 1.2598552598393082, "learning_rate": 4.988336541374998e-06, "loss": 0.603, "step": 16827 }, { "epoch": 0.5157533406889788, "grad_norm": 0.4334335991182929, "learning_rate": 4.987840224945207e-06, "loss": 0.4174, "step": 16828 }, { "epoch": 0.51578398921172, "grad_norm": 1.4091011301851295, "learning_rate": 4.987343908635231e-06, "loss": 0.6419, "step": 16829 }, { "epoch": 0.5158146377344612, "grad_norm": 1.3685873794632697, "learning_rate": 4.986847592449958e-06, "loss": 0.7539, "step": 16830 }, { "epoch": 0.5158452862572024, "grad_norm": 1.353707718426964, "learning_rate": 4.986351276394277e-06, "loss": 0.6625, "step": 16831 }, { "epoch": 0.5158759347799436, "grad_norm": 1.3410692198385656, "learning_rate": 4.985854960473083e-06, "loss": 0.7264, "step": 16832 }, { "epoch": 0.5159065833026848, "grad_norm": 1.1696411561494025, "learning_rate": 4.985358644691263e-06, "loss": 0.6281, "step": 16833 }, { "epoch": 0.5159372318254261, "grad_norm": 1.3975127241706993, "learning_rate": 4.9848623290537065e-06, "loss": 0.7163, "step": 16834 }, { "epoch": 0.5159678803481672, "grad_norm": 1.3734212381417348, "learning_rate": 4.984366013565307e-06, "loss": 0.6839, "step": 16835 }, { "epoch": 0.5159985288709085, "grad_norm": 1.3420633078677418, "learning_rate": 4.983869698230952e-06, "loss": 0.6797, "step": 16836 }, { "epoch": 0.5160291773936496, "grad_norm": 1.1624885254631512, "learning_rate": 4.983373383055533e-06, "loss": 0.6415, "step": 16837 }, { "epoch": 0.5160598259163909, "grad_norm": 1.3414362240732531, "learning_rate": 4.982877068043942e-06, "loss": 0.664, "step": 16838 }, { "epoch": 0.516090474439132, "grad_norm": 1.3755022916084612, "learning_rate": 4.982380753201066e-06, "loss": 0.5983, "step": 16839 }, { "epoch": 0.5161211229618733, "grad_norm": 1.2261196118371855, "learning_rate": 4.981884438531798e-06, "loss": 0.6395, "step": 16840 }, { "epoch": 0.5161517714846144, "grad_norm": 1.3002047311262614, "learning_rate": 4.981388124041028e-06, "loss": 0.584, "step": 16841 }, { "epoch": 0.5161824200073556, "grad_norm": 1.1424040249388911, "learning_rate": 4.980891809733643e-06, "loss": 0.6551, "step": 16842 }, { "epoch": 0.5162130685300969, "grad_norm": 0.4701688984741225, "learning_rate": 4.980395495614538e-06, "loss": 0.4201, "step": 16843 }, { "epoch": 0.516243717052838, "grad_norm": 0.47758935307877615, "learning_rate": 4.9798991816886e-06, "loss": 0.4049, "step": 16844 }, { "epoch": 0.5162743655755793, "grad_norm": 1.2304351869045587, "learning_rate": 4.979402867960721e-06, "loss": 0.6229, "step": 16845 }, { "epoch": 0.5163050140983204, "grad_norm": 1.1425079446819704, "learning_rate": 4.978906554435791e-06, "loss": 0.5905, "step": 16846 }, { "epoch": 0.5163356626210617, "grad_norm": 1.2769411076816344, "learning_rate": 4.978410241118699e-06, "loss": 0.6576, "step": 16847 }, { "epoch": 0.5163663111438028, "grad_norm": 1.2328139460732808, "learning_rate": 4.977913928014338e-06, "loss": 0.6561, "step": 16848 }, { "epoch": 0.5163969596665441, "grad_norm": 0.45964291815502656, "learning_rate": 4.977417615127596e-06, "loss": 0.4063, "step": 16849 }, { "epoch": 0.5164276081892852, "grad_norm": 0.4652480370109621, "learning_rate": 4.976921302463364e-06, "loss": 0.4358, "step": 16850 }, { "epoch": 0.5164582567120265, "grad_norm": 1.285561096587483, "learning_rate": 4.976424990026532e-06, "loss": 0.5824, "step": 16851 }, { "epoch": 0.5164889052347676, "grad_norm": 0.4670089018513084, "learning_rate": 4.975928677821992e-06, "loss": 0.4158, "step": 16852 }, { "epoch": 0.5165195537575089, "grad_norm": 1.3211204054405656, "learning_rate": 4.975432365854631e-06, "loss": 0.7156, "step": 16853 }, { "epoch": 0.5165502022802501, "grad_norm": 1.227006003040457, "learning_rate": 4.974936054129343e-06, "loss": 0.6716, "step": 16854 }, { "epoch": 0.5165808508029913, "grad_norm": 1.2630033475665656, "learning_rate": 4.974439742651014e-06, "loss": 0.5798, "step": 16855 }, { "epoch": 0.5166114993257325, "grad_norm": 1.2617077017386948, "learning_rate": 4.9739434314245385e-06, "loss": 0.5912, "step": 16856 }, { "epoch": 0.5166421478484737, "grad_norm": 1.2783093828224874, "learning_rate": 4.973447120454805e-06, "loss": 0.6805, "step": 16857 }, { "epoch": 0.5166727963712149, "grad_norm": 1.1961950791052534, "learning_rate": 4.972950809746703e-06, "loss": 0.646, "step": 16858 }, { "epoch": 0.5167034448939561, "grad_norm": 0.49714114402724, "learning_rate": 4.972454499305125e-06, "loss": 0.4068, "step": 16859 }, { "epoch": 0.5167340934166973, "grad_norm": 1.2836811941380537, "learning_rate": 4.97195818913496e-06, "loss": 0.7025, "step": 16860 }, { "epoch": 0.5167647419394386, "grad_norm": 1.1712511013155242, "learning_rate": 4.971461879241095e-06, "loss": 0.5836, "step": 16861 }, { "epoch": 0.5167953904621797, "grad_norm": 1.1589394761097542, "learning_rate": 4.970965569628428e-06, "loss": 0.6058, "step": 16862 }, { "epoch": 0.516826038984921, "grad_norm": 1.3401926216462399, "learning_rate": 4.97046926030184e-06, "loss": 0.6497, "step": 16863 }, { "epoch": 0.5168566875076621, "grad_norm": 1.3353008231977144, "learning_rate": 4.96997295126623e-06, "loss": 0.6271, "step": 16864 }, { "epoch": 0.5168873360304034, "grad_norm": 1.2293484445597664, "learning_rate": 4.969476642526482e-06, "loss": 0.5766, "step": 16865 }, { "epoch": 0.5169179845531445, "grad_norm": 1.2981242789140803, "learning_rate": 4.9689803340874886e-06, "loss": 0.6498, "step": 16866 }, { "epoch": 0.5169486330758858, "grad_norm": 1.1589346587769889, "learning_rate": 4.96848402595414e-06, "loss": 0.5729, "step": 16867 }, { "epoch": 0.5169792815986269, "grad_norm": 1.350682401650715, "learning_rate": 4.967987718131327e-06, "loss": 0.6325, "step": 16868 }, { "epoch": 0.5170099301213682, "grad_norm": 0.4716758994009881, "learning_rate": 4.967491410623938e-06, "loss": 0.4309, "step": 16869 }, { "epoch": 0.5170405786441093, "grad_norm": 1.298221188020286, "learning_rate": 4.9669951034368655e-06, "loss": 0.6457, "step": 16870 }, { "epoch": 0.5170712271668506, "grad_norm": 0.48317920346226, "learning_rate": 4.966498796575e-06, "loss": 0.4242, "step": 16871 }, { "epoch": 0.5171018756895918, "grad_norm": 1.3191386986250617, "learning_rate": 4.9660024900432265e-06, "loss": 0.6706, "step": 16872 }, { "epoch": 0.5171325242123329, "grad_norm": 1.391852796530509, "learning_rate": 4.965506183846443e-06, "loss": 0.6523, "step": 16873 }, { "epoch": 0.5171631727350742, "grad_norm": 1.2658959069335698, "learning_rate": 4.965009877989532e-06, "loss": 0.6776, "step": 16874 }, { "epoch": 0.5171938212578153, "grad_norm": 1.3084188391175333, "learning_rate": 4.964513572477392e-06, "loss": 0.6529, "step": 16875 }, { "epoch": 0.5172244697805566, "grad_norm": 1.184496791829484, "learning_rate": 4.964017267314907e-06, "loss": 0.6898, "step": 16876 }, { "epoch": 0.5172551183032977, "grad_norm": 1.323389997665188, "learning_rate": 4.963520962506968e-06, "loss": 0.6272, "step": 16877 }, { "epoch": 0.517285766826039, "grad_norm": 1.4679211282634816, "learning_rate": 4.963024658058467e-06, "loss": 0.6804, "step": 16878 }, { "epoch": 0.5173164153487801, "grad_norm": 1.4415190039581636, "learning_rate": 4.962528353974295e-06, "loss": 0.629, "step": 16879 }, { "epoch": 0.5173470638715214, "grad_norm": 0.47574862166587595, "learning_rate": 4.962032050259339e-06, "loss": 0.4007, "step": 16880 }, { "epoch": 0.5173777123942626, "grad_norm": 0.4965882732685522, "learning_rate": 4.961535746918491e-06, "loss": 0.4313, "step": 16881 }, { "epoch": 0.5174083609170038, "grad_norm": 1.3300427781113235, "learning_rate": 4.96103944395664e-06, "loss": 0.6014, "step": 16882 }, { "epoch": 0.517439009439745, "grad_norm": 1.2497482424744073, "learning_rate": 4.9605431413786795e-06, "loss": 0.5789, "step": 16883 }, { "epoch": 0.5174696579624862, "grad_norm": 1.3205665633195067, "learning_rate": 4.960046839189498e-06, "loss": 0.7227, "step": 16884 }, { "epoch": 0.5175003064852274, "grad_norm": 1.3409647914836405, "learning_rate": 4.9595505373939825e-06, "loss": 0.6112, "step": 16885 }, { "epoch": 0.5175309550079686, "grad_norm": 1.4118143097545635, "learning_rate": 4.959054235997029e-06, "loss": 0.6682, "step": 16886 }, { "epoch": 0.5175616035307098, "grad_norm": 1.2987257594090529, "learning_rate": 4.958557935003523e-06, "loss": 0.6148, "step": 16887 }, { "epoch": 0.517592252053451, "grad_norm": 0.5056286912117358, "learning_rate": 4.958061634418356e-06, "loss": 0.4242, "step": 16888 }, { "epoch": 0.5176229005761922, "grad_norm": 1.679923648106264, "learning_rate": 4.957565334246418e-06, "loss": 0.7573, "step": 16889 }, { "epoch": 0.5176535490989335, "grad_norm": 1.323346049041976, "learning_rate": 4.9570690344926e-06, "loss": 0.6637, "step": 16890 }, { "epoch": 0.5176841976216746, "grad_norm": 1.17641903209057, "learning_rate": 4.956572735161793e-06, "loss": 0.7337, "step": 16891 }, { "epoch": 0.5177148461444159, "grad_norm": 1.2809038493008444, "learning_rate": 4.956076436258885e-06, "loss": 0.6935, "step": 16892 }, { "epoch": 0.517745494667157, "grad_norm": 1.3024225729380605, "learning_rate": 4.955580137788766e-06, "loss": 0.7535, "step": 16893 }, { "epoch": 0.5177761431898983, "grad_norm": 1.236545714903699, "learning_rate": 4.95508383975633e-06, "loss": 0.6915, "step": 16894 }, { "epoch": 0.5178067917126394, "grad_norm": 1.1891218709532894, "learning_rate": 4.954587542166464e-06, "loss": 0.6226, "step": 16895 }, { "epoch": 0.5178374402353807, "grad_norm": 1.2793788215454722, "learning_rate": 4.954091245024055e-06, "loss": 0.6422, "step": 16896 }, { "epoch": 0.5178680887581218, "grad_norm": 1.3860966731392892, "learning_rate": 4.9535949483340005e-06, "loss": 0.676, "step": 16897 }, { "epoch": 0.5178987372808631, "grad_norm": 1.6825966435362383, "learning_rate": 4.9530986521011855e-06, "loss": 0.6425, "step": 16898 }, { "epoch": 0.5179293858036043, "grad_norm": 1.2883820273290407, "learning_rate": 4.952602356330501e-06, "loss": 0.6628, "step": 16899 }, { "epoch": 0.5179600343263455, "grad_norm": 1.3889446315700509, "learning_rate": 4.9521060610268385e-06, "loss": 0.7064, "step": 16900 }, { "epoch": 0.5179906828490867, "grad_norm": 1.3290549168611565, "learning_rate": 4.951609766195086e-06, "loss": 0.6848, "step": 16901 }, { "epoch": 0.5180213313718279, "grad_norm": 1.2841180948310582, "learning_rate": 4.951113471840136e-06, "loss": 0.6357, "step": 16902 }, { "epoch": 0.5180519798945691, "grad_norm": 1.1133979407421697, "learning_rate": 4.9506171779668776e-06, "loss": 0.5473, "step": 16903 }, { "epoch": 0.5180826284173102, "grad_norm": 0.4742752389057251, "learning_rate": 4.9501208845802e-06, "loss": 0.4241, "step": 16904 }, { "epoch": 0.5181132769400515, "grad_norm": 1.2167899279836838, "learning_rate": 4.9496245916849955e-06, "loss": 0.6338, "step": 16905 }, { "epoch": 0.5181439254627926, "grad_norm": 0.4398964161926937, "learning_rate": 4.9491282992861515e-06, "loss": 0.3878, "step": 16906 }, { "epoch": 0.5181745739855339, "grad_norm": 1.4109027150902422, "learning_rate": 4.948632007388559e-06, "loss": 0.6268, "step": 16907 }, { "epoch": 0.518205222508275, "grad_norm": 1.184979612755171, "learning_rate": 4.9481357159971096e-06, "loss": 0.6144, "step": 16908 }, { "epoch": 0.5182358710310163, "grad_norm": 1.1128927964727853, "learning_rate": 4.947639425116691e-06, "loss": 0.6239, "step": 16909 }, { "epoch": 0.5182665195537575, "grad_norm": 0.4437727890857837, "learning_rate": 4.947143134752195e-06, "loss": 0.4074, "step": 16910 }, { "epoch": 0.5182971680764987, "grad_norm": 1.305775001233368, "learning_rate": 4.946646844908513e-06, "loss": 0.6499, "step": 16911 }, { "epoch": 0.5183278165992399, "grad_norm": 1.2877110228805153, "learning_rate": 4.946150555590531e-06, "loss": 0.6757, "step": 16912 }, { "epoch": 0.5183584651219811, "grad_norm": 1.210888077542827, "learning_rate": 4.945654266803141e-06, "loss": 0.6501, "step": 16913 }, { "epoch": 0.5183891136447223, "grad_norm": 1.2641578387235677, "learning_rate": 4.945157978551237e-06, "loss": 0.6306, "step": 16914 }, { "epoch": 0.5184197621674635, "grad_norm": 1.1251586101237923, "learning_rate": 4.944661690839701e-06, "loss": 0.595, "step": 16915 }, { "epoch": 0.5184504106902047, "grad_norm": 1.4262225321297815, "learning_rate": 4.944165403673431e-06, "loss": 0.6419, "step": 16916 }, { "epoch": 0.518481059212946, "grad_norm": 1.277747954062489, "learning_rate": 4.94366911705731e-06, "loss": 0.5676, "step": 16917 }, { "epoch": 0.5185117077356871, "grad_norm": 1.1335516396916205, "learning_rate": 4.9431728309962355e-06, "loss": 0.5734, "step": 16918 }, { "epoch": 0.5185423562584284, "grad_norm": 1.1852485445725591, "learning_rate": 4.942676545495092e-06, "loss": 0.57, "step": 16919 }, { "epoch": 0.5185730047811695, "grad_norm": 1.3527590963651637, "learning_rate": 4.94218026055877e-06, "loss": 0.7111, "step": 16920 }, { "epoch": 0.5186036533039108, "grad_norm": 1.294460040716214, "learning_rate": 4.941683976192162e-06, "loss": 0.6462, "step": 16921 }, { "epoch": 0.5186343018266519, "grad_norm": 1.4721615757863056, "learning_rate": 4.941187692400157e-06, "loss": 0.6553, "step": 16922 }, { "epoch": 0.5186649503493932, "grad_norm": 1.3476873357756327, "learning_rate": 4.940691409187643e-06, "loss": 0.7127, "step": 16923 }, { "epoch": 0.5186955988721343, "grad_norm": 1.3510302494353739, "learning_rate": 4.940195126559514e-06, "loss": 0.5959, "step": 16924 }, { "epoch": 0.5187262473948756, "grad_norm": 1.2521324493732013, "learning_rate": 4.9396988445206575e-06, "loss": 0.7303, "step": 16925 }, { "epoch": 0.5187568959176168, "grad_norm": 1.2943749920491858, "learning_rate": 4.93920256307596e-06, "loss": 0.6572, "step": 16926 }, { "epoch": 0.518787544440358, "grad_norm": 1.1810205625983263, "learning_rate": 4.93870628223032e-06, "loss": 0.5685, "step": 16927 }, { "epoch": 0.5188181929630992, "grad_norm": 1.2162730356530636, "learning_rate": 4.9382100019886185e-06, "loss": 0.6499, "step": 16928 }, { "epoch": 0.5188488414858404, "grad_norm": 1.305648232005941, "learning_rate": 4.937713722355754e-06, "loss": 0.6376, "step": 16929 }, { "epoch": 0.5188794900085816, "grad_norm": 1.134769854800611, "learning_rate": 4.937217443336609e-06, "loss": 0.5793, "step": 16930 }, { "epoch": 0.5189101385313228, "grad_norm": 1.240843502720526, "learning_rate": 4.936721164936077e-06, "loss": 0.58, "step": 16931 }, { "epoch": 0.518940787054064, "grad_norm": 1.1400672257985662, "learning_rate": 4.936224887159049e-06, "loss": 0.6775, "step": 16932 }, { "epoch": 0.5189714355768053, "grad_norm": 1.3934228845312908, "learning_rate": 4.935728610010412e-06, "loss": 0.6893, "step": 16933 }, { "epoch": 0.5190020840995464, "grad_norm": 1.2921379560385529, "learning_rate": 4.935232333495058e-06, "loss": 0.6783, "step": 16934 }, { "epoch": 0.5190327326222876, "grad_norm": 1.2405430509369153, "learning_rate": 4.934736057617877e-06, "loss": 0.6684, "step": 16935 }, { "epoch": 0.5190633811450288, "grad_norm": 1.2806412383742274, "learning_rate": 4.9342397823837565e-06, "loss": 0.5993, "step": 16936 }, { "epoch": 0.51909402966777, "grad_norm": 1.133834839698501, "learning_rate": 4.93374350779759e-06, "loss": 0.5636, "step": 16937 }, { "epoch": 0.5191246781905112, "grad_norm": 1.3338946952214767, "learning_rate": 4.933247233864267e-06, "loss": 0.6556, "step": 16938 }, { "epoch": 0.5191553267132524, "grad_norm": 1.2725441850415176, "learning_rate": 4.932750960588672e-06, "loss": 0.6127, "step": 16939 }, { "epoch": 0.5191859752359936, "grad_norm": 1.1542044651583265, "learning_rate": 4.932254687975703e-06, "loss": 0.6901, "step": 16940 }, { "epoch": 0.5192166237587348, "grad_norm": 1.2108700299573478, "learning_rate": 4.931758416030244e-06, "loss": 0.6396, "step": 16941 }, { "epoch": 0.519247272281476, "grad_norm": 1.3261165915405693, "learning_rate": 4.931262144757186e-06, "loss": 0.6927, "step": 16942 }, { "epoch": 0.5192779208042172, "grad_norm": 1.2226380490366093, "learning_rate": 4.9307658741614216e-06, "loss": 0.5701, "step": 16943 }, { "epoch": 0.5193085693269585, "grad_norm": 1.1489889713375319, "learning_rate": 4.930269604247836e-06, "loss": 0.6337, "step": 16944 }, { "epoch": 0.5193392178496996, "grad_norm": 0.5135159468463637, "learning_rate": 4.929773335021324e-06, "loss": 0.4159, "step": 16945 }, { "epoch": 0.5193698663724409, "grad_norm": 1.3000694666291346, "learning_rate": 4.929277066486774e-06, "loss": 0.7264, "step": 16946 }, { "epoch": 0.519400514895182, "grad_norm": 1.2900855609092174, "learning_rate": 4.928780798649073e-06, "loss": 0.6618, "step": 16947 }, { "epoch": 0.5194311634179233, "grad_norm": 1.2758082047650454, "learning_rate": 4.928284531513114e-06, "loss": 0.658, "step": 16948 }, { "epoch": 0.5194618119406644, "grad_norm": 1.3079166812526164, "learning_rate": 4.927788265083787e-06, "loss": 0.6436, "step": 16949 }, { "epoch": 0.5194924604634057, "grad_norm": 1.2977715140205042, "learning_rate": 4.927291999365979e-06, "loss": 0.7415, "step": 16950 }, { "epoch": 0.5195231089861468, "grad_norm": 1.1950182268659406, "learning_rate": 4.9267957343645824e-06, "loss": 0.635, "step": 16951 }, { "epoch": 0.5195537575088881, "grad_norm": 1.2360647543094, "learning_rate": 4.926299470084486e-06, "loss": 0.5656, "step": 16952 }, { "epoch": 0.5195844060316293, "grad_norm": 1.1965690664077817, "learning_rate": 4.925803206530579e-06, "loss": 0.6421, "step": 16953 }, { "epoch": 0.5196150545543705, "grad_norm": 1.1971733955702293, "learning_rate": 4.925306943707753e-06, "loss": 0.6589, "step": 16954 }, { "epoch": 0.5196457030771117, "grad_norm": 1.428573134389761, "learning_rate": 4.924810681620896e-06, "loss": 0.7063, "step": 16955 }, { "epoch": 0.5196763515998529, "grad_norm": 1.20685426478846, "learning_rate": 4.924314420274899e-06, "loss": 0.6108, "step": 16956 }, { "epoch": 0.5197070001225941, "grad_norm": 1.388088791577573, "learning_rate": 4.923818159674653e-06, "loss": 0.5938, "step": 16957 }, { "epoch": 0.5197376486453353, "grad_norm": 0.455573952499691, "learning_rate": 4.923321899825043e-06, "loss": 0.4122, "step": 16958 }, { "epoch": 0.5197682971680765, "grad_norm": 1.392774848325522, "learning_rate": 4.922825640730965e-06, "loss": 0.7082, "step": 16959 }, { "epoch": 0.5197989456908177, "grad_norm": 0.4524717996810823, "learning_rate": 4.9223293823973045e-06, "loss": 0.4108, "step": 16960 }, { "epoch": 0.5198295942135589, "grad_norm": 1.3253147678765005, "learning_rate": 4.921833124828952e-06, "loss": 0.6607, "step": 16961 }, { "epoch": 0.5198602427363002, "grad_norm": 1.3512281010595022, "learning_rate": 4.921336868030797e-06, "loss": 0.6834, "step": 16962 }, { "epoch": 0.5198908912590413, "grad_norm": 1.1703988797153373, "learning_rate": 4.920840612007731e-06, "loss": 0.6558, "step": 16963 }, { "epoch": 0.5199215397817826, "grad_norm": 1.2458209529946769, "learning_rate": 4.9203443567646434e-06, "loss": 0.6302, "step": 16964 }, { "epoch": 0.5199521883045237, "grad_norm": 1.224894626401778, "learning_rate": 4.919848102306422e-06, "loss": 0.515, "step": 16965 }, { "epoch": 0.5199828368272649, "grad_norm": 1.2945102009064304, "learning_rate": 4.919351848637958e-06, "loss": 0.6445, "step": 16966 }, { "epoch": 0.5200134853500061, "grad_norm": 1.3304913242873437, "learning_rate": 4.918855595764141e-06, "loss": 0.6488, "step": 16967 }, { "epoch": 0.5200441338727473, "grad_norm": 1.168821162923395, "learning_rate": 4.9183593436898615e-06, "loss": 0.6563, "step": 16968 }, { "epoch": 0.5200747823954885, "grad_norm": 1.2307153315363188, "learning_rate": 4.917863092420006e-06, "loss": 0.6814, "step": 16969 }, { "epoch": 0.5201054309182297, "grad_norm": 1.3236921529648678, "learning_rate": 4.91736684195947e-06, "loss": 0.5887, "step": 16970 }, { "epoch": 0.520136079440971, "grad_norm": 1.2431246445107669, "learning_rate": 4.916870592313135e-06, "loss": 0.7177, "step": 16971 }, { "epoch": 0.5201667279637121, "grad_norm": 1.0028430635063583, "learning_rate": 4.9163743434858994e-06, "loss": 0.6749, "step": 16972 }, { "epoch": 0.5201973764864534, "grad_norm": 1.259213169249742, "learning_rate": 4.9158780954826475e-06, "loss": 0.5924, "step": 16973 }, { "epoch": 0.5202280250091945, "grad_norm": 1.3195041360595765, "learning_rate": 4.915381848308269e-06, "loss": 0.5981, "step": 16974 }, { "epoch": 0.5202586735319358, "grad_norm": 1.2360673662821788, "learning_rate": 4.914885601967656e-06, "loss": 0.6093, "step": 16975 }, { "epoch": 0.5202893220546769, "grad_norm": 1.1881080490505875, "learning_rate": 4.914389356465698e-06, "loss": 0.6609, "step": 16976 }, { "epoch": 0.5203199705774182, "grad_norm": 1.3027364882127332, "learning_rate": 4.913893111807281e-06, "loss": 0.6456, "step": 16977 }, { "epoch": 0.5203506191001593, "grad_norm": 1.2914152767683238, "learning_rate": 4.913396867997299e-06, "loss": 0.6349, "step": 16978 }, { "epoch": 0.5203812676229006, "grad_norm": 1.0838503697194457, "learning_rate": 4.912900625040641e-06, "loss": 0.6261, "step": 16979 }, { "epoch": 0.5204119161456418, "grad_norm": 1.2530207116141414, "learning_rate": 4.912404382942191e-06, "loss": 0.584, "step": 16980 }, { "epoch": 0.520442564668383, "grad_norm": 0.4870832180999887, "learning_rate": 4.9119081417068474e-06, "loss": 0.4052, "step": 16981 }, { "epoch": 0.5204732131911242, "grad_norm": 0.5089410228279302, "learning_rate": 4.9114119013394925e-06, "loss": 0.4068, "step": 16982 }, { "epoch": 0.5205038617138654, "grad_norm": 1.2699892642127255, "learning_rate": 4.910915661845021e-06, "loss": 0.5943, "step": 16983 }, { "epoch": 0.5205345102366066, "grad_norm": 1.1856235616505075, "learning_rate": 4.91041942322832e-06, "loss": 0.5851, "step": 16984 }, { "epoch": 0.5205651587593478, "grad_norm": 1.2846158699101382, "learning_rate": 4.909923185494278e-06, "loss": 0.6507, "step": 16985 }, { "epoch": 0.520595807282089, "grad_norm": 1.449606842782342, "learning_rate": 4.909426948647787e-06, "loss": 0.6104, "step": 16986 }, { "epoch": 0.5206264558048302, "grad_norm": 1.5144583866370738, "learning_rate": 4.908930712693735e-06, "loss": 0.6723, "step": 16987 }, { "epoch": 0.5206571043275714, "grad_norm": 1.2608725112204124, "learning_rate": 4.908434477637011e-06, "loss": 0.6442, "step": 16988 }, { "epoch": 0.5206877528503127, "grad_norm": 1.3085573156840302, "learning_rate": 4.907938243482507e-06, "loss": 0.6656, "step": 16989 }, { "epoch": 0.5207184013730538, "grad_norm": 1.2229466934165305, "learning_rate": 4.90744201023511e-06, "loss": 0.6173, "step": 16990 }, { "epoch": 0.5207490498957951, "grad_norm": 1.4241070922151229, "learning_rate": 4.906945777899712e-06, "loss": 0.6779, "step": 16991 }, { "epoch": 0.5207796984185362, "grad_norm": 1.3902804604891574, "learning_rate": 4.906449546481201e-06, "loss": 0.6134, "step": 16992 }, { "epoch": 0.5208103469412775, "grad_norm": 1.3590486495615137, "learning_rate": 4.905953315984465e-06, "loss": 0.6488, "step": 16993 }, { "epoch": 0.5208409954640186, "grad_norm": 1.2718579418121165, "learning_rate": 4.905457086414397e-06, "loss": 0.6875, "step": 16994 }, { "epoch": 0.5208716439867599, "grad_norm": 0.4762874625755349, "learning_rate": 4.904960857775883e-06, "loss": 0.413, "step": 16995 }, { "epoch": 0.520902292509501, "grad_norm": 1.166154146406899, "learning_rate": 4.904464630073814e-06, "loss": 0.6549, "step": 16996 }, { "epoch": 0.5209329410322422, "grad_norm": 1.3194712136116646, "learning_rate": 4.90396840331308e-06, "loss": 0.6043, "step": 16997 }, { "epoch": 0.5209635895549835, "grad_norm": 1.289163060264457, "learning_rate": 4.90347217749857e-06, "loss": 0.5901, "step": 16998 }, { "epoch": 0.5209942380777246, "grad_norm": 1.2487022369277114, "learning_rate": 4.9029759526351726e-06, "loss": 0.6713, "step": 16999 }, { "epoch": 0.5210248866004659, "grad_norm": 1.2491588312918516, "learning_rate": 4.902479728727778e-06, "loss": 0.6031, "step": 17000 }, { "epoch": 0.521055535123207, "grad_norm": 0.4601517931215147, "learning_rate": 4.901983505781276e-06, "loss": 0.4304, "step": 17001 }, { "epoch": 0.5210861836459483, "grad_norm": 1.2945946792962777, "learning_rate": 4.901487283800556e-06, "loss": 0.5617, "step": 17002 }, { "epoch": 0.5211168321686894, "grad_norm": 1.2723730384923664, "learning_rate": 4.900991062790507e-06, "loss": 0.6783, "step": 17003 }, { "epoch": 0.5211474806914307, "grad_norm": 1.1604230578030368, "learning_rate": 4.900494842756017e-06, "loss": 0.672, "step": 17004 }, { "epoch": 0.5211781292141718, "grad_norm": 1.2121065588240663, "learning_rate": 4.899998623701979e-06, "loss": 0.5636, "step": 17005 }, { "epoch": 0.5212087777369131, "grad_norm": 1.3671593309686438, "learning_rate": 4.899502405633279e-06, "loss": 0.6254, "step": 17006 }, { "epoch": 0.5212394262596542, "grad_norm": 1.5089689280372485, "learning_rate": 4.899006188554807e-06, "loss": 0.6343, "step": 17007 }, { "epoch": 0.5212700747823955, "grad_norm": 1.285794081072297, "learning_rate": 4.898509972471453e-06, "loss": 0.587, "step": 17008 }, { "epoch": 0.5213007233051367, "grad_norm": 1.2430566939396004, "learning_rate": 4.898013757388106e-06, "loss": 0.6499, "step": 17009 }, { "epoch": 0.5213313718278779, "grad_norm": 0.4599108106584794, "learning_rate": 4.897517543309656e-06, "loss": 0.4137, "step": 17010 }, { "epoch": 0.5213620203506191, "grad_norm": 1.2256625358046118, "learning_rate": 4.897021330240993e-06, "loss": 0.6257, "step": 17011 }, { "epoch": 0.5213926688733603, "grad_norm": 1.3510448199048937, "learning_rate": 4.896525118187002e-06, "loss": 0.6372, "step": 17012 }, { "epoch": 0.5214233173961015, "grad_norm": 1.1560592994308876, "learning_rate": 4.896028907152579e-06, "loss": 0.5477, "step": 17013 }, { "epoch": 0.5214539659188427, "grad_norm": 1.1383663287279109, "learning_rate": 4.8955326971426085e-06, "loss": 0.6153, "step": 17014 }, { "epoch": 0.5214846144415839, "grad_norm": 1.1780954833698485, "learning_rate": 4.895036488161981e-06, "loss": 0.7073, "step": 17015 }, { "epoch": 0.5215152629643252, "grad_norm": 1.2934991107915603, "learning_rate": 4.894540280215586e-06, "loss": 0.6733, "step": 17016 }, { "epoch": 0.5215459114870663, "grad_norm": 1.2731661882635037, "learning_rate": 4.894044073308311e-06, "loss": 0.6102, "step": 17017 }, { "epoch": 0.5215765600098076, "grad_norm": 1.082579524489912, "learning_rate": 4.893547867445049e-06, "loss": 0.6046, "step": 17018 }, { "epoch": 0.5216072085325487, "grad_norm": 1.4970949342414532, "learning_rate": 4.893051662630686e-06, "loss": 0.5704, "step": 17019 }, { "epoch": 0.52163785705529, "grad_norm": 1.1505982120734488, "learning_rate": 4.892555458870112e-06, "loss": 0.5779, "step": 17020 }, { "epoch": 0.5216685055780311, "grad_norm": 1.2249993284053922, "learning_rate": 4.892059256168217e-06, "loss": 0.6257, "step": 17021 }, { "epoch": 0.5216991541007724, "grad_norm": 1.3268903773844827, "learning_rate": 4.891563054529892e-06, "loss": 0.7159, "step": 17022 }, { "epoch": 0.5217298026235135, "grad_norm": 1.3655644920369951, "learning_rate": 4.89106685396002e-06, "loss": 0.7134, "step": 17023 }, { "epoch": 0.5217604511462548, "grad_norm": 0.4735686064277634, "learning_rate": 4.890570654463497e-06, "loss": 0.4264, "step": 17024 }, { "epoch": 0.521791099668996, "grad_norm": 1.2924327938633726, "learning_rate": 4.890074456045209e-06, "loss": 0.7386, "step": 17025 }, { "epoch": 0.5218217481917372, "grad_norm": 1.555050645753377, "learning_rate": 4.8895782587100434e-06, "loss": 0.7602, "step": 17026 }, { "epoch": 0.5218523967144784, "grad_norm": 0.47593642308037376, "learning_rate": 4.889082062462894e-06, "loss": 0.4118, "step": 17027 }, { "epoch": 0.5218830452372195, "grad_norm": 1.2005141176989962, "learning_rate": 4.888585867308646e-06, "loss": 0.5668, "step": 17028 }, { "epoch": 0.5219136937599608, "grad_norm": 0.44436422549205273, "learning_rate": 4.8880896732521905e-06, "loss": 0.4126, "step": 17029 }, { "epoch": 0.5219443422827019, "grad_norm": 1.242260086842511, "learning_rate": 4.887593480298416e-06, "loss": 0.5713, "step": 17030 }, { "epoch": 0.5219749908054432, "grad_norm": 1.2160783984976113, "learning_rate": 4.88709728845221e-06, "loss": 0.6138, "step": 17031 }, { "epoch": 0.5220056393281843, "grad_norm": 1.2165200074255949, "learning_rate": 4.886601097718466e-06, "loss": 0.6462, "step": 17032 }, { "epoch": 0.5220362878509256, "grad_norm": 1.300604099243954, "learning_rate": 4.886104908102071e-06, "loss": 0.6953, "step": 17033 }, { "epoch": 0.5220669363736667, "grad_norm": 1.2818506816782633, "learning_rate": 4.885608719607909e-06, "loss": 0.6545, "step": 17034 }, { "epoch": 0.522097584896408, "grad_norm": 1.2865095885119295, "learning_rate": 4.8851125322408786e-06, "loss": 0.6292, "step": 17035 }, { "epoch": 0.5221282334191492, "grad_norm": 0.4869294577638424, "learning_rate": 4.884616346005859e-06, "loss": 0.4414, "step": 17036 }, { "epoch": 0.5221588819418904, "grad_norm": 1.2676688065056738, "learning_rate": 4.884120160907749e-06, "loss": 0.6564, "step": 17037 }, { "epoch": 0.5221895304646316, "grad_norm": 1.22805892052172, "learning_rate": 4.8836239769514305e-06, "loss": 0.6151, "step": 17038 }, { "epoch": 0.5222201789873728, "grad_norm": 1.2338287278353475, "learning_rate": 4.883127794141794e-06, "loss": 0.6964, "step": 17039 }, { "epoch": 0.522250827510114, "grad_norm": 0.48222814291203964, "learning_rate": 4.88263161248373e-06, "loss": 0.4233, "step": 17040 }, { "epoch": 0.5222814760328552, "grad_norm": 1.2183476427334172, "learning_rate": 4.882135431982127e-06, "loss": 0.7067, "step": 17041 }, { "epoch": 0.5223121245555964, "grad_norm": 1.2673967477737345, "learning_rate": 4.881639252641872e-06, "loss": 0.659, "step": 17042 }, { "epoch": 0.5223427730783377, "grad_norm": 0.43953323248655174, "learning_rate": 4.881143074467857e-06, "loss": 0.3848, "step": 17043 }, { "epoch": 0.5223734216010788, "grad_norm": 1.1874159497794767, "learning_rate": 4.88064689746497e-06, "loss": 0.6477, "step": 17044 }, { "epoch": 0.5224040701238201, "grad_norm": 1.2353864330004913, "learning_rate": 4.8801507216381e-06, "loss": 0.5527, "step": 17045 }, { "epoch": 0.5224347186465612, "grad_norm": 1.2445134041368477, "learning_rate": 4.879654546992136e-06, "loss": 0.7092, "step": 17046 }, { "epoch": 0.5224653671693025, "grad_norm": 1.2429425359487076, "learning_rate": 4.879158373531964e-06, "loss": 0.6604, "step": 17047 }, { "epoch": 0.5224960156920436, "grad_norm": 1.3522315072832825, "learning_rate": 4.878662201262478e-06, "loss": 0.7059, "step": 17048 }, { "epoch": 0.5225266642147849, "grad_norm": 1.2636568404496655, "learning_rate": 4.878166030188564e-06, "loss": 0.519, "step": 17049 }, { "epoch": 0.522557312737526, "grad_norm": 1.2628666481211361, "learning_rate": 4.877669860315109e-06, "loss": 0.6592, "step": 17050 }, { "epoch": 0.5225879612602673, "grad_norm": 1.1633349177082275, "learning_rate": 4.877173691647006e-06, "loss": 0.6435, "step": 17051 }, { "epoch": 0.5226186097830084, "grad_norm": 1.2609876166452783, "learning_rate": 4.876677524189144e-06, "loss": 0.7391, "step": 17052 }, { "epoch": 0.5226492583057497, "grad_norm": 1.1863040280060393, "learning_rate": 4.876181357946406e-06, "loss": 0.6853, "step": 17053 }, { "epoch": 0.5226799068284909, "grad_norm": 1.2047671969663332, "learning_rate": 4.875685192923688e-06, "loss": 0.5888, "step": 17054 }, { "epoch": 0.5227105553512321, "grad_norm": 1.308634942035658, "learning_rate": 4.875189029125872e-06, "loss": 0.7005, "step": 17055 }, { "epoch": 0.5227412038739733, "grad_norm": 1.2196162389718699, "learning_rate": 4.8746928665578545e-06, "loss": 0.647, "step": 17056 }, { "epoch": 0.5227718523967145, "grad_norm": 1.4572257830768724, "learning_rate": 4.874196705224518e-06, "loss": 0.6475, "step": 17057 }, { "epoch": 0.5228025009194557, "grad_norm": 1.3730315308935703, "learning_rate": 4.8737005451307524e-06, "loss": 0.5636, "step": 17058 }, { "epoch": 0.5228331494421968, "grad_norm": 1.1891212884033096, "learning_rate": 4.87320438628145e-06, "loss": 0.6782, "step": 17059 }, { "epoch": 0.5228637979649381, "grad_norm": 1.1998785341188696, "learning_rate": 4.872708228681497e-06, "loss": 0.5902, "step": 17060 }, { "epoch": 0.5228944464876792, "grad_norm": 1.2248969968196024, "learning_rate": 4.872212072335781e-06, "loss": 0.5817, "step": 17061 }, { "epoch": 0.5229250950104205, "grad_norm": 1.2651756179101068, "learning_rate": 4.871715917249193e-06, "loss": 0.6882, "step": 17062 }, { "epoch": 0.5229557435331617, "grad_norm": 1.202972437318352, "learning_rate": 4.87121976342662e-06, "loss": 0.5961, "step": 17063 }, { "epoch": 0.5229863920559029, "grad_norm": 1.319617418139317, "learning_rate": 4.870723610872953e-06, "loss": 0.6771, "step": 17064 }, { "epoch": 0.5230170405786441, "grad_norm": 1.384783969775118, "learning_rate": 4.87022745959308e-06, "loss": 0.5963, "step": 17065 }, { "epoch": 0.5230476891013853, "grad_norm": 1.25589457343093, "learning_rate": 4.8697313095918856e-06, "loss": 0.6786, "step": 17066 }, { "epoch": 0.5230783376241265, "grad_norm": 0.5255950039190204, "learning_rate": 4.8692351608742654e-06, "loss": 0.4059, "step": 17067 }, { "epoch": 0.5231089861468677, "grad_norm": 1.3093157182285002, "learning_rate": 4.868739013445104e-06, "loss": 0.5948, "step": 17068 }, { "epoch": 0.5231396346696089, "grad_norm": 1.1878337748315755, "learning_rate": 4.868242867309289e-06, "loss": 0.6049, "step": 17069 }, { "epoch": 0.5231702831923501, "grad_norm": 1.2877549312731167, "learning_rate": 4.8677467224717125e-06, "loss": 0.6615, "step": 17070 }, { "epoch": 0.5232009317150913, "grad_norm": 1.3805037355733958, "learning_rate": 4.86725057893726e-06, "loss": 0.6639, "step": 17071 }, { "epoch": 0.5232315802378326, "grad_norm": 0.4719227082459899, "learning_rate": 4.8667544367108225e-06, "loss": 0.4291, "step": 17072 }, { "epoch": 0.5232622287605737, "grad_norm": 1.3290137182984159, "learning_rate": 4.866258295797287e-06, "loss": 0.6316, "step": 17073 }, { "epoch": 0.523292877283315, "grad_norm": 1.4530407631695281, "learning_rate": 4.865762156201543e-06, "loss": 0.715, "step": 17074 }, { "epoch": 0.5233235258060561, "grad_norm": 1.0379545689777976, "learning_rate": 4.865266017928478e-06, "loss": 0.6385, "step": 17075 }, { "epoch": 0.5233541743287974, "grad_norm": 1.3933928814962944, "learning_rate": 4.864769880982984e-06, "loss": 0.6732, "step": 17076 }, { "epoch": 0.5233848228515385, "grad_norm": 0.49682840095076675, "learning_rate": 4.8642737453699434e-06, "loss": 0.4329, "step": 17077 }, { "epoch": 0.5234154713742798, "grad_norm": 1.2436257730330025, "learning_rate": 4.863777611094251e-06, "loss": 0.5413, "step": 17078 }, { "epoch": 0.523446119897021, "grad_norm": 1.2018668757261233, "learning_rate": 4.863281478160793e-06, "loss": 0.6066, "step": 17079 }, { "epoch": 0.5234767684197622, "grad_norm": 1.1565527491012548, "learning_rate": 4.8627853465744555e-06, "loss": 0.5476, "step": 17080 }, { "epoch": 0.5235074169425034, "grad_norm": 1.2994438587308108, "learning_rate": 4.862289216340129e-06, "loss": 0.6947, "step": 17081 }, { "epoch": 0.5235380654652446, "grad_norm": 1.3224867988372702, "learning_rate": 4.861793087462703e-06, "loss": 0.7613, "step": 17082 }, { "epoch": 0.5235687139879858, "grad_norm": 1.2707288595136434, "learning_rate": 4.861296959947066e-06, "loss": 0.6029, "step": 17083 }, { "epoch": 0.523599362510727, "grad_norm": 1.2017371808307398, "learning_rate": 4.860800833798104e-06, "loss": 0.5804, "step": 17084 }, { "epoch": 0.5236300110334682, "grad_norm": 1.2838970522171909, "learning_rate": 4.860304709020707e-06, "loss": 0.5916, "step": 17085 }, { "epoch": 0.5236606595562094, "grad_norm": 1.2049900234619795, "learning_rate": 4.8598085856197644e-06, "loss": 0.6347, "step": 17086 }, { "epoch": 0.5236913080789506, "grad_norm": 1.2485310839043746, "learning_rate": 4.859312463600165e-06, "loss": 0.6413, "step": 17087 }, { "epoch": 0.5237219566016919, "grad_norm": 1.3388986947147181, "learning_rate": 4.8588163429667926e-06, "loss": 0.6315, "step": 17088 }, { "epoch": 0.523752605124433, "grad_norm": 1.2799474864778984, "learning_rate": 4.858320223724542e-06, "loss": 0.7315, "step": 17089 }, { "epoch": 0.5237832536471742, "grad_norm": 1.3479284081663776, "learning_rate": 4.857824105878296e-06, "loss": 0.6345, "step": 17090 }, { "epoch": 0.5238139021699154, "grad_norm": 1.2573496653576668, "learning_rate": 4.857327989432946e-06, "loss": 0.7206, "step": 17091 }, { "epoch": 0.5238445506926566, "grad_norm": 1.1510261395633483, "learning_rate": 4.856831874393382e-06, "loss": 0.5935, "step": 17092 }, { "epoch": 0.5238751992153978, "grad_norm": 1.289275141030456, "learning_rate": 4.856335760764488e-06, "loss": 0.6487, "step": 17093 }, { "epoch": 0.523905847738139, "grad_norm": 1.0998506888886614, "learning_rate": 4.855839648551154e-06, "loss": 0.6297, "step": 17094 }, { "epoch": 0.5239364962608802, "grad_norm": 1.3340878525153719, "learning_rate": 4.8553435377582704e-06, "loss": 0.568, "step": 17095 }, { "epoch": 0.5239671447836214, "grad_norm": 1.3231311254733438, "learning_rate": 4.854847428390723e-06, "loss": 0.6714, "step": 17096 }, { "epoch": 0.5239977933063626, "grad_norm": 1.2279869837832242, "learning_rate": 4.854351320453402e-06, "loss": 0.7346, "step": 17097 }, { "epoch": 0.5240284418291038, "grad_norm": 1.2026280131850913, "learning_rate": 4.853855213951193e-06, "loss": 0.6095, "step": 17098 }, { "epoch": 0.5240590903518451, "grad_norm": 1.1853942194784854, "learning_rate": 4.8533591088889885e-06, "loss": 0.577, "step": 17099 }, { "epoch": 0.5240897388745862, "grad_norm": 1.382907626585923, "learning_rate": 4.852863005271673e-06, "loss": 0.6618, "step": 17100 }, { "epoch": 0.5241203873973275, "grad_norm": 1.266065579232441, "learning_rate": 4.8523669031041345e-06, "loss": 0.6223, "step": 17101 }, { "epoch": 0.5241510359200686, "grad_norm": 1.21283721049982, "learning_rate": 4.851870802391264e-06, "loss": 0.5363, "step": 17102 }, { "epoch": 0.5241816844428099, "grad_norm": 0.48594234753674853, "learning_rate": 4.851374703137949e-06, "loss": 0.4311, "step": 17103 }, { "epoch": 0.524212332965551, "grad_norm": 1.2598564532187186, "learning_rate": 4.850878605349076e-06, "loss": 0.6376, "step": 17104 }, { "epoch": 0.5242429814882923, "grad_norm": 1.203719846761111, "learning_rate": 4.8503825090295345e-06, "loss": 0.5834, "step": 17105 }, { "epoch": 0.5242736300110334, "grad_norm": 1.1965944628906293, "learning_rate": 4.8498864141842136e-06, "loss": 0.581, "step": 17106 }, { "epoch": 0.5243042785337747, "grad_norm": 0.4700824261739678, "learning_rate": 4.849390320817998e-06, "loss": 0.4141, "step": 17107 }, { "epoch": 0.5243349270565159, "grad_norm": 1.1232012977631582, "learning_rate": 4.84889422893578e-06, "loss": 0.6197, "step": 17108 }, { "epoch": 0.5243655755792571, "grad_norm": 1.4300955772259683, "learning_rate": 4.848398138542444e-06, "loss": 0.684, "step": 17109 }, { "epoch": 0.5243962241019983, "grad_norm": 1.2967536149112189, "learning_rate": 4.847902049642882e-06, "loss": 0.6057, "step": 17110 }, { "epoch": 0.5244268726247395, "grad_norm": 1.2300280130893428, "learning_rate": 4.847405962241979e-06, "loss": 0.6609, "step": 17111 }, { "epoch": 0.5244575211474807, "grad_norm": 1.258991442293285, "learning_rate": 4.846909876344624e-06, "loss": 0.6032, "step": 17112 }, { "epoch": 0.5244881696702219, "grad_norm": 1.188496030091245, "learning_rate": 4.846413791955706e-06, "loss": 0.7098, "step": 17113 }, { "epoch": 0.5245188181929631, "grad_norm": 0.48974728999207257, "learning_rate": 4.845917709080111e-06, "loss": 0.4136, "step": 17114 }, { "epoch": 0.5245494667157043, "grad_norm": 1.380675867198381, "learning_rate": 4.845421627722728e-06, "loss": 0.6411, "step": 17115 }, { "epoch": 0.5245801152384455, "grad_norm": 0.432814347709694, "learning_rate": 4.8449255478884465e-06, "loss": 0.4043, "step": 17116 }, { "epoch": 0.5246107637611868, "grad_norm": 1.3191585780614725, "learning_rate": 4.844429469582153e-06, "loss": 0.6247, "step": 17117 }, { "epoch": 0.5246414122839279, "grad_norm": 1.1649710724971511, "learning_rate": 4.843933392808735e-06, "loss": 0.6667, "step": 17118 }, { "epoch": 0.5246720608066692, "grad_norm": 1.245874975050428, "learning_rate": 4.843437317573083e-06, "loss": 0.6098, "step": 17119 }, { "epoch": 0.5247027093294103, "grad_norm": 0.4735991048658074, "learning_rate": 4.84294124388008e-06, "loss": 0.407, "step": 17120 }, { "epoch": 0.5247333578521515, "grad_norm": 1.4708962872835922, "learning_rate": 4.842445171734621e-06, "loss": 0.6302, "step": 17121 }, { "epoch": 0.5247640063748927, "grad_norm": 1.2751619790027076, "learning_rate": 4.841949101141588e-06, "loss": 0.6066, "step": 17122 }, { "epoch": 0.5247946548976339, "grad_norm": 1.265757493603402, "learning_rate": 4.841453032105871e-06, "loss": 0.6288, "step": 17123 }, { "epoch": 0.5248253034203751, "grad_norm": 1.265888845554569, "learning_rate": 4.840956964632358e-06, "loss": 0.6398, "step": 17124 }, { "epoch": 0.5248559519431163, "grad_norm": 1.2045199702359335, "learning_rate": 4.840460898725935e-06, "loss": 0.6262, "step": 17125 }, { "epoch": 0.5248866004658576, "grad_norm": 1.3491261535984642, "learning_rate": 4.839964834391494e-06, "loss": 0.7403, "step": 17126 }, { "epoch": 0.5249172489885987, "grad_norm": 1.304829525020719, "learning_rate": 4.83946877163392e-06, "loss": 0.7679, "step": 17127 }, { "epoch": 0.52494789751134, "grad_norm": 1.2146757908020756, "learning_rate": 4.8389727104581e-06, "loss": 0.603, "step": 17128 }, { "epoch": 0.5249785460340811, "grad_norm": 1.2853933543322171, "learning_rate": 4.838476650868924e-06, "loss": 0.569, "step": 17129 }, { "epoch": 0.5250091945568224, "grad_norm": 1.2078728732744015, "learning_rate": 4.83798059287128e-06, "loss": 0.5563, "step": 17130 }, { "epoch": 0.5250398430795635, "grad_norm": 0.43082282303614367, "learning_rate": 4.837484536470052e-06, "loss": 0.3896, "step": 17131 }, { "epoch": 0.5250704916023048, "grad_norm": 1.3251794280340214, "learning_rate": 4.836988481670133e-06, "loss": 0.6773, "step": 17132 }, { "epoch": 0.5251011401250459, "grad_norm": 1.2702688661436405, "learning_rate": 4.836492428476407e-06, "loss": 0.658, "step": 17133 }, { "epoch": 0.5251317886477872, "grad_norm": 1.376073708466179, "learning_rate": 4.835996376893763e-06, "loss": 0.6124, "step": 17134 }, { "epoch": 0.5251624371705284, "grad_norm": 1.6719334123295575, "learning_rate": 4.835500326927089e-06, "loss": 0.6717, "step": 17135 }, { "epoch": 0.5251930856932696, "grad_norm": 1.1877307810042859, "learning_rate": 4.835004278581271e-06, "loss": 0.6649, "step": 17136 }, { "epoch": 0.5252237342160108, "grad_norm": 1.2751758823746635, "learning_rate": 4.8345082318611995e-06, "loss": 0.656, "step": 17137 }, { "epoch": 0.525254382738752, "grad_norm": 1.2029083772363325, "learning_rate": 4.834012186771761e-06, "loss": 0.6082, "step": 17138 }, { "epoch": 0.5252850312614932, "grad_norm": 1.387944105856145, "learning_rate": 4.833516143317841e-06, "loss": 0.707, "step": 17139 }, { "epoch": 0.5253156797842344, "grad_norm": 0.4633220369038562, "learning_rate": 4.833020101504331e-06, "loss": 0.394, "step": 17140 }, { "epoch": 0.5253463283069756, "grad_norm": 1.2455412461319437, "learning_rate": 4.832524061336118e-06, "loss": 0.5833, "step": 17141 }, { "epoch": 0.5253769768297168, "grad_norm": 0.47496990149249513, "learning_rate": 4.832028022818085e-06, "loss": 0.4023, "step": 17142 }, { "epoch": 0.525407625352458, "grad_norm": 1.1856538897231645, "learning_rate": 4.831531985955124e-06, "loss": 0.6495, "step": 17143 }, { "epoch": 0.5254382738751993, "grad_norm": 1.1296086313863989, "learning_rate": 4.8310359507521205e-06, "loss": 0.5911, "step": 17144 }, { "epoch": 0.5254689223979404, "grad_norm": 1.6974644045119838, "learning_rate": 4.830539917213965e-06, "loss": 0.5935, "step": 17145 }, { "epoch": 0.5254995709206817, "grad_norm": 1.1123632017919989, "learning_rate": 4.830043885345543e-06, "loss": 0.5229, "step": 17146 }, { "epoch": 0.5255302194434228, "grad_norm": 0.449048198635123, "learning_rate": 4.82954785515174e-06, "loss": 0.4189, "step": 17147 }, { "epoch": 0.5255608679661641, "grad_norm": 1.3862122768991811, "learning_rate": 4.829051826637447e-06, "loss": 0.6145, "step": 17148 }, { "epoch": 0.5255915164889052, "grad_norm": 1.285089772041035, "learning_rate": 4.828555799807552e-06, "loss": 0.6948, "step": 17149 }, { "epoch": 0.5256221650116465, "grad_norm": 1.290975702437061, "learning_rate": 4.828059774666936e-06, "loss": 0.6168, "step": 17150 }, { "epoch": 0.5256528135343876, "grad_norm": 1.3313842037359753, "learning_rate": 4.827563751220495e-06, "loss": 0.6926, "step": 17151 }, { "epoch": 0.5256834620571288, "grad_norm": 1.317702594269716, "learning_rate": 4.827067729473111e-06, "loss": 0.6143, "step": 17152 }, { "epoch": 0.52571411057987, "grad_norm": 1.2853088715249565, "learning_rate": 4.826571709429673e-06, "loss": 0.6234, "step": 17153 }, { "epoch": 0.5257447591026112, "grad_norm": 1.306604564812665, "learning_rate": 4.826075691095068e-06, "loss": 0.6834, "step": 17154 }, { "epoch": 0.5257754076253525, "grad_norm": 1.170995619812175, "learning_rate": 4.825579674474183e-06, "loss": 0.6204, "step": 17155 }, { "epoch": 0.5258060561480936, "grad_norm": 1.3564847699499427, "learning_rate": 4.825083659571907e-06, "loss": 0.6757, "step": 17156 }, { "epoch": 0.5258367046708349, "grad_norm": 1.2382096189428156, "learning_rate": 4.824587646393127e-06, "loss": 0.6333, "step": 17157 }, { "epoch": 0.525867353193576, "grad_norm": 0.46407910103203165, "learning_rate": 4.824091634942728e-06, "loss": 0.4105, "step": 17158 }, { "epoch": 0.5258980017163173, "grad_norm": 1.2483228326180005, "learning_rate": 4.823595625225602e-06, "loss": 0.6219, "step": 17159 }, { "epoch": 0.5259286502390584, "grad_norm": 1.0375504156668252, "learning_rate": 4.8230996172466325e-06, "loss": 0.6535, "step": 17160 }, { "epoch": 0.5259592987617997, "grad_norm": 1.2816961977147114, "learning_rate": 4.8226036110107055e-06, "loss": 0.6422, "step": 17161 }, { "epoch": 0.5259899472845408, "grad_norm": 1.3088726448453323, "learning_rate": 4.822107606522713e-06, "loss": 0.6618, "step": 17162 }, { "epoch": 0.5260205958072821, "grad_norm": 1.1804711021571992, "learning_rate": 4.821611603787538e-06, "loss": 0.617, "step": 17163 }, { "epoch": 0.5260512443300233, "grad_norm": 1.2510738457829116, "learning_rate": 4.821115602810072e-06, "loss": 0.5074, "step": 17164 }, { "epoch": 0.5260818928527645, "grad_norm": 1.3006460315978345, "learning_rate": 4.8206196035951975e-06, "loss": 0.5998, "step": 17165 }, { "epoch": 0.5261125413755057, "grad_norm": 1.139382246569026, "learning_rate": 4.820123606147804e-06, "loss": 0.6202, "step": 17166 }, { "epoch": 0.5261431898982469, "grad_norm": 1.1420076583947945, "learning_rate": 4.8196276104727795e-06, "loss": 0.5419, "step": 17167 }, { "epoch": 0.5261738384209881, "grad_norm": 1.302328045226448, "learning_rate": 4.8191316165750105e-06, "loss": 0.6279, "step": 17168 }, { "epoch": 0.5262044869437293, "grad_norm": 1.2862296817055516, "learning_rate": 4.818635624459383e-06, "loss": 0.7011, "step": 17169 }, { "epoch": 0.5262351354664705, "grad_norm": 1.1939801346004484, "learning_rate": 4.818139634130785e-06, "loss": 0.7064, "step": 17170 }, { "epoch": 0.5262657839892118, "grad_norm": 1.4597641373261672, "learning_rate": 4.817643645594104e-06, "loss": 0.6996, "step": 17171 }, { "epoch": 0.5262964325119529, "grad_norm": 1.1643027062671973, "learning_rate": 4.817147658854227e-06, "loss": 0.6103, "step": 17172 }, { "epoch": 0.5263270810346942, "grad_norm": 1.1960125025528725, "learning_rate": 4.816651673916043e-06, "loss": 0.6245, "step": 17173 }, { "epoch": 0.5263577295574353, "grad_norm": 1.322247435630845, "learning_rate": 4.816155690784433e-06, "loss": 0.6969, "step": 17174 }, { "epoch": 0.5263883780801766, "grad_norm": 1.2567365435139648, "learning_rate": 4.815659709464291e-06, "loss": 0.7642, "step": 17175 }, { "epoch": 0.5264190266029177, "grad_norm": 1.3132354294951676, "learning_rate": 4.8151637299605e-06, "loss": 0.6088, "step": 17176 }, { "epoch": 0.526449675125659, "grad_norm": 1.2030351599851963, "learning_rate": 4.814667752277948e-06, "loss": 0.6243, "step": 17177 }, { "epoch": 0.5264803236484001, "grad_norm": 1.2742980597983005, "learning_rate": 4.814171776421521e-06, "loss": 0.6414, "step": 17178 }, { "epoch": 0.5265109721711414, "grad_norm": 1.2205562607019302, "learning_rate": 4.813675802396109e-06, "loss": 0.5661, "step": 17179 }, { "epoch": 0.5265416206938826, "grad_norm": 1.3457803722785622, "learning_rate": 4.813179830206595e-06, "loss": 0.669, "step": 17180 }, { "epoch": 0.5265722692166238, "grad_norm": 1.3475906049510447, "learning_rate": 4.81268385985787e-06, "loss": 0.6138, "step": 17181 }, { "epoch": 0.526602917739365, "grad_norm": 1.2854689909516013, "learning_rate": 4.8121878913548175e-06, "loss": 0.6634, "step": 17182 }, { "epoch": 0.5266335662621061, "grad_norm": 1.076922813782401, "learning_rate": 4.811691924702327e-06, "loss": 0.631, "step": 17183 }, { "epoch": 0.5266642147848474, "grad_norm": 1.354276313492178, "learning_rate": 4.811195959905284e-06, "loss": 0.6435, "step": 17184 }, { "epoch": 0.5266948633075885, "grad_norm": 1.2867601454181463, "learning_rate": 4.810699996968573e-06, "loss": 0.6316, "step": 17185 }, { "epoch": 0.5267255118303298, "grad_norm": 1.234256676337016, "learning_rate": 4.810204035897087e-06, "loss": 0.6588, "step": 17186 }, { "epoch": 0.5267561603530709, "grad_norm": 0.4766211835983282, "learning_rate": 4.809708076695708e-06, "loss": 0.4062, "step": 17187 }, { "epoch": 0.5267868088758122, "grad_norm": 1.2914068447940739, "learning_rate": 4.8092121193693225e-06, "loss": 0.662, "step": 17188 }, { "epoch": 0.5268174573985533, "grad_norm": 1.2422494754777895, "learning_rate": 4.80871616392282e-06, "loss": 0.6445, "step": 17189 }, { "epoch": 0.5268481059212946, "grad_norm": 1.2231729484548493, "learning_rate": 4.8082202103610844e-06, "loss": 0.5943, "step": 17190 }, { "epoch": 0.5268787544440358, "grad_norm": 1.1911449272436272, "learning_rate": 4.807724258689006e-06, "loss": 0.6345, "step": 17191 }, { "epoch": 0.526909402966777, "grad_norm": 1.2874312101648775, "learning_rate": 4.807228308911469e-06, "loss": 0.7164, "step": 17192 }, { "epoch": 0.5269400514895182, "grad_norm": 0.47138919000561025, "learning_rate": 4.806732361033361e-06, "loss": 0.427, "step": 17193 }, { "epoch": 0.5269707000122594, "grad_norm": 1.3322773673115473, "learning_rate": 4.8062364150595704e-06, "loss": 0.6007, "step": 17194 }, { "epoch": 0.5270013485350006, "grad_norm": 1.3451784712929524, "learning_rate": 4.80574047099498e-06, "loss": 0.5269, "step": 17195 }, { "epoch": 0.5270319970577418, "grad_norm": 1.1379328043398171, "learning_rate": 4.805244528844477e-06, "loss": 0.6105, "step": 17196 }, { "epoch": 0.527062645580483, "grad_norm": 0.44361135556823356, "learning_rate": 4.8047485886129516e-06, "loss": 0.43, "step": 17197 }, { "epoch": 0.5270932941032243, "grad_norm": 0.44910555326843116, "learning_rate": 4.8042526503052875e-06, "loss": 0.4096, "step": 17198 }, { "epoch": 0.5271239426259654, "grad_norm": 1.1645272514708915, "learning_rate": 4.803756713926373e-06, "loss": 0.6374, "step": 17199 }, { "epoch": 0.5271545911487067, "grad_norm": 1.2066590836091937, "learning_rate": 4.803260779481093e-06, "loss": 0.6752, "step": 17200 }, { "epoch": 0.5271852396714478, "grad_norm": 0.4458481694627809, "learning_rate": 4.802764846974334e-06, "loss": 0.4308, "step": 17201 }, { "epoch": 0.5272158881941891, "grad_norm": 1.2099464786162597, "learning_rate": 4.8022689164109855e-06, "loss": 0.5784, "step": 17202 }, { "epoch": 0.5272465367169302, "grad_norm": 1.141417882125527, "learning_rate": 4.801772987795932e-06, "loss": 0.668, "step": 17203 }, { "epoch": 0.5272771852396715, "grad_norm": 1.3488806695839295, "learning_rate": 4.801277061134057e-06, "loss": 0.6897, "step": 17204 }, { "epoch": 0.5273078337624126, "grad_norm": 1.334979250940613, "learning_rate": 4.800781136430254e-06, "loss": 0.5773, "step": 17205 }, { "epoch": 0.5273384822851539, "grad_norm": 1.1307797717305725, "learning_rate": 4.800285213689403e-06, "loss": 0.6909, "step": 17206 }, { "epoch": 0.527369130807895, "grad_norm": 1.253920115877386, "learning_rate": 4.799789292916392e-06, "loss": 0.639, "step": 17207 }, { "epoch": 0.5273997793306363, "grad_norm": 1.188331650976839, "learning_rate": 4.79929337411611e-06, "loss": 0.6436, "step": 17208 }, { "epoch": 0.5274304278533775, "grad_norm": 1.2196731898128363, "learning_rate": 4.798797457293441e-06, "loss": 0.6446, "step": 17209 }, { "epoch": 0.5274610763761187, "grad_norm": 1.2725829636465493, "learning_rate": 4.798301542453273e-06, "loss": 0.6159, "step": 17210 }, { "epoch": 0.5274917248988599, "grad_norm": 1.4912971784533124, "learning_rate": 4.7978056296004905e-06, "loss": 0.6437, "step": 17211 }, { "epoch": 0.5275223734216011, "grad_norm": 1.375073720226535, "learning_rate": 4.797309718739981e-06, "loss": 0.6581, "step": 17212 }, { "epoch": 0.5275530219443423, "grad_norm": 1.3598674082266198, "learning_rate": 4.796813809876631e-06, "loss": 0.6384, "step": 17213 }, { "epoch": 0.5275836704670834, "grad_norm": 0.4665057649482499, "learning_rate": 4.7963179030153275e-06, "loss": 0.428, "step": 17214 }, { "epoch": 0.5276143189898247, "grad_norm": 1.3987307013850718, "learning_rate": 4.7958219981609535e-06, "loss": 0.6413, "step": 17215 }, { "epoch": 0.5276449675125658, "grad_norm": 1.2698139601832503, "learning_rate": 4.7953260953184e-06, "loss": 0.753, "step": 17216 }, { "epoch": 0.5276756160353071, "grad_norm": 1.3764421134857985, "learning_rate": 4.794830194492548e-06, "loss": 0.7444, "step": 17217 }, { "epoch": 0.5277062645580483, "grad_norm": 1.2419700076732891, "learning_rate": 4.79433429568829e-06, "loss": 0.6063, "step": 17218 }, { "epoch": 0.5277369130807895, "grad_norm": 1.366170297831499, "learning_rate": 4.7938383989105065e-06, "loss": 0.6461, "step": 17219 }, { "epoch": 0.5277675616035307, "grad_norm": 1.2819098422914685, "learning_rate": 4.793342504164086e-06, "loss": 0.6784, "step": 17220 }, { "epoch": 0.5277982101262719, "grad_norm": 1.16982006726247, "learning_rate": 4.792846611453916e-06, "loss": 0.6813, "step": 17221 }, { "epoch": 0.5278288586490131, "grad_norm": 1.1648085838711757, "learning_rate": 4.792350720784881e-06, "loss": 0.6178, "step": 17222 }, { "epoch": 0.5278595071717543, "grad_norm": 1.324832080968389, "learning_rate": 4.791854832161867e-06, "loss": 0.6836, "step": 17223 }, { "epoch": 0.5278901556944955, "grad_norm": 1.257251121129043, "learning_rate": 4.79135894558976e-06, "loss": 0.7029, "step": 17224 }, { "epoch": 0.5279208042172367, "grad_norm": 1.2889982913329174, "learning_rate": 4.790863061073447e-06, "loss": 0.6933, "step": 17225 }, { "epoch": 0.5279514527399779, "grad_norm": 1.3969623581639403, "learning_rate": 4.790367178617815e-06, "loss": 0.5963, "step": 17226 }, { "epoch": 0.5279821012627192, "grad_norm": 1.2313720318673682, "learning_rate": 4.789871298227749e-06, "loss": 0.604, "step": 17227 }, { "epoch": 0.5280127497854603, "grad_norm": 1.2398913002350824, "learning_rate": 4.789375419908132e-06, "loss": 0.7641, "step": 17228 }, { "epoch": 0.5280433983082016, "grad_norm": 1.2885815096711883, "learning_rate": 4.788879543663856e-06, "loss": 0.6323, "step": 17229 }, { "epoch": 0.5280740468309427, "grad_norm": 1.3958485044571072, "learning_rate": 4.788383669499803e-06, "loss": 0.7331, "step": 17230 }, { "epoch": 0.528104695353684, "grad_norm": 1.1125141655457984, "learning_rate": 4.7878877974208585e-06, "loss": 0.6225, "step": 17231 }, { "epoch": 0.5281353438764251, "grad_norm": 0.448796063702918, "learning_rate": 4.7873919274319116e-06, "loss": 0.4173, "step": 17232 }, { "epoch": 0.5281659923991664, "grad_norm": 1.2385737122348925, "learning_rate": 4.7868960595378465e-06, "loss": 0.6202, "step": 17233 }, { "epoch": 0.5281966409219075, "grad_norm": 1.2785171596535791, "learning_rate": 4.7864001937435465e-06, "loss": 0.5553, "step": 17234 }, { "epoch": 0.5282272894446488, "grad_norm": 1.1320765279560654, "learning_rate": 4.785904330053902e-06, "loss": 0.6667, "step": 17235 }, { "epoch": 0.52825793796739, "grad_norm": 1.251813318686508, "learning_rate": 4.785408468473796e-06, "loss": 0.6476, "step": 17236 }, { "epoch": 0.5282885864901312, "grad_norm": 1.4908115639089692, "learning_rate": 4.784912609008116e-06, "loss": 0.7075, "step": 17237 }, { "epoch": 0.5283192350128724, "grad_norm": 1.168115667661384, "learning_rate": 4.784416751661749e-06, "loss": 0.6461, "step": 17238 }, { "epoch": 0.5283498835356136, "grad_norm": 1.1638525599311633, "learning_rate": 4.783920896439576e-06, "loss": 0.5887, "step": 17239 }, { "epoch": 0.5283805320583548, "grad_norm": 1.2345951848856358, "learning_rate": 4.7834250433464866e-06, "loss": 0.5712, "step": 17240 }, { "epoch": 0.528411180581096, "grad_norm": 1.1456812073744522, "learning_rate": 4.782929192387366e-06, "loss": 0.5404, "step": 17241 }, { "epoch": 0.5284418291038372, "grad_norm": 1.1827378141510654, "learning_rate": 4.782433343567099e-06, "loss": 0.5431, "step": 17242 }, { "epoch": 0.5284724776265785, "grad_norm": 1.2254897919235301, "learning_rate": 4.7819374968905725e-06, "loss": 0.6183, "step": 17243 }, { "epoch": 0.5285031261493196, "grad_norm": 0.4568742258299558, "learning_rate": 4.78144165236267e-06, "loss": 0.4124, "step": 17244 }, { "epoch": 0.5285337746720608, "grad_norm": 0.44412886798764717, "learning_rate": 4.780945809988281e-06, "loss": 0.3898, "step": 17245 }, { "epoch": 0.528564423194802, "grad_norm": 1.2190912027329852, "learning_rate": 4.780449969772289e-06, "loss": 0.7154, "step": 17246 }, { "epoch": 0.5285950717175432, "grad_norm": 1.4010234111089652, "learning_rate": 4.779954131719577e-06, "loss": 0.615, "step": 17247 }, { "epoch": 0.5286257202402844, "grad_norm": 0.4416668095336596, "learning_rate": 4.779458295835037e-06, "loss": 0.4124, "step": 17248 }, { "epoch": 0.5286563687630256, "grad_norm": 1.3911253560851846, "learning_rate": 4.778962462123549e-06, "loss": 0.689, "step": 17249 }, { "epoch": 0.5286870172857668, "grad_norm": 1.24307608498169, "learning_rate": 4.7784666305899995e-06, "loss": 0.5603, "step": 17250 }, { "epoch": 0.528717665808508, "grad_norm": 1.420468408771726, "learning_rate": 4.777970801239276e-06, "loss": 0.6114, "step": 17251 }, { "epoch": 0.5287483143312492, "grad_norm": 1.109046523374279, "learning_rate": 4.777474974076261e-06, "loss": 0.6139, "step": 17252 }, { "epoch": 0.5287789628539904, "grad_norm": 1.1960709026002276, "learning_rate": 4.776979149105845e-06, "loss": 0.6749, "step": 17253 }, { "epoch": 0.5288096113767317, "grad_norm": 1.2570394002104706, "learning_rate": 4.776483326332909e-06, "loss": 0.7016, "step": 17254 }, { "epoch": 0.5288402598994728, "grad_norm": 1.1669787358010537, "learning_rate": 4.775987505762339e-06, "loss": 0.549, "step": 17255 }, { "epoch": 0.5288709084222141, "grad_norm": 1.3855981093760243, "learning_rate": 4.775491687399024e-06, "loss": 0.725, "step": 17256 }, { "epoch": 0.5289015569449552, "grad_norm": 1.2711310603755854, "learning_rate": 4.774995871247847e-06, "loss": 0.5998, "step": 17257 }, { "epoch": 0.5289322054676965, "grad_norm": 1.2461048189152937, "learning_rate": 4.77450005731369e-06, "loss": 0.5337, "step": 17258 }, { "epoch": 0.5289628539904376, "grad_norm": 1.1982520658793172, "learning_rate": 4.774004245601444e-06, "loss": 0.6624, "step": 17259 }, { "epoch": 0.5289935025131789, "grad_norm": 1.1285068373553482, "learning_rate": 4.773508436115992e-06, "loss": 0.6077, "step": 17260 }, { "epoch": 0.52902415103592, "grad_norm": 0.4565075154943275, "learning_rate": 4.773012628862218e-06, "loss": 0.402, "step": 17261 }, { "epoch": 0.5290547995586613, "grad_norm": 1.1938250045135335, "learning_rate": 4.7725168238450096e-06, "loss": 0.6177, "step": 17262 }, { "epoch": 0.5290854480814025, "grad_norm": 1.3457418610832155, "learning_rate": 4.772021021069249e-06, "loss": 0.639, "step": 17263 }, { "epoch": 0.5291160966041437, "grad_norm": 0.4426603950097963, "learning_rate": 4.771525220539826e-06, "loss": 0.3961, "step": 17264 }, { "epoch": 0.5291467451268849, "grad_norm": 1.1850983255045189, "learning_rate": 4.771029422261624e-06, "loss": 0.5278, "step": 17265 }, { "epoch": 0.5291773936496261, "grad_norm": 1.1853229580370592, "learning_rate": 4.770533626239526e-06, "loss": 0.6105, "step": 17266 }, { "epoch": 0.5292080421723673, "grad_norm": 1.4291489925375327, "learning_rate": 4.7700378324784195e-06, "loss": 0.6827, "step": 17267 }, { "epoch": 0.5292386906951085, "grad_norm": 1.2876391861673364, "learning_rate": 4.769542040983191e-06, "loss": 0.5885, "step": 17268 }, { "epoch": 0.5292693392178497, "grad_norm": 0.44510309482203886, "learning_rate": 4.76904625175872e-06, "loss": 0.4153, "step": 17269 }, { "epoch": 0.529299987740591, "grad_norm": 1.3219980547139223, "learning_rate": 4.7685504648098984e-06, "loss": 0.6433, "step": 17270 }, { "epoch": 0.5293306362633321, "grad_norm": 1.1719980407964383, "learning_rate": 4.768054680141605e-06, "loss": 0.6962, "step": 17271 }, { "epoch": 0.5293612847860734, "grad_norm": 1.084535194621991, "learning_rate": 4.767558897758732e-06, "loss": 0.6329, "step": 17272 }, { "epoch": 0.5293919333088145, "grad_norm": 0.43738731907574213, "learning_rate": 4.767063117666159e-06, "loss": 0.4, "step": 17273 }, { "epoch": 0.5294225818315558, "grad_norm": 1.1177083344293333, "learning_rate": 4.766567339868772e-06, "loss": 0.5895, "step": 17274 }, { "epoch": 0.5294532303542969, "grad_norm": 1.1561919339728601, "learning_rate": 4.766071564371458e-06, "loss": 0.5968, "step": 17275 }, { "epoch": 0.5294838788770381, "grad_norm": 1.2048731746231474, "learning_rate": 4.7655757911791e-06, "loss": 0.6099, "step": 17276 }, { "epoch": 0.5295145273997793, "grad_norm": 1.2994069338725165, "learning_rate": 4.765080020296583e-06, "loss": 0.677, "step": 17277 }, { "epoch": 0.5295451759225205, "grad_norm": 1.3219695317730273, "learning_rate": 4.764584251728794e-06, "loss": 0.6288, "step": 17278 }, { "epoch": 0.5295758244452617, "grad_norm": 1.4037124373657215, "learning_rate": 4.764088485480615e-06, "loss": 0.6415, "step": 17279 }, { "epoch": 0.5296064729680029, "grad_norm": 0.4491151476816863, "learning_rate": 4.763592721556934e-06, "loss": 0.4267, "step": 17280 }, { "epoch": 0.5296371214907442, "grad_norm": 1.3007514876515638, "learning_rate": 4.763096959962635e-06, "loss": 0.5822, "step": 17281 }, { "epoch": 0.5296677700134853, "grad_norm": 1.3089609220786154, "learning_rate": 4.7626012007026e-06, "loss": 0.6236, "step": 17282 }, { "epoch": 0.5296984185362266, "grad_norm": 1.273706730540881, "learning_rate": 4.762105443781719e-06, "loss": 0.6149, "step": 17283 }, { "epoch": 0.5297290670589677, "grad_norm": 1.3605509180288908, "learning_rate": 4.761609689204872e-06, "loss": 0.6819, "step": 17284 }, { "epoch": 0.529759715581709, "grad_norm": 1.2781415987360853, "learning_rate": 4.7611139369769455e-06, "loss": 0.6509, "step": 17285 }, { "epoch": 0.5297903641044501, "grad_norm": 1.3475396041432157, "learning_rate": 4.760618187102825e-06, "loss": 0.7305, "step": 17286 }, { "epoch": 0.5298210126271914, "grad_norm": 1.328129642052757, "learning_rate": 4.7601224395873955e-06, "loss": 0.6588, "step": 17287 }, { "epoch": 0.5298516611499325, "grad_norm": 1.1322389711077738, "learning_rate": 4.75962669443554e-06, "loss": 0.7006, "step": 17288 }, { "epoch": 0.5298823096726738, "grad_norm": 1.2201621519681494, "learning_rate": 4.759130951652144e-06, "loss": 0.6097, "step": 17289 }, { "epoch": 0.529912958195415, "grad_norm": 0.43222934857921097, "learning_rate": 4.758635211242092e-06, "loss": 0.4014, "step": 17290 }, { "epoch": 0.5299436067181562, "grad_norm": 1.3763484306249543, "learning_rate": 4.7581394732102714e-06, "loss": 0.5354, "step": 17291 }, { "epoch": 0.5299742552408974, "grad_norm": 1.1645398758893641, "learning_rate": 4.757643737561563e-06, "loss": 0.4813, "step": 17292 }, { "epoch": 0.5300049037636386, "grad_norm": 0.43119823768606624, "learning_rate": 4.757148004300852e-06, "loss": 0.427, "step": 17293 }, { "epoch": 0.5300355522863798, "grad_norm": 0.46646320249499246, "learning_rate": 4.756652273433025e-06, "loss": 0.4082, "step": 17294 }, { "epoch": 0.530066200809121, "grad_norm": 1.3263345233841322, "learning_rate": 4.756156544962966e-06, "loss": 0.5654, "step": 17295 }, { "epoch": 0.5300968493318622, "grad_norm": 1.229985701755971, "learning_rate": 4.755660818895557e-06, "loss": 0.7254, "step": 17296 }, { "epoch": 0.5301274978546034, "grad_norm": 1.450862406224209, "learning_rate": 4.755165095235685e-06, "loss": 0.6341, "step": 17297 }, { "epoch": 0.5301581463773446, "grad_norm": 0.980360845268835, "learning_rate": 4.7546693739882335e-06, "loss": 0.609, "step": 17298 }, { "epoch": 0.5301887949000859, "grad_norm": 1.2322790277237705, "learning_rate": 4.7541736551580885e-06, "loss": 0.6394, "step": 17299 }, { "epoch": 0.530219443422827, "grad_norm": 1.2529574518679754, "learning_rate": 4.753677938750135e-06, "loss": 0.6296, "step": 17300 }, { "epoch": 0.5302500919455683, "grad_norm": 1.2853374779059448, "learning_rate": 4.753182224769252e-06, "loss": 0.609, "step": 17301 }, { "epoch": 0.5302807404683094, "grad_norm": 1.30114973092145, "learning_rate": 4.752686513220331e-06, "loss": 0.7185, "step": 17302 }, { "epoch": 0.5303113889910507, "grad_norm": 0.485914162531899, "learning_rate": 4.752190804108252e-06, "loss": 0.4298, "step": 17303 }, { "epoch": 0.5303420375137918, "grad_norm": 0.4593023348848127, "learning_rate": 4.751695097437899e-06, "loss": 0.4175, "step": 17304 }, { "epoch": 0.5303726860365331, "grad_norm": 1.3026799502359647, "learning_rate": 4.75119939321416e-06, "loss": 0.6743, "step": 17305 }, { "epoch": 0.5304033345592742, "grad_norm": 0.4502863556095817, "learning_rate": 4.750703691441915e-06, "loss": 0.3981, "step": 17306 }, { "epoch": 0.5304339830820154, "grad_norm": 1.393461484290174, "learning_rate": 4.750207992126051e-06, "loss": 0.7008, "step": 17307 }, { "epoch": 0.5304646316047567, "grad_norm": 1.2405208206322167, "learning_rate": 4.749712295271453e-06, "loss": 0.6236, "step": 17308 }, { "epoch": 0.5304952801274978, "grad_norm": 1.3348309777765361, "learning_rate": 4.749216600883002e-06, "loss": 0.69, "step": 17309 }, { "epoch": 0.5305259286502391, "grad_norm": 1.1987498458800012, "learning_rate": 4.748720908965584e-06, "loss": 0.6268, "step": 17310 }, { "epoch": 0.5305565771729802, "grad_norm": 1.24198420949965, "learning_rate": 4.748225219524085e-06, "loss": 0.5332, "step": 17311 }, { "epoch": 0.5305872256957215, "grad_norm": 1.20985309080796, "learning_rate": 4.747729532563384e-06, "loss": 0.6147, "step": 17312 }, { "epoch": 0.5306178742184626, "grad_norm": 1.3752925843510848, "learning_rate": 4.747233848088373e-06, "loss": 0.6723, "step": 17313 }, { "epoch": 0.5306485227412039, "grad_norm": 1.3550967133796132, "learning_rate": 4.746738166103929e-06, "loss": 0.6415, "step": 17314 }, { "epoch": 0.530679171263945, "grad_norm": 1.2133060713596444, "learning_rate": 4.746242486614938e-06, "loss": 0.6043, "step": 17315 }, { "epoch": 0.5307098197866863, "grad_norm": 1.5505825522248988, "learning_rate": 4.745746809626286e-06, "loss": 0.7073, "step": 17316 }, { "epoch": 0.5307404683094274, "grad_norm": 0.48688499032480725, "learning_rate": 4.745251135142854e-06, "loss": 0.4114, "step": 17317 }, { "epoch": 0.5307711168321687, "grad_norm": 1.1513809357693603, "learning_rate": 4.744755463169529e-06, "loss": 0.6094, "step": 17318 }, { "epoch": 0.5308017653549099, "grad_norm": 1.2322887994722884, "learning_rate": 4.744259793711195e-06, "loss": 0.6429, "step": 17319 }, { "epoch": 0.5308324138776511, "grad_norm": 1.3587205911371303, "learning_rate": 4.743764126772732e-06, "loss": 0.6345, "step": 17320 }, { "epoch": 0.5308630624003923, "grad_norm": 1.2756669670813159, "learning_rate": 4.743268462359029e-06, "loss": 0.6869, "step": 17321 }, { "epoch": 0.5308937109231335, "grad_norm": 1.1923571938872481, "learning_rate": 4.742772800474967e-06, "loss": 0.617, "step": 17322 }, { "epoch": 0.5309243594458747, "grad_norm": 1.217413581366451, "learning_rate": 4.742277141125428e-06, "loss": 0.5809, "step": 17323 }, { "epoch": 0.5309550079686159, "grad_norm": 1.1825999507658425, "learning_rate": 4.741781484315302e-06, "loss": 0.6819, "step": 17324 }, { "epoch": 0.5309856564913571, "grad_norm": 1.2111977085877357, "learning_rate": 4.741285830049465e-06, "loss": 0.6129, "step": 17325 }, { "epoch": 0.5310163050140984, "grad_norm": 1.181355562623363, "learning_rate": 4.7407901783328096e-06, "loss": 0.6047, "step": 17326 }, { "epoch": 0.5310469535368395, "grad_norm": 1.4174387963870365, "learning_rate": 4.740294529170212e-06, "loss": 0.6663, "step": 17327 }, { "epoch": 0.5310776020595808, "grad_norm": 1.381290804054529, "learning_rate": 4.739798882566558e-06, "loss": 0.7153, "step": 17328 }, { "epoch": 0.5311082505823219, "grad_norm": 1.2596104673912762, "learning_rate": 4.739303238526735e-06, "loss": 0.6843, "step": 17329 }, { "epoch": 0.5311388991050632, "grad_norm": 1.3862314317845112, "learning_rate": 4.738807597055623e-06, "loss": 0.707, "step": 17330 }, { "epoch": 0.5311695476278043, "grad_norm": 1.3042979307905913, "learning_rate": 4.738311958158104e-06, "loss": 0.6654, "step": 17331 }, { "epoch": 0.5312001961505456, "grad_norm": 1.427705873149921, "learning_rate": 4.7378163218390674e-06, "loss": 0.6993, "step": 17332 }, { "epoch": 0.5312308446732867, "grad_norm": 1.1855730329786975, "learning_rate": 4.737320688103394e-06, "loss": 0.7097, "step": 17333 }, { "epoch": 0.531261493196028, "grad_norm": 1.3753620381500833, "learning_rate": 4.736825056955964e-06, "loss": 0.6681, "step": 17334 }, { "epoch": 0.5312921417187692, "grad_norm": 1.143020582225234, "learning_rate": 4.736329428401667e-06, "loss": 0.5573, "step": 17335 }, { "epoch": 0.5313227902415104, "grad_norm": 1.247830419199668, "learning_rate": 4.735833802445381e-06, "loss": 0.6104, "step": 17336 }, { "epoch": 0.5313534387642516, "grad_norm": 1.4847032065815888, "learning_rate": 4.735338179091994e-06, "loss": 0.71, "step": 17337 }, { "epoch": 0.5313840872869927, "grad_norm": 1.278280076182051, "learning_rate": 4.734842558346387e-06, "loss": 0.6027, "step": 17338 }, { "epoch": 0.531414735809734, "grad_norm": 0.4745381027334219, "learning_rate": 4.734346940213443e-06, "loss": 0.4229, "step": 17339 }, { "epoch": 0.5314453843324751, "grad_norm": 1.2752645209186344, "learning_rate": 4.733851324698048e-06, "loss": 0.5481, "step": 17340 }, { "epoch": 0.5314760328552164, "grad_norm": 1.1355858368703424, "learning_rate": 4.733355711805085e-06, "loss": 0.6553, "step": 17341 }, { "epoch": 0.5315066813779575, "grad_norm": 1.4270512308561398, "learning_rate": 4.732860101539434e-06, "loss": 0.6782, "step": 17342 }, { "epoch": 0.5315373299006988, "grad_norm": 1.1605627879788167, "learning_rate": 4.732364493905983e-06, "loss": 0.5995, "step": 17343 }, { "epoch": 0.53156797842344, "grad_norm": 1.2259279548983195, "learning_rate": 4.73186888890961e-06, "loss": 0.5932, "step": 17344 }, { "epoch": 0.5315986269461812, "grad_norm": 1.2289517404319465, "learning_rate": 4.731373286555205e-06, "loss": 0.6312, "step": 17345 }, { "epoch": 0.5316292754689224, "grad_norm": 0.4462914067345495, "learning_rate": 4.730877686847647e-06, "loss": 0.4113, "step": 17346 }, { "epoch": 0.5316599239916636, "grad_norm": 1.1852824914120599, "learning_rate": 4.730382089791818e-06, "loss": 0.6657, "step": 17347 }, { "epoch": 0.5316905725144048, "grad_norm": 0.4503265375620813, "learning_rate": 4.729886495392604e-06, "loss": 0.4405, "step": 17348 }, { "epoch": 0.531721221037146, "grad_norm": 1.3860192929406676, "learning_rate": 4.729390903654888e-06, "loss": 0.6429, "step": 17349 }, { "epoch": 0.5317518695598872, "grad_norm": 1.2480111688568958, "learning_rate": 4.728895314583553e-06, "loss": 0.6144, "step": 17350 }, { "epoch": 0.5317825180826284, "grad_norm": 1.3729491689336202, "learning_rate": 4.728399728183481e-06, "loss": 0.6402, "step": 17351 }, { "epoch": 0.5318131666053696, "grad_norm": 1.2353903634545342, "learning_rate": 4.727904144459556e-06, "loss": 0.6711, "step": 17352 }, { "epoch": 0.5318438151281109, "grad_norm": 1.2564968891773058, "learning_rate": 4.727408563416661e-06, "loss": 0.63, "step": 17353 }, { "epoch": 0.531874463650852, "grad_norm": 1.1392952289213296, "learning_rate": 4.7269129850596815e-06, "loss": 0.6691, "step": 17354 }, { "epoch": 0.5319051121735933, "grad_norm": 0.46769303603289547, "learning_rate": 4.726417409393494e-06, "loss": 0.3934, "step": 17355 }, { "epoch": 0.5319357606963344, "grad_norm": 1.4465176760337624, "learning_rate": 4.7259218364229896e-06, "loss": 0.6837, "step": 17356 }, { "epoch": 0.5319664092190757, "grad_norm": 1.4668276611816644, "learning_rate": 4.725426266153046e-06, "loss": 0.7034, "step": 17357 }, { "epoch": 0.5319970577418168, "grad_norm": 1.382975975265735, "learning_rate": 4.724930698588548e-06, "loss": 0.6356, "step": 17358 }, { "epoch": 0.5320277062645581, "grad_norm": 1.19333675482216, "learning_rate": 4.724435133734377e-06, "loss": 0.5459, "step": 17359 }, { "epoch": 0.5320583547872992, "grad_norm": 1.2264954309746035, "learning_rate": 4.723939571595419e-06, "loss": 0.6644, "step": 17360 }, { "epoch": 0.5320890033100405, "grad_norm": 1.1852802586119422, "learning_rate": 4.7234440121765525e-06, "loss": 0.5411, "step": 17361 }, { "epoch": 0.5321196518327816, "grad_norm": 0.44888375122862506, "learning_rate": 4.722948455482665e-06, "loss": 0.3964, "step": 17362 }, { "epoch": 0.5321503003555229, "grad_norm": 1.2401277193166744, "learning_rate": 4.722452901518636e-06, "loss": 0.6597, "step": 17363 }, { "epoch": 0.5321809488782641, "grad_norm": 1.271666935583959, "learning_rate": 4.721957350289351e-06, "loss": 0.659, "step": 17364 }, { "epoch": 0.5322115974010053, "grad_norm": 1.2635311156632498, "learning_rate": 4.721461801799692e-06, "loss": 0.5267, "step": 17365 }, { "epoch": 0.5322422459237465, "grad_norm": 1.2857346475829603, "learning_rate": 4.720966256054538e-06, "loss": 0.6146, "step": 17366 }, { "epoch": 0.5322728944464877, "grad_norm": 0.4789534428289721, "learning_rate": 4.720470713058777e-06, "loss": 0.4341, "step": 17367 }, { "epoch": 0.5323035429692289, "grad_norm": 1.333649124133189, "learning_rate": 4.71997517281729e-06, "loss": 0.6172, "step": 17368 }, { "epoch": 0.53233419149197, "grad_norm": 1.2023512298437713, "learning_rate": 4.719479635334958e-06, "loss": 0.5729, "step": 17369 }, { "epoch": 0.5323648400147113, "grad_norm": 1.2833187542625195, "learning_rate": 4.718984100616665e-06, "loss": 0.5888, "step": 17370 }, { "epoch": 0.5323954885374524, "grad_norm": 1.3054051290803592, "learning_rate": 4.718488568667294e-06, "loss": 0.6671, "step": 17371 }, { "epoch": 0.5324261370601937, "grad_norm": 1.1871681523361184, "learning_rate": 4.7179930394917274e-06, "loss": 0.6209, "step": 17372 }, { "epoch": 0.5324567855829349, "grad_norm": 1.2754101687753026, "learning_rate": 4.717497513094847e-06, "loss": 0.6515, "step": 17373 }, { "epoch": 0.5324874341056761, "grad_norm": 1.3786664054651014, "learning_rate": 4.717001989481536e-06, "loss": 0.6976, "step": 17374 }, { "epoch": 0.5325180826284173, "grad_norm": 0.4378473141705784, "learning_rate": 4.716506468656677e-06, "loss": 0.4015, "step": 17375 }, { "epoch": 0.5325487311511585, "grad_norm": 1.3457045584814988, "learning_rate": 4.716010950625153e-06, "loss": 0.6659, "step": 17376 }, { "epoch": 0.5325793796738997, "grad_norm": 1.1094605610420705, "learning_rate": 4.7155154353918436e-06, "loss": 0.5664, "step": 17377 }, { "epoch": 0.5326100281966409, "grad_norm": 1.6125257074130845, "learning_rate": 4.715019922961636e-06, "loss": 0.7402, "step": 17378 }, { "epoch": 0.5326406767193821, "grad_norm": 1.17682321071109, "learning_rate": 4.7145244133394085e-06, "loss": 0.7343, "step": 17379 }, { "epoch": 0.5326713252421234, "grad_norm": 0.47796255748840527, "learning_rate": 4.714028906530046e-06, "loss": 0.4261, "step": 17380 }, { "epoch": 0.5327019737648645, "grad_norm": 0.45621718622032065, "learning_rate": 4.713533402538429e-06, "loss": 0.4046, "step": 17381 }, { "epoch": 0.5327326222876058, "grad_norm": 1.2811493883703615, "learning_rate": 4.71303790136944e-06, "loss": 0.6888, "step": 17382 }, { "epoch": 0.5327632708103469, "grad_norm": 0.45781586348950226, "learning_rate": 4.712542403027963e-06, "loss": 0.413, "step": 17383 }, { "epoch": 0.5327939193330882, "grad_norm": 1.2312273386904087, "learning_rate": 4.71204690751888e-06, "loss": 0.5447, "step": 17384 }, { "epoch": 0.5328245678558293, "grad_norm": 1.1629980047357218, "learning_rate": 4.71155141484707e-06, "loss": 0.6398, "step": 17385 }, { "epoch": 0.5328552163785706, "grad_norm": 0.45838205616744926, "learning_rate": 4.711055925017421e-06, "loss": 0.413, "step": 17386 }, { "epoch": 0.5328858649013117, "grad_norm": 1.1611193062173997, "learning_rate": 4.710560438034811e-06, "loss": 0.5227, "step": 17387 }, { "epoch": 0.532916513424053, "grad_norm": 1.1733428897177371, "learning_rate": 4.710064953904121e-06, "loss": 0.5594, "step": 17388 }, { "epoch": 0.5329471619467941, "grad_norm": 1.1041051053891957, "learning_rate": 4.7095694726302365e-06, "loss": 0.5799, "step": 17389 }, { "epoch": 0.5329778104695354, "grad_norm": 1.2881446635130709, "learning_rate": 4.709073994218038e-06, "loss": 0.6723, "step": 17390 }, { "epoch": 0.5330084589922766, "grad_norm": 1.067259002143129, "learning_rate": 4.708578518672408e-06, "loss": 0.6024, "step": 17391 }, { "epoch": 0.5330391075150178, "grad_norm": 1.2589085479478044, "learning_rate": 4.708083045998229e-06, "loss": 0.6279, "step": 17392 }, { "epoch": 0.533069756037759, "grad_norm": 1.400993670301611, "learning_rate": 4.707587576200381e-06, "loss": 0.6174, "step": 17393 }, { "epoch": 0.5331004045605002, "grad_norm": 1.1996425725113837, "learning_rate": 4.707092109283749e-06, "loss": 0.5498, "step": 17394 }, { "epoch": 0.5331310530832414, "grad_norm": 1.351920010458129, "learning_rate": 4.706596645253214e-06, "loss": 0.6515, "step": 17395 }, { "epoch": 0.5331617016059826, "grad_norm": 1.2132344861233348, "learning_rate": 4.706101184113655e-06, "loss": 0.6266, "step": 17396 }, { "epoch": 0.5331923501287238, "grad_norm": 1.1169087021910808, "learning_rate": 4.7056057258699585e-06, "loss": 0.5815, "step": 17397 }, { "epoch": 0.533222998651465, "grad_norm": 1.2613453197639326, "learning_rate": 4.7051102705270004e-06, "loss": 0.6403, "step": 17398 }, { "epoch": 0.5332536471742062, "grad_norm": 0.9749740603413332, "learning_rate": 4.70461481808967e-06, "loss": 0.604, "step": 17399 }, { "epoch": 0.5332842956969474, "grad_norm": 1.2508055220988314, "learning_rate": 4.704119368562845e-06, "loss": 0.6669, "step": 17400 }, { "epoch": 0.5333149442196886, "grad_norm": 1.2032639187641228, "learning_rate": 4.703623921951406e-06, "loss": 0.6357, "step": 17401 }, { "epoch": 0.5333455927424298, "grad_norm": 1.3045899103415033, "learning_rate": 4.703128478260237e-06, "loss": 0.6149, "step": 17402 }, { "epoch": 0.533376241265171, "grad_norm": 1.3762718590883956, "learning_rate": 4.70263303749422e-06, "loss": 0.6537, "step": 17403 }, { "epoch": 0.5334068897879122, "grad_norm": 0.5359701282724686, "learning_rate": 4.702137599658234e-06, "loss": 0.3966, "step": 17404 }, { "epoch": 0.5334375383106534, "grad_norm": 1.3093148622393453, "learning_rate": 4.701642164757164e-06, "loss": 0.6897, "step": 17405 }, { "epoch": 0.5334681868333946, "grad_norm": 1.3293536034018025, "learning_rate": 4.7011467327958886e-06, "loss": 0.6764, "step": 17406 }, { "epoch": 0.5334988353561358, "grad_norm": 1.3950148861259213, "learning_rate": 4.700651303779291e-06, "loss": 0.6345, "step": 17407 }, { "epoch": 0.533529483878877, "grad_norm": 1.239401161684864, "learning_rate": 4.700155877712256e-06, "loss": 0.6746, "step": 17408 }, { "epoch": 0.5335601324016183, "grad_norm": 1.3677108361355397, "learning_rate": 4.699660454599657e-06, "loss": 0.649, "step": 17409 }, { "epoch": 0.5335907809243594, "grad_norm": 0.47424755042249594, "learning_rate": 4.699165034446384e-06, "loss": 0.4239, "step": 17410 }, { "epoch": 0.5336214294471007, "grad_norm": 1.19770136606705, "learning_rate": 4.698669617257314e-06, "loss": 0.6355, "step": 17411 }, { "epoch": 0.5336520779698418, "grad_norm": 1.2773042572263245, "learning_rate": 4.698174203037328e-06, "loss": 0.5503, "step": 17412 }, { "epoch": 0.5336827264925831, "grad_norm": 1.2828483383639866, "learning_rate": 4.697678791791311e-06, "loss": 0.6276, "step": 17413 }, { "epoch": 0.5337133750153242, "grad_norm": 1.242241170128869, "learning_rate": 4.697183383524141e-06, "loss": 0.6382, "step": 17414 }, { "epoch": 0.5337440235380655, "grad_norm": 1.261911081308271, "learning_rate": 4.696687978240699e-06, "loss": 0.7121, "step": 17415 }, { "epoch": 0.5337746720608066, "grad_norm": 1.4193969126321735, "learning_rate": 4.69619257594587e-06, "loss": 0.6789, "step": 17416 }, { "epoch": 0.5338053205835479, "grad_norm": 1.118603046739376, "learning_rate": 4.695697176644532e-06, "loss": 0.5347, "step": 17417 }, { "epoch": 0.533835969106289, "grad_norm": 1.2648479783351345, "learning_rate": 4.695201780341569e-06, "loss": 0.5905, "step": 17418 }, { "epoch": 0.5338666176290303, "grad_norm": 0.4814669805285067, "learning_rate": 4.694706387041861e-06, "loss": 0.4019, "step": 17419 }, { "epoch": 0.5338972661517715, "grad_norm": 1.455778019021506, "learning_rate": 4.694210996750287e-06, "loss": 0.7481, "step": 17420 }, { "epoch": 0.5339279146745127, "grad_norm": 1.199056834955849, "learning_rate": 4.693715609471733e-06, "loss": 0.5413, "step": 17421 }, { "epoch": 0.5339585631972539, "grad_norm": 1.2664321838281216, "learning_rate": 4.693220225211076e-06, "loss": 0.7139, "step": 17422 }, { "epoch": 0.5339892117199951, "grad_norm": 1.1359809185438672, "learning_rate": 4.692724843973198e-06, "loss": 0.6262, "step": 17423 }, { "epoch": 0.5340198602427363, "grad_norm": 0.4533279841165964, "learning_rate": 4.692229465762982e-06, "loss": 0.3963, "step": 17424 }, { "epoch": 0.5340505087654775, "grad_norm": 1.5365139701392123, "learning_rate": 4.691734090585306e-06, "loss": 0.6567, "step": 17425 }, { "epoch": 0.5340811572882187, "grad_norm": 2.332961121600634, "learning_rate": 4.691238718445055e-06, "loss": 0.6229, "step": 17426 }, { "epoch": 0.53411180581096, "grad_norm": 0.45344987751432947, "learning_rate": 4.690743349347107e-06, "loss": 0.4246, "step": 17427 }, { "epoch": 0.5341424543337011, "grad_norm": 1.2649839753967507, "learning_rate": 4.690247983296343e-06, "loss": 0.5501, "step": 17428 }, { "epoch": 0.5341731028564424, "grad_norm": 1.436505755066177, "learning_rate": 4.689752620297647e-06, "loss": 0.5857, "step": 17429 }, { "epoch": 0.5342037513791835, "grad_norm": 1.3333888750203005, "learning_rate": 4.6892572603558975e-06, "loss": 0.6359, "step": 17430 }, { "epoch": 0.5342343999019247, "grad_norm": 1.2962848806709417, "learning_rate": 4.6887619034759735e-06, "loss": 0.5464, "step": 17431 }, { "epoch": 0.5342650484246659, "grad_norm": 1.452162642582122, "learning_rate": 4.68826654966276e-06, "loss": 0.6261, "step": 17432 }, { "epoch": 0.5342956969474071, "grad_norm": 1.2300557028057721, "learning_rate": 4.687771198921134e-06, "loss": 0.5714, "step": 17433 }, { "epoch": 0.5343263454701483, "grad_norm": 0.45721823304291803, "learning_rate": 4.68727585125598e-06, "loss": 0.4101, "step": 17434 }, { "epoch": 0.5343569939928895, "grad_norm": 1.3035817581573728, "learning_rate": 4.686780506672177e-06, "loss": 0.6257, "step": 17435 }, { "epoch": 0.5343876425156308, "grad_norm": 0.48889707710114483, "learning_rate": 4.686285165174605e-06, "loss": 0.4337, "step": 17436 }, { "epoch": 0.5344182910383719, "grad_norm": 1.2255046049521396, "learning_rate": 4.6857898267681465e-06, "loss": 0.6823, "step": 17437 }, { "epoch": 0.5344489395611132, "grad_norm": 1.3744157803046837, "learning_rate": 4.685294491457682e-06, "loss": 0.7467, "step": 17438 }, { "epoch": 0.5344795880838543, "grad_norm": 1.4474982691370377, "learning_rate": 4.684799159248088e-06, "loss": 0.6581, "step": 17439 }, { "epoch": 0.5345102366065956, "grad_norm": 1.3487090301918097, "learning_rate": 4.684303830144252e-06, "loss": 0.6799, "step": 17440 }, { "epoch": 0.5345408851293367, "grad_norm": 1.3649182650925717, "learning_rate": 4.683808504151051e-06, "loss": 0.6598, "step": 17441 }, { "epoch": 0.534571533652078, "grad_norm": 1.3270266848972865, "learning_rate": 4.683313181273363e-06, "loss": 0.633, "step": 17442 }, { "epoch": 0.5346021821748191, "grad_norm": 0.4384355774614961, "learning_rate": 4.682817861516073e-06, "loss": 0.3858, "step": 17443 }, { "epoch": 0.5346328306975604, "grad_norm": 1.4568301289369088, "learning_rate": 4.682322544884059e-06, "loss": 0.7258, "step": 17444 }, { "epoch": 0.5346634792203016, "grad_norm": 1.597196807162414, "learning_rate": 4.681827231382203e-06, "loss": 0.6717, "step": 17445 }, { "epoch": 0.5346941277430428, "grad_norm": 0.44891461133814325, "learning_rate": 4.681331921015385e-06, "loss": 0.4006, "step": 17446 }, { "epoch": 0.534724776265784, "grad_norm": 1.2356747318659256, "learning_rate": 4.680836613788483e-06, "loss": 0.5088, "step": 17447 }, { "epoch": 0.5347554247885252, "grad_norm": 1.0984842453230745, "learning_rate": 4.680341309706382e-06, "loss": 0.5517, "step": 17448 }, { "epoch": 0.5347860733112664, "grad_norm": 1.1189710203492425, "learning_rate": 4.679846008773961e-06, "loss": 0.5654, "step": 17449 }, { "epoch": 0.5348167218340076, "grad_norm": 1.207332022878676, "learning_rate": 4.679350710996094e-06, "loss": 0.6439, "step": 17450 }, { "epoch": 0.5348473703567488, "grad_norm": 1.3385150605379612, "learning_rate": 4.678855416377672e-06, "loss": 0.7155, "step": 17451 }, { "epoch": 0.53487801887949, "grad_norm": 1.3011594729319131, "learning_rate": 4.6783601249235655e-06, "loss": 0.668, "step": 17452 }, { "epoch": 0.5349086674022312, "grad_norm": 1.2716030733503367, "learning_rate": 4.677864836638662e-06, "loss": 0.6733, "step": 17453 }, { "epoch": 0.5349393159249725, "grad_norm": 1.331547159608512, "learning_rate": 4.6773695515278376e-06, "loss": 0.6457, "step": 17454 }, { "epoch": 0.5349699644477136, "grad_norm": 1.1340312643999078, "learning_rate": 4.676874269595973e-06, "loss": 0.6587, "step": 17455 }, { "epoch": 0.5350006129704549, "grad_norm": 1.2117098373958335, "learning_rate": 4.6763789908479495e-06, "loss": 0.5356, "step": 17456 }, { "epoch": 0.535031261493196, "grad_norm": 1.2962408352056702, "learning_rate": 4.6758837152886475e-06, "loss": 0.6722, "step": 17457 }, { "epoch": 0.5350619100159373, "grad_norm": 1.3695281961387524, "learning_rate": 4.675388442922944e-06, "loss": 0.6578, "step": 17458 }, { "epoch": 0.5350925585386784, "grad_norm": 1.2333975979221008, "learning_rate": 4.674893173755723e-06, "loss": 0.5784, "step": 17459 }, { "epoch": 0.5351232070614197, "grad_norm": 1.2062135028711287, "learning_rate": 4.674397907791861e-06, "loss": 0.598, "step": 17460 }, { "epoch": 0.5351538555841608, "grad_norm": 1.1021083211724199, "learning_rate": 4.673902645036242e-06, "loss": 0.5215, "step": 17461 }, { "epoch": 0.535184504106902, "grad_norm": 1.410816040666604, "learning_rate": 4.673407385493743e-06, "loss": 0.7331, "step": 17462 }, { "epoch": 0.5352151526296433, "grad_norm": 1.2586091259875418, "learning_rate": 4.672912129169242e-06, "loss": 0.6011, "step": 17463 }, { "epoch": 0.5352458011523844, "grad_norm": 1.2068352440687833, "learning_rate": 4.672416876067626e-06, "loss": 0.6405, "step": 17464 }, { "epoch": 0.5352764496751257, "grad_norm": 0.47712408451764776, "learning_rate": 4.6719216261937685e-06, "loss": 0.4163, "step": 17465 }, { "epoch": 0.5353070981978668, "grad_norm": 1.2353614428373199, "learning_rate": 4.671426379552549e-06, "loss": 0.6411, "step": 17466 }, { "epoch": 0.5353377467206081, "grad_norm": 0.4840744388343064, "learning_rate": 4.6709311361488515e-06, "loss": 0.423, "step": 17467 }, { "epoch": 0.5353683952433492, "grad_norm": 1.2965713206677763, "learning_rate": 4.670435895987554e-06, "loss": 0.5529, "step": 17468 }, { "epoch": 0.5353990437660905, "grad_norm": 1.3566759789956881, "learning_rate": 4.6699406590735345e-06, "loss": 0.6685, "step": 17469 }, { "epoch": 0.5354296922888316, "grad_norm": 0.45952492844462933, "learning_rate": 4.669445425411675e-06, "loss": 0.4214, "step": 17470 }, { "epoch": 0.5354603408115729, "grad_norm": 1.3370190717114883, "learning_rate": 4.668950195006854e-06, "loss": 0.6237, "step": 17471 }, { "epoch": 0.535490989334314, "grad_norm": 1.2728232168792335, "learning_rate": 4.668454967863952e-06, "loss": 0.6268, "step": 17472 }, { "epoch": 0.5355216378570553, "grad_norm": 1.18509806292966, "learning_rate": 4.667959743987848e-06, "loss": 0.6241, "step": 17473 }, { "epoch": 0.5355522863797965, "grad_norm": 1.353017182015306, "learning_rate": 4.6674645233834196e-06, "loss": 0.6284, "step": 17474 }, { "epoch": 0.5355829349025377, "grad_norm": 0.47299539981874955, "learning_rate": 4.666969306055552e-06, "loss": 0.3964, "step": 17475 }, { "epoch": 0.5356135834252789, "grad_norm": 1.2115316368302094, "learning_rate": 4.666474092009119e-06, "loss": 0.5902, "step": 17476 }, { "epoch": 0.5356442319480201, "grad_norm": 1.2004053563580994, "learning_rate": 4.665978881249001e-06, "loss": 0.6199, "step": 17477 }, { "epoch": 0.5356748804707613, "grad_norm": 1.3279128269077884, "learning_rate": 4.66548367378008e-06, "loss": 0.7115, "step": 17478 }, { "epoch": 0.5357055289935025, "grad_norm": 1.129667203833843, "learning_rate": 4.664988469607233e-06, "loss": 0.5778, "step": 17479 }, { "epoch": 0.5357361775162437, "grad_norm": 1.2631512748580866, "learning_rate": 4.664493268735341e-06, "loss": 0.6595, "step": 17480 }, { "epoch": 0.535766826038985, "grad_norm": 1.2097236588248965, "learning_rate": 4.663998071169283e-06, "loss": 0.6409, "step": 17481 }, { "epoch": 0.5357974745617261, "grad_norm": 1.1876340943954262, "learning_rate": 4.663502876913937e-06, "loss": 0.5728, "step": 17482 }, { "epoch": 0.5358281230844674, "grad_norm": 1.3065650729790155, "learning_rate": 4.663007685974185e-06, "loss": 0.6636, "step": 17483 }, { "epoch": 0.5358587716072085, "grad_norm": 1.22423528482114, "learning_rate": 4.662512498354904e-06, "loss": 0.6579, "step": 17484 }, { "epoch": 0.5358894201299498, "grad_norm": 1.3941933094209495, "learning_rate": 4.662017314060972e-06, "loss": 0.6976, "step": 17485 }, { "epoch": 0.5359200686526909, "grad_norm": 1.1134920980606595, "learning_rate": 4.6615221330972715e-06, "loss": 0.6894, "step": 17486 }, { "epoch": 0.5359507171754322, "grad_norm": 1.3591035471497255, "learning_rate": 4.661026955468678e-06, "loss": 0.7203, "step": 17487 }, { "epoch": 0.5359813656981733, "grad_norm": 1.2659841375099958, "learning_rate": 4.660531781180075e-06, "loss": 0.718, "step": 17488 }, { "epoch": 0.5360120142209146, "grad_norm": 1.3042715644098042, "learning_rate": 4.660036610236339e-06, "loss": 0.5318, "step": 17489 }, { "epoch": 0.5360426627436558, "grad_norm": 1.5307077346285218, "learning_rate": 4.659541442642348e-06, "loss": 0.6374, "step": 17490 }, { "epoch": 0.536073311266397, "grad_norm": 1.072543925986253, "learning_rate": 4.659046278402982e-06, "loss": 0.6474, "step": 17491 }, { "epoch": 0.5361039597891382, "grad_norm": 1.4539703727846132, "learning_rate": 4.658551117523123e-06, "loss": 0.6465, "step": 17492 }, { "epoch": 0.5361346083118793, "grad_norm": 1.409264934964154, "learning_rate": 4.6580559600076435e-06, "loss": 0.6087, "step": 17493 }, { "epoch": 0.5361652568346206, "grad_norm": 1.3722912379800047, "learning_rate": 4.657560805861429e-06, "loss": 0.6783, "step": 17494 }, { "epoch": 0.5361959053573617, "grad_norm": 1.2742598034709804, "learning_rate": 4.657065655089355e-06, "loss": 0.6768, "step": 17495 }, { "epoch": 0.536226553880103, "grad_norm": 1.3396569969586103, "learning_rate": 4.6565705076962995e-06, "loss": 0.629, "step": 17496 }, { "epoch": 0.5362572024028441, "grad_norm": 1.5309085509280467, "learning_rate": 4.6560753636871435e-06, "loss": 0.7025, "step": 17497 }, { "epoch": 0.5362878509255854, "grad_norm": 1.2042478158815284, "learning_rate": 4.655580223066764e-06, "loss": 0.5642, "step": 17498 }, { "epoch": 0.5363184994483265, "grad_norm": 1.3744390458885751, "learning_rate": 4.655085085840042e-06, "loss": 0.6424, "step": 17499 }, { "epoch": 0.5363491479710678, "grad_norm": 1.1873310810253306, "learning_rate": 4.6545899520118545e-06, "loss": 0.5296, "step": 17500 }, { "epoch": 0.536379796493809, "grad_norm": 1.4114624558914401, "learning_rate": 4.654094821587079e-06, "loss": 0.7484, "step": 17501 }, { "epoch": 0.5364104450165502, "grad_norm": 1.1930898079018417, "learning_rate": 4.653599694570598e-06, "loss": 0.646, "step": 17502 }, { "epoch": 0.5364410935392914, "grad_norm": 1.2196441786688121, "learning_rate": 4.6531045709672886e-06, "loss": 0.6007, "step": 17503 }, { "epoch": 0.5364717420620326, "grad_norm": 1.2388881637607267, "learning_rate": 4.652609450782026e-06, "loss": 0.6962, "step": 17504 }, { "epoch": 0.5365023905847738, "grad_norm": 1.6620145209047708, "learning_rate": 4.652114334019693e-06, "loss": 0.6035, "step": 17505 }, { "epoch": 0.536533039107515, "grad_norm": 1.1628885503697814, "learning_rate": 4.6516192206851644e-06, "loss": 0.6771, "step": 17506 }, { "epoch": 0.5365636876302562, "grad_norm": 1.2646700808248124, "learning_rate": 4.651124110783324e-06, "loss": 0.6014, "step": 17507 }, { "epoch": 0.5365943361529975, "grad_norm": 1.3075218048249397, "learning_rate": 4.650629004319046e-06, "loss": 0.5986, "step": 17508 }, { "epoch": 0.5366249846757386, "grad_norm": 1.3265646396767692, "learning_rate": 4.6501339012972076e-06, "loss": 0.565, "step": 17509 }, { "epoch": 0.5366556331984799, "grad_norm": 1.2323098813091455, "learning_rate": 4.649638801722691e-06, "loss": 0.6289, "step": 17510 }, { "epoch": 0.536686281721221, "grad_norm": 1.3016274030737345, "learning_rate": 4.649143705600373e-06, "loss": 0.569, "step": 17511 }, { "epoch": 0.5367169302439623, "grad_norm": 1.3144084025055673, "learning_rate": 4.648648612935131e-06, "loss": 0.6787, "step": 17512 }, { "epoch": 0.5367475787667034, "grad_norm": 1.2527143373286658, "learning_rate": 4.648153523731846e-06, "loss": 0.6552, "step": 17513 }, { "epoch": 0.5367782272894447, "grad_norm": 1.263319677840828, "learning_rate": 4.647658437995394e-06, "loss": 0.6358, "step": 17514 }, { "epoch": 0.5368088758121858, "grad_norm": 1.5595774623016794, "learning_rate": 4.647163355730651e-06, "loss": 0.5829, "step": 17515 }, { "epoch": 0.5368395243349271, "grad_norm": 1.1657044276435107, "learning_rate": 4.6466682769425e-06, "loss": 0.518, "step": 17516 }, { "epoch": 0.5368701728576682, "grad_norm": 1.3552444301956215, "learning_rate": 4.646173201635815e-06, "loss": 0.6631, "step": 17517 }, { "epoch": 0.5369008213804095, "grad_norm": 1.1768507291514072, "learning_rate": 4.6456781298154784e-06, "loss": 0.5823, "step": 17518 }, { "epoch": 0.5369314699031507, "grad_norm": 1.2913248489882725, "learning_rate": 4.645183061486365e-06, "loss": 0.6244, "step": 17519 }, { "epoch": 0.5369621184258919, "grad_norm": 1.4187866650351384, "learning_rate": 4.644687996653353e-06, "loss": 0.6747, "step": 17520 }, { "epoch": 0.5369927669486331, "grad_norm": 1.4704287253834851, "learning_rate": 4.644192935321322e-06, "loss": 0.6047, "step": 17521 }, { "epoch": 0.5370234154713743, "grad_norm": 0.4733332421980515, "learning_rate": 4.643697877495148e-06, "loss": 0.4088, "step": 17522 }, { "epoch": 0.5370540639941155, "grad_norm": 1.2750976359926118, "learning_rate": 4.6432028231797095e-06, "loss": 0.6492, "step": 17523 }, { "epoch": 0.5370847125168566, "grad_norm": 1.3047877123103753, "learning_rate": 4.642707772379887e-06, "loss": 0.6799, "step": 17524 }, { "epoch": 0.5371153610395979, "grad_norm": 1.2352100886985886, "learning_rate": 4.642212725100554e-06, "loss": 0.6899, "step": 17525 }, { "epoch": 0.537146009562339, "grad_norm": 1.2605633524814375, "learning_rate": 4.641717681346592e-06, "loss": 0.6732, "step": 17526 }, { "epoch": 0.5371766580850803, "grad_norm": 1.2944612944045668, "learning_rate": 4.641222641122878e-06, "loss": 0.6271, "step": 17527 }, { "epoch": 0.5372073066078215, "grad_norm": 1.345259999737063, "learning_rate": 4.640727604434288e-06, "loss": 0.6963, "step": 17528 }, { "epoch": 0.5372379551305627, "grad_norm": 1.2446400924774332, "learning_rate": 4.640232571285701e-06, "loss": 0.6852, "step": 17529 }, { "epoch": 0.5372686036533039, "grad_norm": 1.2541834556858071, "learning_rate": 4.639737541681995e-06, "loss": 0.6048, "step": 17530 }, { "epoch": 0.5372992521760451, "grad_norm": 0.449882027628884, "learning_rate": 4.639242515628046e-06, "loss": 0.4187, "step": 17531 }, { "epoch": 0.5373299006987863, "grad_norm": 1.2679454352751856, "learning_rate": 4.638747493128733e-06, "loss": 0.5776, "step": 17532 }, { "epoch": 0.5373605492215275, "grad_norm": 1.2874024747130353, "learning_rate": 4.638252474188934e-06, "loss": 0.6827, "step": 17533 }, { "epoch": 0.5373911977442687, "grad_norm": 1.3529774270654802, "learning_rate": 4.637757458813526e-06, "loss": 0.5825, "step": 17534 }, { "epoch": 0.53742184626701, "grad_norm": 1.452299350660416, "learning_rate": 4.637262447007387e-06, "loss": 0.6455, "step": 17535 }, { "epoch": 0.5374524947897511, "grad_norm": 1.2321068969335944, "learning_rate": 4.636767438775392e-06, "loss": 0.6088, "step": 17536 }, { "epoch": 0.5374831433124924, "grad_norm": 1.3198572084752949, "learning_rate": 4.636272434122422e-06, "loss": 0.6936, "step": 17537 }, { "epoch": 0.5375137918352335, "grad_norm": 1.246040362534581, "learning_rate": 4.635777433053353e-06, "loss": 0.5693, "step": 17538 }, { "epoch": 0.5375444403579748, "grad_norm": 1.179395809188452, "learning_rate": 4.63528243557306e-06, "loss": 0.5742, "step": 17539 }, { "epoch": 0.5375750888807159, "grad_norm": 1.1868391130826828, "learning_rate": 4.634787441686425e-06, "loss": 0.6507, "step": 17540 }, { "epoch": 0.5376057374034572, "grad_norm": 1.268792838434009, "learning_rate": 4.634292451398322e-06, "loss": 0.6128, "step": 17541 }, { "epoch": 0.5376363859261983, "grad_norm": 0.45624006011379026, "learning_rate": 4.633797464713628e-06, "loss": 0.3955, "step": 17542 }, { "epoch": 0.5376670344489396, "grad_norm": 1.1648154139472917, "learning_rate": 4.633302481637222e-06, "loss": 0.667, "step": 17543 }, { "epoch": 0.5376976829716807, "grad_norm": 1.4843483992090996, "learning_rate": 4.63280750217398e-06, "loss": 0.7415, "step": 17544 }, { "epoch": 0.537728331494422, "grad_norm": 1.1925108592033784, "learning_rate": 4.6323125263287805e-06, "loss": 0.5993, "step": 17545 }, { "epoch": 0.5377589800171632, "grad_norm": 1.2874432348645026, "learning_rate": 4.631817554106501e-06, "loss": 0.7124, "step": 17546 }, { "epoch": 0.5377896285399044, "grad_norm": 0.4527802429923246, "learning_rate": 4.631322585512014e-06, "loss": 0.4023, "step": 17547 }, { "epoch": 0.5378202770626456, "grad_norm": 1.1974422792185977, "learning_rate": 4.6308276205502026e-06, "loss": 0.5899, "step": 17548 }, { "epoch": 0.5378509255853868, "grad_norm": 1.1142883262351764, "learning_rate": 4.6303326592259405e-06, "loss": 0.4797, "step": 17549 }, { "epoch": 0.537881574108128, "grad_norm": 1.250901712201103, "learning_rate": 4.629837701544104e-06, "loss": 0.5431, "step": 17550 }, { "epoch": 0.5379122226308692, "grad_norm": 1.3441582375502539, "learning_rate": 4.6293427475095725e-06, "loss": 0.6596, "step": 17551 }, { "epoch": 0.5379428711536104, "grad_norm": 1.2583051702393822, "learning_rate": 4.62884779712722e-06, "loss": 0.6614, "step": 17552 }, { "epoch": 0.5379735196763517, "grad_norm": 1.4104021419144295, "learning_rate": 4.628352850401928e-06, "loss": 0.6067, "step": 17553 }, { "epoch": 0.5380041681990928, "grad_norm": 1.2418288068528116, "learning_rate": 4.6278579073385685e-06, "loss": 0.62, "step": 17554 }, { "epoch": 0.538034816721834, "grad_norm": 1.3838059708535841, "learning_rate": 4.627362967942021e-06, "loss": 0.7002, "step": 17555 }, { "epoch": 0.5380654652445752, "grad_norm": 1.1661580327816652, "learning_rate": 4.626868032217161e-06, "loss": 0.5551, "step": 17556 }, { "epoch": 0.5380961137673164, "grad_norm": 1.2349855520779323, "learning_rate": 4.6263731001688676e-06, "loss": 0.6015, "step": 17557 }, { "epoch": 0.5381267622900576, "grad_norm": 1.3519166376205816, "learning_rate": 4.625878171802012e-06, "loss": 0.702, "step": 17558 }, { "epoch": 0.5381574108127988, "grad_norm": 1.3589587005064931, "learning_rate": 4.625383247121479e-06, "loss": 0.7116, "step": 17559 }, { "epoch": 0.53818805933554, "grad_norm": 1.1913725189627464, "learning_rate": 4.624888326132136e-06, "loss": 0.6899, "step": 17560 }, { "epoch": 0.5382187078582812, "grad_norm": 1.297311498604791, "learning_rate": 4.624393408838868e-06, "loss": 0.643, "step": 17561 }, { "epoch": 0.5382493563810224, "grad_norm": 1.3590981382923017, "learning_rate": 4.623898495246547e-06, "loss": 0.6843, "step": 17562 }, { "epoch": 0.5382800049037636, "grad_norm": 0.49708496496989735, "learning_rate": 4.6234035853600486e-06, "loss": 0.4313, "step": 17563 }, { "epoch": 0.5383106534265049, "grad_norm": 1.2291223415806118, "learning_rate": 4.622908679184253e-06, "loss": 0.5943, "step": 17564 }, { "epoch": 0.538341301949246, "grad_norm": 1.516996419159008, "learning_rate": 4.6224137767240344e-06, "loss": 0.5523, "step": 17565 }, { "epoch": 0.5383719504719873, "grad_norm": 1.215150877304875, "learning_rate": 4.621918877984268e-06, "loss": 0.6604, "step": 17566 }, { "epoch": 0.5384025989947284, "grad_norm": 1.287374657944454, "learning_rate": 4.621423982969833e-06, "loss": 0.6417, "step": 17567 }, { "epoch": 0.5384332475174697, "grad_norm": 1.1227252006095183, "learning_rate": 4.620929091685605e-06, "loss": 0.6239, "step": 17568 }, { "epoch": 0.5384638960402108, "grad_norm": 1.3986513035740273, "learning_rate": 4.620434204136457e-06, "loss": 0.6719, "step": 17569 }, { "epoch": 0.5384945445629521, "grad_norm": 1.2955075192359833, "learning_rate": 4.619939320327271e-06, "loss": 0.695, "step": 17570 }, { "epoch": 0.5385251930856932, "grad_norm": 1.4391415501489386, "learning_rate": 4.619444440262918e-06, "loss": 0.6743, "step": 17571 }, { "epoch": 0.5385558416084345, "grad_norm": 1.2687148174156018, "learning_rate": 4.618949563948277e-06, "loss": 0.7453, "step": 17572 }, { "epoch": 0.5385864901311757, "grad_norm": 0.4519476092014881, "learning_rate": 4.618454691388224e-06, "loss": 0.3994, "step": 17573 }, { "epoch": 0.5386171386539169, "grad_norm": 1.34718016540776, "learning_rate": 4.617959822587633e-06, "loss": 0.6917, "step": 17574 }, { "epoch": 0.5386477871766581, "grad_norm": 0.4647345400133991, "learning_rate": 4.617464957551383e-06, "loss": 0.4074, "step": 17575 }, { "epoch": 0.5386784356993993, "grad_norm": 1.378035169256132, "learning_rate": 4.616970096284348e-06, "loss": 0.6714, "step": 17576 }, { "epoch": 0.5387090842221405, "grad_norm": 1.336343429335418, "learning_rate": 4.616475238791405e-06, "loss": 0.6648, "step": 17577 }, { "epoch": 0.5387397327448817, "grad_norm": 1.4676249249471796, "learning_rate": 4.61598038507743e-06, "loss": 0.7123, "step": 17578 }, { "epoch": 0.5387703812676229, "grad_norm": 1.2835656293918074, "learning_rate": 4.615485535147296e-06, "loss": 0.6441, "step": 17579 }, { "epoch": 0.5388010297903641, "grad_norm": 1.2349593369541543, "learning_rate": 4.6149906890058855e-06, "loss": 0.5949, "step": 17580 }, { "epoch": 0.5388316783131053, "grad_norm": 0.4421747160717666, "learning_rate": 4.614495846658068e-06, "loss": 0.3991, "step": 17581 }, { "epoch": 0.5388623268358466, "grad_norm": 0.45814189126239196, "learning_rate": 4.614001008108721e-06, "loss": 0.4178, "step": 17582 }, { "epoch": 0.5388929753585877, "grad_norm": 1.214163699201521, "learning_rate": 4.613506173362722e-06, "loss": 0.6705, "step": 17583 }, { "epoch": 0.538923623881329, "grad_norm": 1.2033151689565256, "learning_rate": 4.613011342424945e-06, "loss": 0.5917, "step": 17584 }, { "epoch": 0.5389542724040701, "grad_norm": 1.2088443776378652, "learning_rate": 4.612516515300266e-06, "loss": 0.6853, "step": 17585 }, { "epoch": 0.5389849209268113, "grad_norm": 1.2066522582074168, "learning_rate": 4.612021691993561e-06, "loss": 0.6597, "step": 17586 }, { "epoch": 0.5390155694495525, "grad_norm": 1.2515705160366946, "learning_rate": 4.611526872509705e-06, "loss": 0.6744, "step": 17587 }, { "epoch": 0.5390462179722937, "grad_norm": 1.2704776925369634, "learning_rate": 4.611032056853575e-06, "loss": 0.684, "step": 17588 }, { "epoch": 0.539076866495035, "grad_norm": 0.4372366288673427, "learning_rate": 4.610537245030048e-06, "loss": 0.3896, "step": 17589 }, { "epoch": 0.5391075150177761, "grad_norm": 1.4280106425179013, "learning_rate": 4.610042437043993e-06, "loss": 0.6608, "step": 17590 }, { "epoch": 0.5391381635405174, "grad_norm": 0.4520744567474566, "learning_rate": 4.609547632900292e-06, "loss": 0.4126, "step": 17591 }, { "epoch": 0.5391688120632585, "grad_norm": 1.4739742123352264, "learning_rate": 4.609052832603818e-06, "loss": 0.7058, "step": 17592 }, { "epoch": 0.5391994605859998, "grad_norm": 1.1247438172719324, "learning_rate": 4.608558036159445e-06, "loss": 0.5847, "step": 17593 }, { "epoch": 0.5392301091087409, "grad_norm": 1.4545648982263661, "learning_rate": 4.608063243572051e-06, "loss": 0.6976, "step": 17594 }, { "epoch": 0.5392607576314822, "grad_norm": 1.1168550321651234, "learning_rate": 4.60756845484651e-06, "loss": 0.6209, "step": 17595 }, { "epoch": 0.5392914061542233, "grad_norm": 1.3269281330884828, "learning_rate": 4.607073669987698e-06, "loss": 0.6739, "step": 17596 }, { "epoch": 0.5393220546769646, "grad_norm": 0.44038737176457, "learning_rate": 4.606578889000489e-06, "loss": 0.3989, "step": 17597 }, { "epoch": 0.5393527031997057, "grad_norm": 1.2433950173821269, "learning_rate": 4.606084111889758e-06, "loss": 0.6133, "step": 17598 }, { "epoch": 0.539383351722447, "grad_norm": 1.1651529720802571, "learning_rate": 4.605589338660382e-06, "loss": 0.5195, "step": 17599 }, { "epoch": 0.5394140002451882, "grad_norm": 1.2086155381368877, "learning_rate": 4.605094569317236e-06, "loss": 0.5667, "step": 17600 }, { "epoch": 0.5394446487679294, "grad_norm": 1.3771597869024697, "learning_rate": 4.604599803865192e-06, "loss": 0.6057, "step": 17601 }, { "epoch": 0.5394752972906706, "grad_norm": 1.2533347579694405, "learning_rate": 4.60410504230913e-06, "loss": 0.6253, "step": 17602 }, { "epoch": 0.5395059458134118, "grad_norm": 0.478032495382939, "learning_rate": 4.603610284653921e-06, "loss": 0.4233, "step": 17603 }, { "epoch": 0.539536594336153, "grad_norm": 1.4813029007894603, "learning_rate": 4.603115530904441e-06, "loss": 0.6586, "step": 17604 }, { "epoch": 0.5395672428588942, "grad_norm": 1.2803071221704412, "learning_rate": 4.602620781065565e-06, "loss": 0.6792, "step": 17605 }, { "epoch": 0.5395978913816354, "grad_norm": 0.4513772191016302, "learning_rate": 4.602126035142168e-06, "loss": 0.4088, "step": 17606 }, { "epoch": 0.5396285399043766, "grad_norm": 1.3947704504807847, "learning_rate": 4.601631293139126e-06, "loss": 0.6773, "step": 17607 }, { "epoch": 0.5396591884271178, "grad_norm": 1.2270611791001103, "learning_rate": 4.601136555061312e-06, "loss": 0.6304, "step": 17608 }, { "epoch": 0.5396898369498591, "grad_norm": 1.3243947142655963, "learning_rate": 4.600641820913601e-06, "loss": 0.6223, "step": 17609 }, { "epoch": 0.5397204854726002, "grad_norm": 1.1831654900361577, "learning_rate": 4.60014709070087e-06, "loss": 0.6523, "step": 17610 }, { "epoch": 0.5397511339953415, "grad_norm": 1.0956470917794203, "learning_rate": 4.599652364427993e-06, "loss": 0.5268, "step": 17611 }, { "epoch": 0.5397817825180826, "grad_norm": 1.224076506622304, "learning_rate": 4.59915764209984e-06, "loss": 0.5931, "step": 17612 }, { "epoch": 0.5398124310408239, "grad_norm": 1.2904509218363067, "learning_rate": 4.598662923721293e-06, "loss": 0.5863, "step": 17613 }, { "epoch": 0.539843079563565, "grad_norm": 1.1546650154891964, "learning_rate": 4.598168209297218e-06, "loss": 0.5709, "step": 17614 }, { "epoch": 0.5398737280863063, "grad_norm": 1.3912693964058742, "learning_rate": 4.5976734988325e-06, "loss": 0.6486, "step": 17615 }, { "epoch": 0.5399043766090474, "grad_norm": 1.3057544537763346, "learning_rate": 4.5971787923320055e-06, "loss": 0.6786, "step": 17616 }, { "epoch": 0.5399350251317886, "grad_norm": 1.1923642708628963, "learning_rate": 4.59668408980061e-06, "loss": 0.7077, "step": 17617 }, { "epoch": 0.5399656736545299, "grad_norm": 1.2435402563413729, "learning_rate": 4.596189391243192e-06, "loss": 0.6109, "step": 17618 }, { "epoch": 0.539996322177271, "grad_norm": 1.3299384306104076, "learning_rate": 4.595694696664622e-06, "loss": 0.6237, "step": 17619 }, { "epoch": 0.5400269707000123, "grad_norm": 1.1090115135706289, "learning_rate": 4.595200006069775e-06, "loss": 0.7215, "step": 17620 }, { "epoch": 0.5400576192227534, "grad_norm": 1.1526796551691458, "learning_rate": 4.5947053194635275e-06, "loss": 0.637, "step": 17621 }, { "epoch": 0.5400882677454947, "grad_norm": 0.48856672631703896, "learning_rate": 4.594210636850752e-06, "loss": 0.3983, "step": 17622 }, { "epoch": 0.5401189162682358, "grad_norm": 0.469202689867047, "learning_rate": 4.593715958236322e-06, "loss": 0.4109, "step": 17623 }, { "epoch": 0.5401495647909771, "grad_norm": 1.4566901337280191, "learning_rate": 4.593221283625113e-06, "loss": 0.7547, "step": 17624 }, { "epoch": 0.5401802133137182, "grad_norm": 1.3023185065195761, "learning_rate": 4.592726613021997e-06, "loss": 0.6246, "step": 17625 }, { "epoch": 0.5402108618364595, "grad_norm": 1.3756116802742013, "learning_rate": 4.5922319464318524e-06, "loss": 0.7067, "step": 17626 }, { "epoch": 0.5402415103592006, "grad_norm": 1.3415335118144192, "learning_rate": 4.59173728385955e-06, "loss": 0.6709, "step": 17627 }, { "epoch": 0.5402721588819419, "grad_norm": 1.2035754408658517, "learning_rate": 4.591242625309963e-06, "loss": 0.6512, "step": 17628 }, { "epoch": 0.5403028074046831, "grad_norm": 0.45465051652426425, "learning_rate": 4.5907479707879694e-06, "loss": 0.4042, "step": 17629 }, { "epoch": 0.5403334559274243, "grad_norm": 1.2540272442685485, "learning_rate": 4.59025332029844e-06, "loss": 0.6554, "step": 17630 }, { "epoch": 0.5403641044501655, "grad_norm": 0.4683896246460707, "learning_rate": 4.589758673846249e-06, "loss": 0.4231, "step": 17631 }, { "epoch": 0.5403947529729067, "grad_norm": 1.2511229576429668, "learning_rate": 4.589264031436272e-06, "loss": 0.6325, "step": 17632 }, { "epoch": 0.5404254014956479, "grad_norm": 1.5120027134425493, "learning_rate": 4.588769393073379e-06, "loss": 0.6151, "step": 17633 }, { "epoch": 0.5404560500183891, "grad_norm": 0.44435403919776884, "learning_rate": 4.588274758762449e-06, "loss": 0.3948, "step": 17634 }, { "epoch": 0.5404866985411303, "grad_norm": 1.0927506248768937, "learning_rate": 4.587780128508352e-06, "loss": 0.5957, "step": 17635 }, { "epoch": 0.5405173470638716, "grad_norm": 1.2586537958744077, "learning_rate": 4.587285502315963e-06, "loss": 0.6165, "step": 17636 }, { "epoch": 0.5405479955866127, "grad_norm": 1.133199456636975, "learning_rate": 4.586790880190155e-06, "loss": 0.5859, "step": 17637 }, { "epoch": 0.540578644109354, "grad_norm": 1.1686084127899987, "learning_rate": 4.586296262135804e-06, "loss": 0.5645, "step": 17638 }, { "epoch": 0.5406092926320951, "grad_norm": 1.2841830405500885, "learning_rate": 4.58580164815778e-06, "loss": 0.6794, "step": 17639 }, { "epoch": 0.5406399411548364, "grad_norm": 0.45539832631819954, "learning_rate": 4.5853070382609584e-06, "loss": 0.4234, "step": 17640 }, { "epoch": 0.5406705896775775, "grad_norm": 1.1539208623492252, "learning_rate": 4.584812432450212e-06, "loss": 0.6462, "step": 17641 }, { "epoch": 0.5407012382003188, "grad_norm": 1.187849726895834, "learning_rate": 4.584317830730417e-06, "loss": 0.6063, "step": 17642 }, { "epoch": 0.5407318867230599, "grad_norm": 1.4015848907533872, "learning_rate": 4.583823233106445e-06, "loss": 0.6845, "step": 17643 }, { "epoch": 0.5407625352458012, "grad_norm": 1.1972464105403595, "learning_rate": 4.583328639583166e-06, "loss": 0.5732, "step": 17644 }, { "epoch": 0.5407931837685424, "grad_norm": 1.360277598135018, "learning_rate": 4.58283405016546e-06, "loss": 0.6673, "step": 17645 }, { "epoch": 0.5408238322912836, "grad_norm": 1.230142976473805, "learning_rate": 4.582339464858195e-06, "loss": 0.5601, "step": 17646 }, { "epoch": 0.5408544808140248, "grad_norm": 1.1787431829240251, "learning_rate": 4.581844883666246e-06, "loss": 0.6035, "step": 17647 }, { "epoch": 0.5408851293367659, "grad_norm": 1.1430051190340742, "learning_rate": 4.581350306594487e-06, "loss": 0.6103, "step": 17648 }, { "epoch": 0.5409157778595072, "grad_norm": 1.2103206090894978, "learning_rate": 4.580855733647791e-06, "loss": 0.6181, "step": 17649 }, { "epoch": 0.5409464263822483, "grad_norm": 1.3130451526012348, "learning_rate": 4.5803611648310295e-06, "loss": 0.6841, "step": 17650 }, { "epoch": 0.5409770749049896, "grad_norm": 1.205225037859385, "learning_rate": 4.579866600149077e-06, "loss": 0.6408, "step": 17651 }, { "epoch": 0.5410077234277307, "grad_norm": 1.4012649948067089, "learning_rate": 4.579372039606806e-06, "loss": 0.6978, "step": 17652 }, { "epoch": 0.541038371950472, "grad_norm": 1.2429663201915184, "learning_rate": 4.578877483209091e-06, "loss": 0.5886, "step": 17653 }, { "epoch": 0.5410690204732131, "grad_norm": 1.3303395593350158, "learning_rate": 4.578382930960805e-06, "loss": 0.6117, "step": 17654 }, { "epoch": 0.5410996689959544, "grad_norm": 1.4365399264826644, "learning_rate": 4.5778883828668165e-06, "loss": 0.7779, "step": 17655 }, { "epoch": 0.5411303175186956, "grad_norm": 1.3677248018993116, "learning_rate": 4.577393838932006e-06, "loss": 0.7285, "step": 17656 }, { "epoch": 0.5411609660414368, "grad_norm": 1.484759666990474, "learning_rate": 4.576899299161239e-06, "loss": 0.5718, "step": 17657 }, { "epoch": 0.541191614564178, "grad_norm": 1.2922385643060676, "learning_rate": 4.576404763559392e-06, "loss": 0.6573, "step": 17658 }, { "epoch": 0.5412222630869192, "grad_norm": 1.2984002773258885, "learning_rate": 4.575910232131338e-06, "loss": 0.5823, "step": 17659 }, { "epoch": 0.5412529116096604, "grad_norm": 1.3069648393120008, "learning_rate": 4.575415704881947e-06, "loss": 0.562, "step": 17660 }, { "epoch": 0.5412835601324016, "grad_norm": 1.1960905481344843, "learning_rate": 4.5749211818160964e-06, "loss": 0.5452, "step": 17661 }, { "epoch": 0.5413142086551428, "grad_norm": 0.5091707579774253, "learning_rate": 4.574426662938655e-06, "loss": 0.4148, "step": 17662 }, { "epoch": 0.541344857177884, "grad_norm": 0.4915550207710418, "learning_rate": 4.573932148254496e-06, "loss": 0.4146, "step": 17663 }, { "epoch": 0.5413755057006252, "grad_norm": 1.416675258760297, "learning_rate": 4.573437637768493e-06, "loss": 0.5993, "step": 17664 }, { "epoch": 0.5414061542233665, "grad_norm": 1.1997639837054612, "learning_rate": 4.57294313148552e-06, "loss": 0.5654, "step": 17665 }, { "epoch": 0.5414368027461076, "grad_norm": 0.4381040666271135, "learning_rate": 4.572448629410444e-06, "loss": 0.4048, "step": 17666 }, { "epoch": 0.5414674512688489, "grad_norm": 1.334581410475643, "learning_rate": 4.571954131548144e-06, "loss": 0.6247, "step": 17667 }, { "epoch": 0.54149809979159, "grad_norm": 1.3415361357669453, "learning_rate": 4.571459637903489e-06, "loss": 0.5943, "step": 17668 }, { "epoch": 0.5415287483143313, "grad_norm": 0.4545505226604671, "learning_rate": 4.57096514848135e-06, "loss": 0.406, "step": 17669 }, { "epoch": 0.5415593968370724, "grad_norm": 1.1191858855871504, "learning_rate": 4.570470663286603e-06, "loss": 0.588, "step": 17670 }, { "epoch": 0.5415900453598137, "grad_norm": 1.2526366414448882, "learning_rate": 4.569976182324116e-06, "loss": 0.6622, "step": 17671 }, { "epoch": 0.5416206938825548, "grad_norm": 1.2611686053758209, "learning_rate": 4.569481705598766e-06, "loss": 0.667, "step": 17672 }, { "epoch": 0.5416513424052961, "grad_norm": 1.2808552661971033, "learning_rate": 4.568987233115423e-06, "loss": 0.6604, "step": 17673 }, { "epoch": 0.5416819909280373, "grad_norm": 0.4666313351391256, "learning_rate": 4.568492764878958e-06, "loss": 0.4141, "step": 17674 }, { "epoch": 0.5417126394507785, "grad_norm": 1.2035196879417587, "learning_rate": 4.567998300894245e-06, "loss": 0.5456, "step": 17675 }, { "epoch": 0.5417432879735197, "grad_norm": 1.1634117591029585, "learning_rate": 4.567503841166155e-06, "loss": 0.5945, "step": 17676 }, { "epoch": 0.5417739364962609, "grad_norm": 0.4781273130158733, "learning_rate": 4.56700938569956e-06, "loss": 0.4307, "step": 17677 }, { "epoch": 0.5418045850190021, "grad_norm": 1.356448716603784, "learning_rate": 4.566514934499333e-06, "loss": 0.6482, "step": 17678 }, { "epoch": 0.5418352335417432, "grad_norm": 0.444227102302666, "learning_rate": 4.566020487570344e-06, "loss": 0.4186, "step": 17679 }, { "epoch": 0.5418658820644845, "grad_norm": 0.470472165358254, "learning_rate": 4.565526044917467e-06, "loss": 0.4208, "step": 17680 }, { "epoch": 0.5418965305872256, "grad_norm": 1.1526793557652646, "learning_rate": 4.565031606545574e-06, "loss": 0.6029, "step": 17681 }, { "epoch": 0.5419271791099669, "grad_norm": 1.398948053933204, "learning_rate": 4.564537172459533e-06, "loss": 0.6493, "step": 17682 }, { "epoch": 0.5419578276327081, "grad_norm": 1.3576855611615652, "learning_rate": 4.564042742664221e-06, "loss": 0.6015, "step": 17683 }, { "epoch": 0.5419884761554493, "grad_norm": 1.237822911993113, "learning_rate": 4.563548317164509e-06, "loss": 0.6301, "step": 17684 }, { "epoch": 0.5420191246781905, "grad_norm": 0.43491215751693246, "learning_rate": 4.563053895965263e-06, "loss": 0.4071, "step": 17685 }, { "epoch": 0.5420497732009317, "grad_norm": 0.45303687873668586, "learning_rate": 4.562559479071362e-06, "loss": 0.4056, "step": 17686 }, { "epoch": 0.5420804217236729, "grad_norm": 1.3160020365088096, "learning_rate": 4.562065066487672e-06, "loss": 0.6436, "step": 17687 }, { "epoch": 0.5421110702464141, "grad_norm": 1.2977936598635884, "learning_rate": 4.561570658219069e-06, "loss": 0.5692, "step": 17688 }, { "epoch": 0.5421417187691553, "grad_norm": 1.3100795589305976, "learning_rate": 4.561076254270422e-06, "loss": 0.7316, "step": 17689 }, { "epoch": 0.5421723672918966, "grad_norm": 1.2118472415408172, "learning_rate": 4.560581854646602e-06, "loss": 0.7103, "step": 17690 }, { "epoch": 0.5422030158146377, "grad_norm": 0.45860399351140946, "learning_rate": 4.560087459352482e-06, "loss": 0.4092, "step": 17691 }, { "epoch": 0.542233664337379, "grad_norm": 1.183091274238156, "learning_rate": 4.559593068392933e-06, "loss": 0.7355, "step": 17692 }, { "epoch": 0.5422643128601201, "grad_norm": 0.4436957119861174, "learning_rate": 4.559098681772826e-06, "loss": 0.4241, "step": 17693 }, { "epoch": 0.5422949613828614, "grad_norm": 1.276976619498005, "learning_rate": 4.558604299497032e-06, "loss": 0.652, "step": 17694 }, { "epoch": 0.5423256099056025, "grad_norm": 1.0809352115611743, "learning_rate": 4.558109921570425e-06, "loss": 0.5811, "step": 17695 }, { "epoch": 0.5423562584283438, "grad_norm": 1.2005008862600974, "learning_rate": 4.55761554799787e-06, "loss": 0.6252, "step": 17696 }, { "epoch": 0.5423869069510849, "grad_norm": 1.100154087242885, "learning_rate": 4.557121178784246e-06, "loss": 0.6076, "step": 17697 }, { "epoch": 0.5424175554738262, "grad_norm": 1.4104039719622545, "learning_rate": 4.556626813934417e-06, "loss": 0.754, "step": 17698 }, { "epoch": 0.5424482039965673, "grad_norm": 1.4620524261639085, "learning_rate": 4.55613245345326e-06, "loss": 0.6634, "step": 17699 }, { "epoch": 0.5424788525193086, "grad_norm": 1.4598409735031597, "learning_rate": 4.555638097345644e-06, "loss": 0.8014, "step": 17700 }, { "epoch": 0.5425095010420498, "grad_norm": 1.1324044385326313, "learning_rate": 4.555143745616437e-06, "loss": 0.5753, "step": 17701 }, { "epoch": 0.542540149564791, "grad_norm": 1.1392458638205247, "learning_rate": 4.554649398270515e-06, "loss": 0.7085, "step": 17702 }, { "epoch": 0.5425707980875322, "grad_norm": 1.1791512043750318, "learning_rate": 4.5541550553127445e-06, "loss": 0.6326, "step": 17703 }, { "epoch": 0.5426014466102734, "grad_norm": 0.48253567054300756, "learning_rate": 4.553660716747998e-06, "loss": 0.4081, "step": 17704 }, { "epoch": 0.5426320951330146, "grad_norm": 1.2534358856473515, "learning_rate": 4.5531663825811486e-06, "loss": 0.6965, "step": 17705 }, { "epoch": 0.5426627436557558, "grad_norm": 1.222386059404931, "learning_rate": 4.5526720528170635e-06, "loss": 0.646, "step": 17706 }, { "epoch": 0.542693392178497, "grad_norm": 0.45318606514834137, "learning_rate": 4.552177727460616e-06, "loss": 0.4188, "step": 17707 }, { "epoch": 0.5427240407012383, "grad_norm": 1.31177045922093, "learning_rate": 4.551683406516677e-06, "loss": 0.6846, "step": 17708 }, { "epoch": 0.5427546892239794, "grad_norm": 1.2769869010464594, "learning_rate": 4.551189089990113e-06, "loss": 0.6251, "step": 17709 }, { "epoch": 0.5427853377467206, "grad_norm": 1.2583471744081753, "learning_rate": 4.550694777885801e-06, "loss": 0.5838, "step": 17710 }, { "epoch": 0.5428159862694618, "grad_norm": 0.4477001850715939, "learning_rate": 4.5502004702086076e-06, "loss": 0.4331, "step": 17711 }, { "epoch": 0.542846634792203, "grad_norm": 1.2719571437778459, "learning_rate": 4.549706166963402e-06, "loss": 0.647, "step": 17712 }, { "epoch": 0.5428772833149442, "grad_norm": 1.1369659405758106, "learning_rate": 4.549211868155059e-06, "loss": 0.6499, "step": 17713 }, { "epoch": 0.5429079318376854, "grad_norm": 1.3623020254404663, "learning_rate": 4.548717573788445e-06, "loss": 0.5888, "step": 17714 }, { "epoch": 0.5429385803604266, "grad_norm": 1.2369437433844173, "learning_rate": 4.548223283868433e-06, "loss": 0.6537, "step": 17715 }, { "epoch": 0.5429692288831678, "grad_norm": 1.1764155839151575, "learning_rate": 4.547728998399894e-06, "loss": 0.6728, "step": 17716 }, { "epoch": 0.542999877405909, "grad_norm": 1.196191321499815, "learning_rate": 4.547234717387694e-06, "loss": 0.5575, "step": 17717 }, { "epoch": 0.5430305259286502, "grad_norm": 1.2324305513988292, "learning_rate": 4.546740440836709e-06, "loss": 0.622, "step": 17718 }, { "epoch": 0.5430611744513915, "grad_norm": 0.4578328125725578, "learning_rate": 4.546246168751806e-06, "loss": 0.4157, "step": 17719 }, { "epoch": 0.5430918229741326, "grad_norm": 0.45696673976200963, "learning_rate": 4.545751901137853e-06, "loss": 0.4209, "step": 17720 }, { "epoch": 0.5431224714968739, "grad_norm": 1.2843758533361975, "learning_rate": 4.545257637999726e-06, "loss": 0.5813, "step": 17721 }, { "epoch": 0.543153120019615, "grad_norm": 1.439777453382194, "learning_rate": 4.544763379342291e-06, "loss": 0.59, "step": 17722 }, { "epoch": 0.5431837685423563, "grad_norm": 1.3342070842783953, "learning_rate": 4.544269125170417e-06, "loss": 0.6295, "step": 17723 }, { "epoch": 0.5432144170650974, "grad_norm": 1.4046122567861257, "learning_rate": 4.543774875488978e-06, "loss": 0.5704, "step": 17724 }, { "epoch": 0.5432450655878387, "grad_norm": 1.3391359072014553, "learning_rate": 4.54328063030284e-06, "loss": 0.7102, "step": 17725 }, { "epoch": 0.5432757141105798, "grad_norm": 1.2183055143320272, "learning_rate": 4.542786389616876e-06, "loss": 0.62, "step": 17726 }, { "epoch": 0.5433063626333211, "grad_norm": 1.4542971445426083, "learning_rate": 4.542292153435956e-06, "loss": 0.5983, "step": 17727 }, { "epoch": 0.5433370111560623, "grad_norm": 1.1597382161363001, "learning_rate": 4.541797921764945e-06, "loss": 0.6732, "step": 17728 }, { "epoch": 0.5433676596788035, "grad_norm": 1.5613074348183886, "learning_rate": 4.5413036946087195e-06, "loss": 0.7392, "step": 17729 }, { "epoch": 0.5433983082015447, "grad_norm": 0.4671825023014878, "learning_rate": 4.540809471972146e-06, "loss": 0.4103, "step": 17730 }, { "epoch": 0.5434289567242859, "grad_norm": 1.3058352553596053, "learning_rate": 4.540315253860092e-06, "loss": 0.6594, "step": 17731 }, { "epoch": 0.5434596052470271, "grad_norm": 1.2301807652987813, "learning_rate": 4.5398210402774315e-06, "loss": 0.6401, "step": 17732 }, { "epoch": 0.5434902537697683, "grad_norm": 0.4454883206403993, "learning_rate": 4.5393268312290304e-06, "loss": 0.3723, "step": 17733 }, { "epoch": 0.5435209022925095, "grad_norm": 1.3681633655840686, "learning_rate": 4.53883262671976e-06, "loss": 0.7879, "step": 17734 }, { "epoch": 0.5435515508152508, "grad_norm": 1.4275523008964845, "learning_rate": 4.538338426754491e-06, "loss": 0.7758, "step": 17735 }, { "epoch": 0.5435821993379919, "grad_norm": 1.3075651395011123, "learning_rate": 4.537844231338091e-06, "loss": 0.5524, "step": 17736 }, { "epoch": 0.5436128478607332, "grad_norm": 0.45676720724604425, "learning_rate": 4.53735004047543e-06, "loss": 0.4005, "step": 17737 }, { "epoch": 0.5436434963834743, "grad_norm": 1.4309238244036453, "learning_rate": 4.536855854171378e-06, "loss": 0.7775, "step": 17738 }, { "epoch": 0.5436741449062156, "grad_norm": 1.371893854978407, "learning_rate": 4.536361672430802e-06, "loss": 0.6517, "step": 17739 }, { "epoch": 0.5437047934289567, "grad_norm": 1.1700478543910013, "learning_rate": 4.535867495258576e-06, "loss": 0.6406, "step": 17740 }, { "epoch": 0.5437354419516979, "grad_norm": 1.2655683207575146, "learning_rate": 4.535373322659563e-06, "loss": 0.6324, "step": 17741 }, { "epoch": 0.5437660904744391, "grad_norm": 1.2702593973827354, "learning_rate": 4.534879154638637e-06, "loss": 0.6177, "step": 17742 }, { "epoch": 0.5437967389971803, "grad_norm": 1.2553678261193617, "learning_rate": 4.534384991200667e-06, "loss": 0.676, "step": 17743 }, { "epoch": 0.5438273875199215, "grad_norm": 1.2663864238981244, "learning_rate": 4.533890832350519e-06, "loss": 0.6954, "step": 17744 }, { "epoch": 0.5438580360426627, "grad_norm": 1.2239779937464013, "learning_rate": 4.533396678093065e-06, "loss": 0.7472, "step": 17745 }, { "epoch": 0.543888684565404, "grad_norm": 1.3436631640906345, "learning_rate": 4.532902528433173e-06, "loss": 0.6397, "step": 17746 }, { "epoch": 0.5439193330881451, "grad_norm": 1.3428079038136784, "learning_rate": 4.53240838337571e-06, "loss": 0.7436, "step": 17747 }, { "epoch": 0.5439499816108864, "grad_norm": 1.4218198459274247, "learning_rate": 4.531914242925548e-06, "loss": 0.6553, "step": 17748 }, { "epoch": 0.5439806301336275, "grad_norm": 1.1709078809667794, "learning_rate": 4.531420107087557e-06, "loss": 0.6348, "step": 17749 }, { "epoch": 0.5440112786563688, "grad_norm": 1.4319997073005775, "learning_rate": 4.530925975866599e-06, "loss": 0.6377, "step": 17750 }, { "epoch": 0.5440419271791099, "grad_norm": 1.4010248326323727, "learning_rate": 4.530431849267552e-06, "loss": 0.5858, "step": 17751 }, { "epoch": 0.5440725757018512, "grad_norm": 0.4612149439900409, "learning_rate": 4.529937727295276e-06, "loss": 0.4083, "step": 17752 }, { "epoch": 0.5441032242245923, "grad_norm": 0.45513181564347277, "learning_rate": 4.529443609954647e-06, "loss": 0.3998, "step": 17753 }, { "epoch": 0.5441338727473336, "grad_norm": 1.16308800445084, "learning_rate": 4.52894949725053e-06, "loss": 0.6571, "step": 17754 }, { "epoch": 0.5441645212700748, "grad_norm": 1.337718177313423, "learning_rate": 4.528455389187792e-06, "loss": 0.6742, "step": 17755 }, { "epoch": 0.544195169792816, "grad_norm": 1.2454101524192278, "learning_rate": 4.527961285771306e-06, "loss": 0.6325, "step": 17756 }, { "epoch": 0.5442258183155572, "grad_norm": 1.2521691419652015, "learning_rate": 4.527467187005938e-06, "loss": 0.6055, "step": 17757 }, { "epoch": 0.5442564668382984, "grad_norm": 1.4631631731703332, "learning_rate": 4.526973092896556e-06, "loss": 0.5977, "step": 17758 }, { "epoch": 0.5442871153610396, "grad_norm": 1.4156380634995518, "learning_rate": 4.526479003448029e-06, "loss": 0.6926, "step": 17759 }, { "epoch": 0.5443177638837808, "grad_norm": 1.2337601415652852, "learning_rate": 4.525984918665225e-06, "loss": 0.6161, "step": 17760 }, { "epoch": 0.544348412406522, "grad_norm": 2.377452667826423, "learning_rate": 4.5254908385530135e-06, "loss": 0.624, "step": 17761 }, { "epoch": 0.5443790609292632, "grad_norm": 1.16703945827775, "learning_rate": 4.524996763116265e-06, "loss": 0.6168, "step": 17762 }, { "epoch": 0.5444097094520044, "grad_norm": 1.2957862668401798, "learning_rate": 4.5245026923598396e-06, "loss": 0.6461, "step": 17763 }, { "epoch": 0.5444403579747457, "grad_norm": 1.307797930976788, "learning_rate": 4.5240086262886154e-06, "loss": 0.6515, "step": 17764 }, { "epoch": 0.5444710064974868, "grad_norm": 1.31663333874331, "learning_rate": 4.523514564907454e-06, "loss": 0.6251, "step": 17765 }, { "epoch": 0.5445016550202281, "grad_norm": 1.1777852309943462, "learning_rate": 4.523020508221225e-06, "loss": 0.7102, "step": 17766 }, { "epoch": 0.5445323035429692, "grad_norm": 1.2031117258903465, "learning_rate": 4.522526456234797e-06, "loss": 0.619, "step": 17767 }, { "epoch": 0.5445629520657105, "grad_norm": 1.1487833206053442, "learning_rate": 4.522032408953039e-06, "loss": 0.5664, "step": 17768 }, { "epoch": 0.5445936005884516, "grad_norm": 1.4010370115790374, "learning_rate": 4.521538366380818e-06, "loss": 0.7219, "step": 17769 }, { "epoch": 0.5446242491111929, "grad_norm": 1.2545803855776223, "learning_rate": 4.521044328523001e-06, "loss": 0.6477, "step": 17770 }, { "epoch": 0.544654897633934, "grad_norm": 1.2598225223371327, "learning_rate": 4.520550295384457e-06, "loss": 0.692, "step": 17771 }, { "epoch": 0.5446855461566752, "grad_norm": 1.1097231413646003, "learning_rate": 4.520056266970055e-06, "loss": 0.703, "step": 17772 }, { "epoch": 0.5447161946794165, "grad_norm": 1.3343385400694372, "learning_rate": 4.51956224328466e-06, "loss": 0.6633, "step": 17773 }, { "epoch": 0.5447468432021576, "grad_norm": 1.2165338464059308, "learning_rate": 4.519068224333141e-06, "loss": 0.6774, "step": 17774 }, { "epoch": 0.5447774917248989, "grad_norm": 0.4894552313880652, "learning_rate": 4.518574210120366e-06, "loss": 0.4255, "step": 17775 }, { "epoch": 0.54480814024764, "grad_norm": 1.425128733462551, "learning_rate": 4.518080200651203e-06, "loss": 0.6832, "step": 17776 }, { "epoch": 0.5448387887703813, "grad_norm": 1.2310888333648697, "learning_rate": 4.517586195930519e-06, "loss": 0.5635, "step": 17777 }, { "epoch": 0.5448694372931224, "grad_norm": 1.2078303822820935, "learning_rate": 4.517092195963182e-06, "loss": 0.5477, "step": 17778 }, { "epoch": 0.5449000858158637, "grad_norm": 1.2766510722259363, "learning_rate": 4.516598200754057e-06, "loss": 0.6499, "step": 17779 }, { "epoch": 0.5449307343386048, "grad_norm": 1.2227066921413023, "learning_rate": 4.5161042103080156e-06, "loss": 0.7179, "step": 17780 }, { "epoch": 0.5449613828613461, "grad_norm": 1.3479801824882882, "learning_rate": 4.515610224629924e-06, "loss": 0.6368, "step": 17781 }, { "epoch": 0.5449920313840872, "grad_norm": 1.218931479502056, "learning_rate": 4.515116243724646e-06, "loss": 0.6213, "step": 17782 }, { "epoch": 0.5450226799068285, "grad_norm": 0.46461173446414356, "learning_rate": 4.514622267597054e-06, "loss": 0.4239, "step": 17783 }, { "epoch": 0.5450533284295697, "grad_norm": 1.2408171725733115, "learning_rate": 4.514128296252013e-06, "loss": 0.6187, "step": 17784 }, { "epoch": 0.5450839769523109, "grad_norm": 1.3776934134459642, "learning_rate": 4.513634329694389e-06, "loss": 0.6976, "step": 17785 }, { "epoch": 0.5451146254750521, "grad_norm": 1.1842248471380563, "learning_rate": 4.513140367929052e-06, "loss": 0.5733, "step": 17786 }, { "epoch": 0.5451452739977933, "grad_norm": 1.1337899465693608, "learning_rate": 4.512646410960865e-06, "loss": 0.6405, "step": 17787 }, { "epoch": 0.5451759225205345, "grad_norm": 1.1126981440020693, "learning_rate": 4.5121524587947e-06, "loss": 0.5655, "step": 17788 }, { "epoch": 0.5452065710432757, "grad_norm": 1.254270945369073, "learning_rate": 4.51165851143542e-06, "loss": 0.6576, "step": 17789 }, { "epoch": 0.5452372195660169, "grad_norm": 0.5005430972352348, "learning_rate": 4.511164568887894e-06, "loss": 0.3965, "step": 17790 }, { "epoch": 0.5452678680887582, "grad_norm": 1.2918429612599007, "learning_rate": 4.510670631156989e-06, "loss": 0.5488, "step": 17791 }, { "epoch": 0.5452985166114993, "grad_norm": 1.3126828034439768, "learning_rate": 4.510176698247573e-06, "loss": 0.7063, "step": 17792 }, { "epoch": 0.5453291651342406, "grad_norm": 1.3945687831929352, "learning_rate": 4.509682770164508e-06, "loss": 0.671, "step": 17793 }, { "epoch": 0.5453598136569817, "grad_norm": 1.269384637467288, "learning_rate": 4.509188846912667e-06, "loss": 0.6063, "step": 17794 }, { "epoch": 0.545390462179723, "grad_norm": 1.2989120710104574, "learning_rate": 4.508694928496911e-06, "loss": 0.6039, "step": 17795 }, { "epoch": 0.5454211107024641, "grad_norm": 1.2848337757567323, "learning_rate": 4.508201014922113e-06, "loss": 0.5686, "step": 17796 }, { "epoch": 0.5454517592252054, "grad_norm": 1.361126036355244, "learning_rate": 4.507707106193135e-06, "loss": 0.6368, "step": 17797 }, { "epoch": 0.5454824077479465, "grad_norm": 1.4925034218389295, "learning_rate": 4.507213202314843e-06, "loss": 0.6198, "step": 17798 }, { "epoch": 0.5455130562706878, "grad_norm": 1.4279449685056653, "learning_rate": 4.506719303292107e-06, "loss": 0.7227, "step": 17799 }, { "epoch": 0.545543704793429, "grad_norm": 0.4614692363411866, "learning_rate": 4.506225409129792e-06, "loss": 0.4118, "step": 17800 }, { "epoch": 0.5455743533161702, "grad_norm": 1.4302755060257626, "learning_rate": 4.505731519832763e-06, "loss": 0.6682, "step": 17801 }, { "epoch": 0.5456050018389114, "grad_norm": 1.3406056313950927, "learning_rate": 4.50523763540589e-06, "loss": 0.6445, "step": 17802 }, { "epoch": 0.5456356503616525, "grad_norm": 1.1049874861418352, "learning_rate": 4.504743755854037e-06, "loss": 0.5917, "step": 17803 }, { "epoch": 0.5456662988843938, "grad_norm": 1.221135769167515, "learning_rate": 4.504249881182067e-06, "loss": 0.5976, "step": 17804 }, { "epoch": 0.5456969474071349, "grad_norm": 1.335843281553667, "learning_rate": 4.503756011394854e-06, "loss": 0.6665, "step": 17805 }, { "epoch": 0.5457275959298762, "grad_norm": 1.2057548036949741, "learning_rate": 4.503262146497256e-06, "loss": 0.5936, "step": 17806 }, { "epoch": 0.5457582444526173, "grad_norm": 1.241374594680833, "learning_rate": 4.502768286494148e-06, "loss": 0.6044, "step": 17807 }, { "epoch": 0.5457888929753586, "grad_norm": 1.5571706397048586, "learning_rate": 4.502274431390388e-06, "loss": 0.6793, "step": 17808 }, { "epoch": 0.5458195414980997, "grad_norm": 1.235560333990643, "learning_rate": 4.501780581190845e-06, "loss": 0.6999, "step": 17809 }, { "epoch": 0.545850190020841, "grad_norm": 1.2434906192208386, "learning_rate": 4.501286735900388e-06, "loss": 0.5477, "step": 17810 }, { "epoch": 0.5458808385435822, "grad_norm": 1.2441283448329516, "learning_rate": 4.500792895523879e-06, "loss": 0.5553, "step": 17811 }, { "epoch": 0.5459114870663234, "grad_norm": 1.2812130737264569, "learning_rate": 4.500299060066184e-06, "loss": 0.6136, "step": 17812 }, { "epoch": 0.5459421355890646, "grad_norm": 1.6664946321728875, "learning_rate": 4.499805229532172e-06, "loss": 0.6894, "step": 17813 }, { "epoch": 0.5459727841118058, "grad_norm": 0.4595513011333046, "learning_rate": 4.499311403926705e-06, "loss": 0.3989, "step": 17814 }, { "epoch": 0.546003432634547, "grad_norm": 1.3505266924137078, "learning_rate": 4.498817583254654e-06, "loss": 0.6363, "step": 17815 }, { "epoch": 0.5460340811572882, "grad_norm": 0.4642328756581593, "learning_rate": 4.4983237675208805e-06, "loss": 0.4148, "step": 17816 }, { "epoch": 0.5460647296800294, "grad_norm": 1.2009396362788687, "learning_rate": 4.49782995673025e-06, "loss": 0.694, "step": 17817 }, { "epoch": 0.5460953782027707, "grad_norm": 0.45338834084874396, "learning_rate": 4.4973361508876306e-06, "loss": 0.4224, "step": 17818 }, { "epoch": 0.5461260267255118, "grad_norm": 1.0972605625445433, "learning_rate": 4.496842349997886e-06, "loss": 0.591, "step": 17819 }, { "epoch": 0.5461566752482531, "grad_norm": 1.463916130002378, "learning_rate": 4.4963485540658824e-06, "loss": 0.6233, "step": 17820 }, { "epoch": 0.5461873237709942, "grad_norm": 1.1587378175677443, "learning_rate": 4.495854763096486e-06, "loss": 0.5884, "step": 17821 }, { "epoch": 0.5462179722937355, "grad_norm": 1.1663236398222776, "learning_rate": 4.495360977094561e-06, "loss": 0.704, "step": 17822 }, { "epoch": 0.5462486208164766, "grad_norm": 1.279472317553073, "learning_rate": 4.494867196064973e-06, "loss": 0.6039, "step": 17823 }, { "epoch": 0.5462792693392179, "grad_norm": 1.4168384454660377, "learning_rate": 4.49437342001259e-06, "loss": 0.7522, "step": 17824 }, { "epoch": 0.546309917861959, "grad_norm": 1.2414147046730537, "learning_rate": 4.493879648942272e-06, "loss": 0.5286, "step": 17825 }, { "epoch": 0.5463405663847003, "grad_norm": 0.46263859791908263, "learning_rate": 4.49338588285889e-06, "loss": 0.3975, "step": 17826 }, { "epoch": 0.5463712149074414, "grad_norm": 1.3341821129512044, "learning_rate": 4.4928921217673055e-06, "loss": 0.6504, "step": 17827 }, { "epoch": 0.5464018634301827, "grad_norm": 1.3778624975544205, "learning_rate": 4.492398365672384e-06, "loss": 0.6011, "step": 17828 }, { "epoch": 0.5464325119529239, "grad_norm": 1.310225947426908, "learning_rate": 4.491904614578992e-06, "loss": 0.6467, "step": 17829 }, { "epoch": 0.5464631604756651, "grad_norm": 1.2382641960262346, "learning_rate": 4.491410868491994e-06, "loss": 0.6304, "step": 17830 }, { "epoch": 0.5464938089984063, "grad_norm": 0.47027123900622975, "learning_rate": 4.490917127416254e-06, "loss": 0.3972, "step": 17831 }, { "epoch": 0.5465244575211475, "grad_norm": 1.2093367828861896, "learning_rate": 4.490423391356638e-06, "loss": 0.677, "step": 17832 }, { "epoch": 0.5465551060438887, "grad_norm": 1.1980910400754836, "learning_rate": 4.4899296603180105e-06, "loss": 0.6673, "step": 17833 }, { "epoch": 0.5465857545666298, "grad_norm": 0.44700460829336486, "learning_rate": 4.4894359343052375e-06, "loss": 0.4048, "step": 17834 }, { "epoch": 0.5466164030893711, "grad_norm": 1.3984569853567395, "learning_rate": 4.488942213323184e-06, "loss": 0.7071, "step": 17835 }, { "epoch": 0.5466470516121122, "grad_norm": 1.2712658065173914, "learning_rate": 4.48844849737671e-06, "loss": 0.6569, "step": 17836 }, { "epoch": 0.5466777001348535, "grad_norm": 1.2260630852064447, "learning_rate": 4.487954786470687e-06, "loss": 0.5824, "step": 17837 }, { "epoch": 0.5467083486575947, "grad_norm": 1.4757700663885231, "learning_rate": 4.487461080609976e-06, "loss": 0.6454, "step": 17838 }, { "epoch": 0.5467389971803359, "grad_norm": 1.312923546875549, "learning_rate": 4.486967379799441e-06, "loss": 0.6523, "step": 17839 }, { "epoch": 0.5467696457030771, "grad_norm": 1.1332358603018216, "learning_rate": 4.486473684043948e-06, "loss": 0.6586, "step": 17840 }, { "epoch": 0.5468002942258183, "grad_norm": 1.379270326190471, "learning_rate": 4.485979993348361e-06, "loss": 0.7233, "step": 17841 }, { "epoch": 0.5468309427485595, "grad_norm": 1.348319776590899, "learning_rate": 4.4854863077175445e-06, "loss": 0.6198, "step": 17842 }, { "epoch": 0.5468615912713007, "grad_norm": 1.2351115117372395, "learning_rate": 4.484992627156365e-06, "loss": 0.7047, "step": 17843 }, { "epoch": 0.5468922397940419, "grad_norm": 1.402619529411348, "learning_rate": 4.484498951669682e-06, "loss": 0.6526, "step": 17844 }, { "epoch": 0.5469228883167832, "grad_norm": 1.2832970098316046, "learning_rate": 4.484005281262364e-06, "loss": 0.5934, "step": 17845 }, { "epoch": 0.5469535368395243, "grad_norm": 1.2375391978108754, "learning_rate": 4.483511615939276e-06, "loss": 0.5619, "step": 17846 }, { "epoch": 0.5469841853622656, "grad_norm": 1.4663512185309886, "learning_rate": 4.483017955705277e-06, "loss": 0.6088, "step": 17847 }, { "epoch": 0.5470148338850067, "grad_norm": 0.4720178298203342, "learning_rate": 4.4825243005652364e-06, "loss": 0.3974, "step": 17848 }, { "epoch": 0.547045482407748, "grad_norm": 1.0586103420840505, "learning_rate": 4.4820306505240166e-06, "loss": 0.5549, "step": 17849 }, { "epoch": 0.5470761309304891, "grad_norm": 1.6132267952480108, "learning_rate": 4.4815370055864785e-06, "loss": 0.6528, "step": 17850 }, { "epoch": 0.5471067794532304, "grad_norm": 0.4656108558464345, "learning_rate": 4.481043365757492e-06, "loss": 0.3995, "step": 17851 }, { "epoch": 0.5471374279759715, "grad_norm": 1.3161875848257982, "learning_rate": 4.480549731041915e-06, "loss": 0.5765, "step": 17852 }, { "epoch": 0.5471680764987128, "grad_norm": 0.4526692577740443, "learning_rate": 4.480056101444617e-06, "loss": 0.408, "step": 17853 }, { "epoch": 0.547198725021454, "grad_norm": 1.2768263483152504, "learning_rate": 4.4795624769704585e-06, "loss": 0.6629, "step": 17854 }, { "epoch": 0.5472293735441952, "grad_norm": 1.353713434263981, "learning_rate": 4.479068857624304e-06, "loss": 0.5972, "step": 17855 }, { "epoch": 0.5472600220669364, "grad_norm": 1.327018752856147, "learning_rate": 4.478575243411017e-06, "loss": 0.5957, "step": 17856 }, { "epoch": 0.5472906705896776, "grad_norm": 1.7089011903152616, "learning_rate": 4.478081634335464e-06, "loss": 0.67, "step": 17857 }, { "epoch": 0.5473213191124188, "grad_norm": 1.2591774623507266, "learning_rate": 4.477588030402502e-06, "loss": 0.6445, "step": 17858 }, { "epoch": 0.54735196763516, "grad_norm": 1.3503945766161614, "learning_rate": 4.477094431617001e-06, "loss": 0.5506, "step": 17859 }, { "epoch": 0.5473826161579012, "grad_norm": 1.1675603368102636, "learning_rate": 4.476600837983821e-06, "loss": 0.6335, "step": 17860 }, { "epoch": 0.5474132646806424, "grad_norm": 0.47327067991478167, "learning_rate": 4.47610724950783e-06, "loss": 0.4105, "step": 17861 }, { "epoch": 0.5474439132033836, "grad_norm": 0.4761991455989637, "learning_rate": 4.475613666193887e-06, "loss": 0.4138, "step": 17862 }, { "epoch": 0.5474745617261249, "grad_norm": 1.4148751181437067, "learning_rate": 4.475120088046855e-06, "loss": 0.6501, "step": 17863 }, { "epoch": 0.547505210248866, "grad_norm": 1.198482432338182, "learning_rate": 4.4746265150716e-06, "loss": 0.6226, "step": 17864 }, { "epoch": 0.5475358587716072, "grad_norm": 0.421452234434681, "learning_rate": 4.474132947272985e-06, "loss": 0.3794, "step": 17865 }, { "epoch": 0.5475665072943484, "grad_norm": 1.422059174003983, "learning_rate": 4.4736393846558716e-06, "loss": 0.7317, "step": 17866 }, { "epoch": 0.5475971558170896, "grad_norm": 1.3157468452456784, "learning_rate": 4.473145827225125e-06, "loss": 0.6897, "step": 17867 }, { "epoch": 0.5476278043398308, "grad_norm": 1.2152045032728689, "learning_rate": 4.472652274985606e-06, "loss": 0.6917, "step": 17868 }, { "epoch": 0.547658452862572, "grad_norm": 1.2916297610143816, "learning_rate": 4.472158727942181e-06, "loss": 0.5484, "step": 17869 }, { "epoch": 0.5476891013853132, "grad_norm": 1.3546388517751222, "learning_rate": 4.471665186099711e-06, "loss": 0.7024, "step": 17870 }, { "epoch": 0.5477197499080544, "grad_norm": 1.3342499334798563, "learning_rate": 4.471171649463057e-06, "loss": 0.5897, "step": 17871 }, { "epoch": 0.5477503984307956, "grad_norm": 1.3113066993461078, "learning_rate": 4.470678118037086e-06, "loss": 0.7203, "step": 17872 }, { "epoch": 0.5477810469535368, "grad_norm": 1.3244173813828362, "learning_rate": 4.470184591826658e-06, "loss": 0.6592, "step": 17873 }, { "epoch": 0.5478116954762781, "grad_norm": 1.4163343393150643, "learning_rate": 4.469691070836637e-06, "loss": 0.663, "step": 17874 }, { "epoch": 0.5478423439990192, "grad_norm": 1.449054837919177, "learning_rate": 4.469197555071886e-06, "loss": 0.7273, "step": 17875 }, { "epoch": 0.5478729925217605, "grad_norm": 1.3294512728162715, "learning_rate": 4.468704044537268e-06, "loss": 0.6805, "step": 17876 }, { "epoch": 0.5479036410445016, "grad_norm": 1.265700181222549, "learning_rate": 4.468210539237642e-06, "loss": 0.7343, "step": 17877 }, { "epoch": 0.5479342895672429, "grad_norm": 1.2120778898835658, "learning_rate": 4.467717039177877e-06, "loss": 0.5525, "step": 17878 }, { "epoch": 0.547964938089984, "grad_norm": 1.1957795339317168, "learning_rate": 4.467223544362828e-06, "loss": 0.641, "step": 17879 }, { "epoch": 0.5479955866127253, "grad_norm": 1.1730067713817622, "learning_rate": 4.466730054797366e-06, "loss": 0.6281, "step": 17880 }, { "epoch": 0.5480262351354664, "grad_norm": 1.1131825519175838, "learning_rate": 4.466236570486348e-06, "loss": 0.6065, "step": 17881 }, { "epoch": 0.5480568836582077, "grad_norm": 1.131787777422055, "learning_rate": 4.465743091434636e-06, "loss": 0.6475, "step": 17882 }, { "epoch": 0.5480875321809489, "grad_norm": 1.278310505446578, "learning_rate": 4.465249617647096e-06, "loss": 0.6126, "step": 17883 }, { "epoch": 0.5481181807036901, "grad_norm": 0.5178217797716449, "learning_rate": 4.464756149128588e-06, "loss": 0.4109, "step": 17884 }, { "epoch": 0.5481488292264313, "grad_norm": 1.5170143294203822, "learning_rate": 4.4642626858839736e-06, "loss": 0.6637, "step": 17885 }, { "epoch": 0.5481794777491725, "grad_norm": 1.3446341683220069, "learning_rate": 4.4637692279181174e-06, "loss": 0.7042, "step": 17886 }, { "epoch": 0.5482101262719137, "grad_norm": 1.216784738226187, "learning_rate": 4.463275775235878e-06, "loss": 0.6159, "step": 17887 }, { "epoch": 0.5482407747946549, "grad_norm": 0.49439495501173397, "learning_rate": 4.462782327842122e-06, "loss": 0.4388, "step": 17888 }, { "epoch": 0.5482714233173961, "grad_norm": 1.4581471643622483, "learning_rate": 4.462288885741709e-06, "loss": 0.7057, "step": 17889 }, { "epoch": 0.5483020718401374, "grad_norm": 1.1536312438339895, "learning_rate": 4.461795448939499e-06, "loss": 0.5371, "step": 17890 }, { "epoch": 0.5483327203628785, "grad_norm": 0.475083468884949, "learning_rate": 4.46130201744036e-06, "loss": 0.4289, "step": 17891 }, { "epoch": 0.5483633688856198, "grad_norm": 1.3829251975309031, "learning_rate": 4.460808591249147e-06, "loss": 0.6517, "step": 17892 }, { "epoch": 0.5483940174083609, "grad_norm": 0.44372204051211334, "learning_rate": 4.460315170370725e-06, "loss": 0.4151, "step": 17893 }, { "epoch": 0.5484246659311022, "grad_norm": 1.3864897416163142, "learning_rate": 4.4598217548099575e-06, "loss": 0.6803, "step": 17894 }, { "epoch": 0.5484553144538433, "grad_norm": 1.3215948972534144, "learning_rate": 4.459328344571702e-06, "loss": 0.7121, "step": 17895 }, { "epoch": 0.5484859629765845, "grad_norm": 1.1871498692187639, "learning_rate": 4.458834939660824e-06, "loss": 0.6295, "step": 17896 }, { "epoch": 0.5485166114993257, "grad_norm": 1.2639680899748242, "learning_rate": 4.458341540082185e-06, "loss": 0.6877, "step": 17897 }, { "epoch": 0.5485472600220669, "grad_norm": 0.4996017836456581, "learning_rate": 4.4578481458406425e-06, "loss": 0.4247, "step": 17898 }, { "epoch": 0.5485779085448081, "grad_norm": 1.5045765451056776, "learning_rate": 4.4573547569410634e-06, "loss": 0.6351, "step": 17899 }, { "epoch": 0.5486085570675493, "grad_norm": 1.2046293909660188, "learning_rate": 4.456861373388307e-06, "loss": 0.548, "step": 17900 }, { "epoch": 0.5486392055902906, "grad_norm": 1.3600323644377894, "learning_rate": 4.456367995187231e-06, "loss": 0.6023, "step": 17901 }, { "epoch": 0.5486698541130317, "grad_norm": 1.2959027048408711, "learning_rate": 4.455874622342705e-06, "loss": 0.6417, "step": 17902 }, { "epoch": 0.548700502635773, "grad_norm": 1.3202678451983774, "learning_rate": 4.455381254859583e-06, "loss": 0.6838, "step": 17903 }, { "epoch": 0.5487311511585141, "grad_norm": 1.231225157790908, "learning_rate": 4.454887892742728e-06, "loss": 0.6241, "step": 17904 }, { "epoch": 0.5487617996812554, "grad_norm": 1.1685713606077492, "learning_rate": 4.454394535997004e-06, "loss": 0.6116, "step": 17905 }, { "epoch": 0.5487924482039965, "grad_norm": 1.2829281801693462, "learning_rate": 4.4539011846272684e-06, "loss": 0.5897, "step": 17906 }, { "epoch": 0.5488230967267378, "grad_norm": 1.2373405753066644, "learning_rate": 4.453407838638385e-06, "loss": 0.6592, "step": 17907 }, { "epoch": 0.5488537452494789, "grad_norm": 1.2348635605388179, "learning_rate": 4.452914498035215e-06, "loss": 0.6708, "step": 17908 }, { "epoch": 0.5488843937722202, "grad_norm": 1.2167417866374748, "learning_rate": 4.452421162822616e-06, "loss": 0.5873, "step": 17909 }, { "epoch": 0.5489150422949614, "grad_norm": 1.4239856354913294, "learning_rate": 4.451927833005453e-06, "loss": 0.6285, "step": 17910 }, { "epoch": 0.5489456908177026, "grad_norm": 0.4859644927584399, "learning_rate": 4.451434508588587e-06, "loss": 0.4348, "step": 17911 }, { "epoch": 0.5489763393404438, "grad_norm": 1.4835156832961571, "learning_rate": 4.450941189576874e-06, "loss": 0.6474, "step": 17912 }, { "epoch": 0.549006987863185, "grad_norm": 1.3515393306086427, "learning_rate": 4.4504478759751805e-06, "loss": 0.5745, "step": 17913 }, { "epoch": 0.5490376363859262, "grad_norm": 1.1180474105145062, "learning_rate": 4.449954567788363e-06, "loss": 0.545, "step": 17914 }, { "epoch": 0.5490682849086674, "grad_norm": 1.1624258392991225, "learning_rate": 4.449461265021284e-06, "loss": 0.6543, "step": 17915 }, { "epoch": 0.5490989334314086, "grad_norm": 0.4352134102702665, "learning_rate": 4.448967967678805e-06, "loss": 0.4083, "step": 17916 }, { "epoch": 0.5491295819541498, "grad_norm": 1.3371127782195673, "learning_rate": 4.448474675765783e-06, "loss": 0.6295, "step": 17917 }, { "epoch": 0.549160230476891, "grad_norm": 1.2552893503462617, "learning_rate": 4.447981389287085e-06, "loss": 0.5696, "step": 17918 }, { "epoch": 0.5491908789996323, "grad_norm": 1.2744544547234218, "learning_rate": 4.4474881082475655e-06, "loss": 0.7251, "step": 17919 }, { "epoch": 0.5492215275223734, "grad_norm": 1.1384323033732866, "learning_rate": 4.4469948326520865e-06, "loss": 0.6216, "step": 17920 }, { "epoch": 0.5492521760451147, "grad_norm": 1.1548933549051823, "learning_rate": 4.446501562505511e-06, "loss": 0.6682, "step": 17921 }, { "epoch": 0.5492828245678558, "grad_norm": 1.2997268475186792, "learning_rate": 4.446008297812694e-06, "loss": 0.6478, "step": 17922 }, { "epoch": 0.5493134730905971, "grad_norm": 1.3605168279989848, "learning_rate": 4.445515038578502e-06, "loss": 0.6141, "step": 17923 }, { "epoch": 0.5493441216133382, "grad_norm": 1.294214784327581, "learning_rate": 4.445021784807792e-06, "loss": 0.6706, "step": 17924 }, { "epoch": 0.5493747701360795, "grad_norm": 1.2588095384347615, "learning_rate": 4.444528536505423e-06, "loss": 0.5965, "step": 17925 }, { "epoch": 0.5494054186588206, "grad_norm": 1.154264489847834, "learning_rate": 4.444035293676257e-06, "loss": 0.6083, "step": 17926 }, { "epoch": 0.5494360671815618, "grad_norm": 1.254457542563899, "learning_rate": 4.443542056325153e-06, "loss": 0.543, "step": 17927 }, { "epoch": 0.549466715704303, "grad_norm": 0.47446264143460665, "learning_rate": 4.4430488244569715e-06, "loss": 0.3953, "step": 17928 }, { "epoch": 0.5494973642270442, "grad_norm": 1.3001367905674146, "learning_rate": 4.442555598076573e-06, "loss": 0.7171, "step": 17929 }, { "epoch": 0.5495280127497855, "grad_norm": 1.3287557759666186, "learning_rate": 4.442062377188818e-06, "loss": 0.5958, "step": 17930 }, { "epoch": 0.5495586612725266, "grad_norm": 1.1398908545822861, "learning_rate": 4.441569161798562e-06, "loss": 0.652, "step": 17931 }, { "epoch": 0.5495893097952679, "grad_norm": 1.2677561577057435, "learning_rate": 4.441075951910671e-06, "loss": 0.6018, "step": 17932 }, { "epoch": 0.549619958318009, "grad_norm": 1.5853025961412741, "learning_rate": 4.440582747529998e-06, "loss": 0.7528, "step": 17933 }, { "epoch": 0.5496506068407503, "grad_norm": 1.2346186385757436, "learning_rate": 4.440089548661411e-06, "loss": 0.6046, "step": 17934 }, { "epoch": 0.5496812553634914, "grad_norm": 1.8061656967981987, "learning_rate": 4.439596355309763e-06, "loss": 0.6517, "step": 17935 }, { "epoch": 0.5497119038862327, "grad_norm": 0.4514295019702664, "learning_rate": 4.439103167479913e-06, "loss": 0.4033, "step": 17936 }, { "epoch": 0.5497425524089739, "grad_norm": 1.2651119493753598, "learning_rate": 4.438609985176726e-06, "loss": 0.7055, "step": 17937 }, { "epoch": 0.5497732009317151, "grad_norm": 1.3064801008947935, "learning_rate": 4.438116808405058e-06, "loss": 0.667, "step": 17938 }, { "epoch": 0.5498038494544563, "grad_norm": 1.3373754206208655, "learning_rate": 4.437623637169768e-06, "loss": 0.653, "step": 17939 }, { "epoch": 0.5498344979771975, "grad_norm": 1.2208890858662218, "learning_rate": 4.437130471475717e-06, "loss": 0.6071, "step": 17940 }, { "epoch": 0.5498651464999387, "grad_norm": 1.5815918718761406, "learning_rate": 4.4366373113277615e-06, "loss": 0.6624, "step": 17941 }, { "epoch": 0.5498957950226799, "grad_norm": 1.2866200115370243, "learning_rate": 4.436144156730765e-06, "loss": 0.5728, "step": 17942 }, { "epoch": 0.5499264435454211, "grad_norm": 1.2692454302816407, "learning_rate": 4.435651007689585e-06, "loss": 0.6653, "step": 17943 }, { "epoch": 0.5499570920681623, "grad_norm": 0.4602243753998847, "learning_rate": 4.435157864209077e-06, "loss": 0.412, "step": 17944 }, { "epoch": 0.5499877405909035, "grad_norm": 1.242581180137364, "learning_rate": 4.434664726294106e-06, "loss": 0.7062, "step": 17945 }, { "epoch": 0.5500183891136448, "grad_norm": 0.45153657566304317, "learning_rate": 4.434171593949527e-06, "loss": 0.4281, "step": 17946 }, { "epoch": 0.5500490376363859, "grad_norm": 1.20238636282177, "learning_rate": 4.4336784671802e-06, "loss": 0.6446, "step": 17947 }, { "epoch": 0.5500796861591272, "grad_norm": 1.394049099999937, "learning_rate": 4.433185345990984e-06, "loss": 0.6681, "step": 17948 }, { "epoch": 0.5501103346818683, "grad_norm": 1.4159023590170303, "learning_rate": 4.432692230386737e-06, "loss": 0.5842, "step": 17949 }, { "epoch": 0.5501409832046096, "grad_norm": 1.2352094234003423, "learning_rate": 4.432199120372319e-06, "loss": 0.6573, "step": 17950 }, { "epoch": 0.5501716317273507, "grad_norm": 1.2262164259493342, "learning_rate": 4.431706015952589e-06, "loss": 0.5484, "step": 17951 }, { "epoch": 0.550202280250092, "grad_norm": 1.3992914979780096, "learning_rate": 4.431212917132404e-06, "loss": 0.6408, "step": 17952 }, { "epoch": 0.5502329287728331, "grad_norm": 1.2106820485375935, "learning_rate": 4.4307198239166245e-06, "loss": 0.6271, "step": 17953 }, { "epoch": 0.5502635772955744, "grad_norm": 1.2296941226307267, "learning_rate": 4.43022673631011e-06, "loss": 0.6663, "step": 17954 }, { "epoch": 0.5502942258183156, "grad_norm": 1.3244785631379676, "learning_rate": 4.429733654317713e-06, "loss": 0.6001, "step": 17955 }, { "epoch": 0.5503248743410568, "grad_norm": 1.2176301456695187, "learning_rate": 4.4292405779443e-06, "loss": 0.6019, "step": 17956 }, { "epoch": 0.550355522863798, "grad_norm": 0.4314940812450734, "learning_rate": 4.428747507194725e-06, "loss": 0.4133, "step": 17957 }, { "epoch": 0.5503861713865391, "grad_norm": 1.2820633934899228, "learning_rate": 4.428254442073845e-06, "loss": 0.6119, "step": 17958 }, { "epoch": 0.5504168199092804, "grad_norm": 1.495599158164744, "learning_rate": 4.427761382586522e-06, "loss": 0.6934, "step": 17959 }, { "epoch": 0.5504474684320215, "grad_norm": 0.43632778488608204, "learning_rate": 4.427268328737611e-06, "loss": 0.4094, "step": 17960 }, { "epoch": 0.5504781169547628, "grad_norm": 1.270811733042018, "learning_rate": 4.426775280531973e-06, "loss": 0.6906, "step": 17961 }, { "epoch": 0.5505087654775039, "grad_norm": 0.44627459324092394, "learning_rate": 4.426282237974465e-06, "loss": 0.4147, "step": 17962 }, { "epoch": 0.5505394140002452, "grad_norm": 1.1011657050113046, "learning_rate": 4.425789201069943e-06, "loss": 0.6033, "step": 17963 }, { "epoch": 0.5505700625229863, "grad_norm": 1.1666779855862175, "learning_rate": 4.425296169823269e-06, "loss": 0.626, "step": 17964 }, { "epoch": 0.5506007110457276, "grad_norm": 1.262992075392796, "learning_rate": 4.4248031442392995e-06, "loss": 0.5893, "step": 17965 }, { "epoch": 0.5506313595684688, "grad_norm": 1.2996125287797191, "learning_rate": 4.42431012432289e-06, "loss": 0.6177, "step": 17966 }, { "epoch": 0.55066200809121, "grad_norm": 1.2734370180251229, "learning_rate": 4.423817110078901e-06, "loss": 0.6076, "step": 17967 }, { "epoch": 0.5506926566139512, "grad_norm": 1.2459043329298722, "learning_rate": 4.423324101512188e-06, "loss": 0.6114, "step": 17968 }, { "epoch": 0.5507233051366924, "grad_norm": 1.3198888601964869, "learning_rate": 4.422831098627611e-06, "loss": 0.6033, "step": 17969 }, { "epoch": 0.5507539536594336, "grad_norm": 1.2281536491740197, "learning_rate": 4.4223381014300285e-06, "loss": 0.5314, "step": 17970 }, { "epoch": 0.5507846021821748, "grad_norm": 1.453711044772362, "learning_rate": 4.421845109924294e-06, "loss": 0.6413, "step": 17971 }, { "epoch": 0.550815250704916, "grad_norm": 1.2685137095060803, "learning_rate": 4.421352124115269e-06, "loss": 0.7121, "step": 17972 }, { "epoch": 0.5508458992276573, "grad_norm": 1.1849911631487609, "learning_rate": 4.42085914400781e-06, "loss": 0.6683, "step": 17973 }, { "epoch": 0.5508765477503984, "grad_norm": 1.3044979649059372, "learning_rate": 4.420366169606772e-06, "loss": 0.6238, "step": 17974 }, { "epoch": 0.5509071962731397, "grad_norm": 1.2024821097222755, "learning_rate": 4.4198732009170165e-06, "loss": 0.6828, "step": 17975 }, { "epoch": 0.5509378447958808, "grad_norm": 1.2637325500208978, "learning_rate": 4.419380237943396e-06, "loss": 0.6716, "step": 17976 }, { "epoch": 0.5509684933186221, "grad_norm": 1.651891582452175, "learning_rate": 4.418887280690774e-06, "loss": 0.6458, "step": 17977 }, { "epoch": 0.5509991418413632, "grad_norm": 1.2663069458355984, "learning_rate": 4.418394329164003e-06, "loss": 0.6458, "step": 17978 }, { "epoch": 0.5510297903641045, "grad_norm": 1.1125778041655654, "learning_rate": 4.417901383367941e-06, "loss": 0.6006, "step": 17979 }, { "epoch": 0.5510604388868456, "grad_norm": 1.9146522892103945, "learning_rate": 4.417408443307446e-06, "loss": 0.526, "step": 17980 }, { "epoch": 0.5510910874095869, "grad_norm": 1.3936236230426051, "learning_rate": 4.416915508987375e-06, "loss": 0.7003, "step": 17981 }, { "epoch": 0.551121735932328, "grad_norm": 1.138944068532338, "learning_rate": 4.416422580412584e-06, "loss": 0.6152, "step": 17982 }, { "epoch": 0.5511523844550693, "grad_norm": 1.2136134804465153, "learning_rate": 4.4159296575879305e-06, "loss": 0.6137, "step": 17983 }, { "epoch": 0.5511830329778105, "grad_norm": 1.1878921856083515, "learning_rate": 4.415436740518273e-06, "loss": 0.6297, "step": 17984 }, { "epoch": 0.5512136815005517, "grad_norm": 1.4300473478922233, "learning_rate": 4.4149438292084645e-06, "loss": 0.7076, "step": 17985 }, { "epoch": 0.5512443300232929, "grad_norm": 1.2827811189008562, "learning_rate": 4.414450923663367e-06, "loss": 0.7315, "step": 17986 }, { "epoch": 0.5512749785460341, "grad_norm": 1.178089595038091, "learning_rate": 4.413958023887831e-06, "loss": 0.6485, "step": 17987 }, { "epoch": 0.5513056270687753, "grad_norm": 1.2384968126839002, "learning_rate": 4.413465129886719e-06, "loss": 0.6671, "step": 17988 }, { "epoch": 0.5513362755915164, "grad_norm": 1.2081256913228973, "learning_rate": 4.412972241664885e-06, "loss": 0.6665, "step": 17989 }, { "epoch": 0.5513669241142577, "grad_norm": 1.309035334454045, "learning_rate": 4.412479359227185e-06, "loss": 0.6028, "step": 17990 }, { "epoch": 0.5513975726369988, "grad_norm": 0.5175056639742345, "learning_rate": 4.411986482578476e-06, "loss": 0.3927, "step": 17991 }, { "epoch": 0.5514282211597401, "grad_norm": 1.1423320221479258, "learning_rate": 4.411493611723616e-06, "loss": 0.633, "step": 17992 }, { "epoch": 0.5514588696824813, "grad_norm": 1.2402238415884153, "learning_rate": 4.4110007466674575e-06, "loss": 0.6623, "step": 17993 }, { "epoch": 0.5514895182052225, "grad_norm": 1.2783127974934227, "learning_rate": 4.410507887414861e-06, "loss": 0.6143, "step": 17994 }, { "epoch": 0.5515201667279637, "grad_norm": 0.46763691859157097, "learning_rate": 4.410015033970681e-06, "loss": 0.4211, "step": 17995 }, { "epoch": 0.5515508152507049, "grad_norm": 1.2098072164233256, "learning_rate": 4.409522186339774e-06, "loss": 0.6929, "step": 17996 }, { "epoch": 0.5515814637734461, "grad_norm": 1.350796201754545, "learning_rate": 4.409029344526997e-06, "loss": 0.6468, "step": 17997 }, { "epoch": 0.5516121122961873, "grad_norm": 0.43594689235757517, "learning_rate": 4.408536508537202e-06, "loss": 0.3836, "step": 17998 }, { "epoch": 0.5516427608189285, "grad_norm": 0.44568574520000687, "learning_rate": 4.408043678375251e-06, "loss": 0.4105, "step": 17999 }, { "epoch": 0.5516734093416698, "grad_norm": 1.2502093113793329, "learning_rate": 4.407550854045996e-06, "loss": 0.5793, "step": 18000 }, { "epoch": 0.5517040578644109, "grad_norm": 1.2642845791059985, "learning_rate": 4.407058035554294e-06, "loss": 0.6448, "step": 18001 }, { "epoch": 0.5517347063871522, "grad_norm": 1.3086543984827201, "learning_rate": 4.406565222905002e-06, "loss": 0.6337, "step": 18002 }, { "epoch": 0.5517653549098933, "grad_norm": 1.3341207731191473, "learning_rate": 4.406072416102974e-06, "loss": 0.6767, "step": 18003 }, { "epoch": 0.5517960034326346, "grad_norm": 1.4330427312440395, "learning_rate": 4.405579615153065e-06, "loss": 0.6739, "step": 18004 }, { "epoch": 0.5518266519553757, "grad_norm": 1.36817126067046, "learning_rate": 4.405086820060133e-06, "loss": 0.7015, "step": 18005 }, { "epoch": 0.551857300478117, "grad_norm": 1.329864428952901, "learning_rate": 4.4045940308290325e-06, "loss": 0.5608, "step": 18006 }, { "epoch": 0.5518879490008581, "grad_norm": 1.3647050898499558, "learning_rate": 4.40410124746462e-06, "loss": 0.7768, "step": 18007 }, { "epoch": 0.5519185975235994, "grad_norm": 1.4431918275740285, "learning_rate": 4.4036084699717515e-06, "loss": 0.6015, "step": 18008 }, { "epoch": 0.5519492460463405, "grad_norm": 0.47388040219672883, "learning_rate": 4.40311569835528e-06, "loss": 0.4018, "step": 18009 }, { "epoch": 0.5519798945690818, "grad_norm": 0.4789142773835576, "learning_rate": 4.402622932620063e-06, "loss": 0.4234, "step": 18010 }, { "epoch": 0.552010543091823, "grad_norm": 1.2292297636114229, "learning_rate": 4.4021301727709545e-06, "loss": 0.56, "step": 18011 }, { "epoch": 0.5520411916145642, "grad_norm": 1.2304028634618673, "learning_rate": 4.401637418812809e-06, "loss": 0.6445, "step": 18012 }, { "epoch": 0.5520718401373054, "grad_norm": 1.3130182887731083, "learning_rate": 4.401144670750485e-06, "loss": 0.722, "step": 18013 }, { "epoch": 0.5521024886600466, "grad_norm": 0.46233912320255427, "learning_rate": 4.400651928588835e-06, "loss": 0.4072, "step": 18014 }, { "epoch": 0.5521331371827878, "grad_norm": 1.3683378433902724, "learning_rate": 4.4001591923327146e-06, "loss": 0.713, "step": 18015 }, { "epoch": 0.552163785705529, "grad_norm": 0.46278895437884643, "learning_rate": 4.399666461986982e-06, "loss": 0.4249, "step": 18016 }, { "epoch": 0.5521944342282702, "grad_norm": 1.2912418092701121, "learning_rate": 4.399173737556485e-06, "loss": 0.636, "step": 18017 }, { "epoch": 0.5522250827510115, "grad_norm": 1.3442814045960227, "learning_rate": 4.398681019046086e-06, "loss": 0.7146, "step": 18018 }, { "epoch": 0.5522557312737526, "grad_norm": 1.263118462285998, "learning_rate": 4.398188306460635e-06, "loss": 0.6656, "step": 18019 }, { "epoch": 0.5522863797964938, "grad_norm": 1.2798943870013284, "learning_rate": 4.39769559980499e-06, "loss": 0.5678, "step": 18020 }, { "epoch": 0.552317028319235, "grad_norm": 1.1532748558040178, "learning_rate": 4.397202899084003e-06, "loss": 0.5889, "step": 18021 }, { "epoch": 0.5523476768419762, "grad_norm": 1.2676435617481958, "learning_rate": 4.3967102043025305e-06, "loss": 0.635, "step": 18022 }, { "epoch": 0.5523783253647174, "grad_norm": 1.3894880642941052, "learning_rate": 4.3962175154654264e-06, "loss": 0.6207, "step": 18023 }, { "epoch": 0.5524089738874586, "grad_norm": 1.2820930287620342, "learning_rate": 4.395724832577547e-06, "loss": 0.6633, "step": 18024 }, { "epoch": 0.5524396224101998, "grad_norm": 0.44907282390207204, "learning_rate": 4.395232155643744e-06, "loss": 0.3916, "step": 18025 }, { "epoch": 0.552470270932941, "grad_norm": 1.3337334665655736, "learning_rate": 4.394739484668874e-06, "loss": 0.5679, "step": 18026 }, { "epoch": 0.5525009194556822, "grad_norm": 0.4484543869935675, "learning_rate": 4.394246819657792e-06, "loss": 0.4001, "step": 18027 }, { "epoch": 0.5525315679784234, "grad_norm": 1.3648420154325083, "learning_rate": 4.393754160615348e-06, "loss": 0.6683, "step": 18028 }, { "epoch": 0.5525622165011647, "grad_norm": 1.210274713074059, "learning_rate": 4.3932615075464025e-06, "loss": 0.6465, "step": 18029 }, { "epoch": 0.5525928650239058, "grad_norm": 0.4424657043830901, "learning_rate": 4.392768860455805e-06, "loss": 0.4001, "step": 18030 }, { "epoch": 0.5526235135466471, "grad_norm": 1.09942987081888, "learning_rate": 4.392276219348411e-06, "loss": 0.6396, "step": 18031 }, { "epoch": 0.5526541620693882, "grad_norm": 1.3705942029469405, "learning_rate": 4.391783584229076e-06, "loss": 0.6403, "step": 18032 }, { "epoch": 0.5526848105921295, "grad_norm": 1.2131083641748224, "learning_rate": 4.391290955102651e-06, "loss": 0.6083, "step": 18033 }, { "epoch": 0.5527154591148706, "grad_norm": 0.4359069611504371, "learning_rate": 4.390798331973994e-06, "loss": 0.3858, "step": 18034 }, { "epoch": 0.5527461076376119, "grad_norm": 1.2795334840303936, "learning_rate": 4.390305714847956e-06, "loss": 0.6183, "step": 18035 }, { "epoch": 0.552776756160353, "grad_norm": 1.3245895733607653, "learning_rate": 4.389813103729392e-06, "loss": 0.6807, "step": 18036 }, { "epoch": 0.5528074046830943, "grad_norm": 1.2858816657047014, "learning_rate": 4.3893204986231554e-06, "loss": 0.5441, "step": 18037 }, { "epoch": 0.5528380532058355, "grad_norm": 1.2443326893049194, "learning_rate": 4.388827899534102e-06, "loss": 0.6455, "step": 18038 }, { "epoch": 0.5528687017285767, "grad_norm": 1.2536579558538552, "learning_rate": 4.388335306467079e-06, "loss": 0.571, "step": 18039 }, { "epoch": 0.5528993502513179, "grad_norm": 1.2365010749504932, "learning_rate": 4.3878427194269506e-06, "loss": 0.7203, "step": 18040 }, { "epoch": 0.5529299987740591, "grad_norm": 1.3208080278539311, "learning_rate": 4.387350138418559e-06, "loss": 0.602, "step": 18041 }, { "epoch": 0.5529606472968003, "grad_norm": 1.3584835270161641, "learning_rate": 4.386857563446767e-06, "loss": 0.694, "step": 18042 }, { "epoch": 0.5529912958195415, "grad_norm": 0.46402931850970996, "learning_rate": 4.386364994516424e-06, "loss": 0.4179, "step": 18043 }, { "epoch": 0.5530219443422827, "grad_norm": 0.46063055345325266, "learning_rate": 4.385872431632382e-06, "loss": 0.4093, "step": 18044 }, { "epoch": 0.553052592865024, "grad_norm": 1.770758870361855, "learning_rate": 4.3853798747994975e-06, "loss": 0.6423, "step": 18045 }, { "epoch": 0.5530832413877651, "grad_norm": 1.2377287442247562, "learning_rate": 4.384887324022622e-06, "loss": 0.5837, "step": 18046 }, { "epoch": 0.5531138899105064, "grad_norm": 0.43669663542794623, "learning_rate": 4.384394779306609e-06, "loss": 0.3952, "step": 18047 }, { "epoch": 0.5531445384332475, "grad_norm": 1.1802026471040663, "learning_rate": 4.383902240656312e-06, "loss": 0.5947, "step": 18048 }, { "epoch": 0.5531751869559888, "grad_norm": 1.4124501661959044, "learning_rate": 4.383409708076582e-06, "loss": 0.6395, "step": 18049 }, { "epoch": 0.5532058354787299, "grad_norm": 0.42964813247037675, "learning_rate": 4.382917181572276e-06, "loss": 0.397, "step": 18050 }, { "epoch": 0.5532364840014711, "grad_norm": 1.3109589070499597, "learning_rate": 4.382424661148245e-06, "loss": 0.6399, "step": 18051 }, { "epoch": 0.5532671325242123, "grad_norm": 1.22797219554147, "learning_rate": 4.38193214680934e-06, "loss": 0.6013, "step": 18052 }, { "epoch": 0.5532977810469535, "grad_norm": 1.2462259452872568, "learning_rate": 4.381439638560418e-06, "loss": 0.739, "step": 18053 }, { "epoch": 0.5533284295696947, "grad_norm": 1.376234344596458, "learning_rate": 4.380947136406329e-06, "loss": 0.5823, "step": 18054 }, { "epoch": 0.5533590780924359, "grad_norm": 1.3737689250539238, "learning_rate": 4.380454640351924e-06, "loss": 0.6535, "step": 18055 }, { "epoch": 0.5533897266151772, "grad_norm": 1.3857887445733736, "learning_rate": 4.379962150402061e-06, "loss": 0.5758, "step": 18056 }, { "epoch": 0.5534203751379183, "grad_norm": 1.339944663917816, "learning_rate": 4.379469666561588e-06, "loss": 0.6061, "step": 18057 }, { "epoch": 0.5534510236606596, "grad_norm": 1.364143900499536, "learning_rate": 4.378977188835358e-06, "loss": 0.6338, "step": 18058 }, { "epoch": 0.5534816721834007, "grad_norm": 1.2400871798053343, "learning_rate": 4.378484717228226e-06, "loss": 0.6623, "step": 18059 }, { "epoch": 0.553512320706142, "grad_norm": 0.4609554669255363, "learning_rate": 4.377992251745043e-06, "loss": 0.3856, "step": 18060 }, { "epoch": 0.5535429692288831, "grad_norm": 1.1949623368450089, "learning_rate": 4.377499792390663e-06, "loss": 0.6412, "step": 18061 }, { "epoch": 0.5535736177516244, "grad_norm": 1.3612130655541888, "learning_rate": 4.377007339169935e-06, "loss": 0.5605, "step": 18062 }, { "epoch": 0.5536042662743655, "grad_norm": 1.338074964590859, "learning_rate": 4.376514892087713e-06, "loss": 0.6389, "step": 18063 }, { "epoch": 0.5536349147971068, "grad_norm": 0.47118660489055036, "learning_rate": 4.37602245114885e-06, "loss": 0.4208, "step": 18064 }, { "epoch": 0.553665563319848, "grad_norm": 1.2697041149867994, "learning_rate": 4.375530016358198e-06, "loss": 0.6329, "step": 18065 }, { "epoch": 0.5536962118425892, "grad_norm": 1.4047670540256838, "learning_rate": 4.375037587720606e-06, "loss": 0.7102, "step": 18066 }, { "epoch": 0.5537268603653304, "grad_norm": 1.3623388795107079, "learning_rate": 4.374545165240931e-06, "loss": 0.6531, "step": 18067 }, { "epoch": 0.5537575088880716, "grad_norm": 1.5770797134155539, "learning_rate": 4.374052748924022e-06, "loss": 0.7052, "step": 18068 }, { "epoch": 0.5537881574108128, "grad_norm": 0.43782517101674157, "learning_rate": 4.373560338774731e-06, "loss": 0.4014, "step": 18069 }, { "epoch": 0.553818805933554, "grad_norm": 1.2213565178647723, "learning_rate": 4.3730679347979114e-06, "loss": 0.7073, "step": 18070 }, { "epoch": 0.5538494544562952, "grad_norm": 1.2063568550270616, "learning_rate": 4.372575536998411e-06, "loss": 0.5378, "step": 18071 }, { "epoch": 0.5538801029790364, "grad_norm": 0.43389747801042083, "learning_rate": 4.372083145381087e-06, "loss": 0.3764, "step": 18072 }, { "epoch": 0.5539107515017776, "grad_norm": 1.31943103896086, "learning_rate": 4.371590759950789e-06, "loss": 0.5915, "step": 18073 }, { "epoch": 0.5539414000245189, "grad_norm": 1.3438408931276962, "learning_rate": 4.371098380712366e-06, "loss": 0.6802, "step": 18074 }, { "epoch": 0.55397204854726, "grad_norm": 0.4446163314473293, "learning_rate": 4.370606007670673e-06, "loss": 0.402, "step": 18075 }, { "epoch": 0.5540026970700013, "grad_norm": 1.2055050818291744, "learning_rate": 4.3701136408305575e-06, "loss": 0.6567, "step": 18076 }, { "epoch": 0.5540333455927424, "grad_norm": 1.2604269220556854, "learning_rate": 4.3696212801968765e-06, "loss": 0.6486, "step": 18077 }, { "epoch": 0.5540639941154837, "grad_norm": 1.1631233164941561, "learning_rate": 4.369128925774477e-06, "loss": 0.5732, "step": 18078 }, { "epoch": 0.5540946426382248, "grad_norm": 1.2158772137357343, "learning_rate": 4.368636577568211e-06, "loss": 0.5986, "step": 18079 }, { "epoch": 0.5541252911609661, "grad_norm": 1.318611296133059, "learning_rate": 4.368144235582931e-06, "loss": 0.6455, "step": 18080 }, { "epoch": 0.5541559396837072, "grad_norm": 1.3983065898840161, "learning_rate": 4.367651899823489e-06, "loss": 0.6691, "step": 18081 }, { "epoch": 0.5541865882064484, "grad_norm": 1.3196725748598939, "learning_rate": 4.367159570294731e-06, "loss": 0.6664, "step": 18082 }, { "epoch": 0.5542172367291897, "grad_norm": 1.2630883792942436, "learning_rate": 4.366667247001516e-06, "loss": 0.6741, "step": 18083 }, { "epoch": 0.5542478852519308, "grad_norm": 1.3458425267136238, "learning_rate": 4.3661749299486886e-06, "loss": 0.6599, "step": 18084 }, { "epoch": 0.5542785337746721, "grad_norm": 1.363033770262143, "learning_rate": 4.3656826191411014e-06, "loss": 0.6402, "step": 18085 }, { "epoch": 0.5543091822974132, "grad_norm": 0.4841092839168013, "learning_rate": 4.365190314583606e-06, "loss": 0.4047, "step": 18086 }, { "epoch": 0.5543398308201545, "grad_norm": 1.4173265730323799, "learning_rate": 4.364698016281052e-06, "loss": 0.672, "step": 18087 }, { "epoch": 0.5543704793428956, "grad_norm": 1.168715631562016, "learning_rate": 4.364205724238292e-06, "loss": 0.6311, "step": 18088 }, { "epoch": 0.5544011278656369, "grad_norm": 0.4697474537177557, "learning_rate": 4.363713438460176e-06, "loss": 0.3959, "step": 18089 }, { "epoch": 0.554431776388378, "grad_norm": 1.1828212537886673, "learning_rate": 4.3632211589515525e-06, "loss": 0.6132, "step": 18090 }, { "epoch": 0.5544624249111193, "grad_norm": 1.2065506645098731, "learning_rate": 4.362728885717275e-06, "loss": 0.6723, "step": 18091 }, { "epoch": 0.5544930734338605, "grad_norm": 0.44425136622491573, "learning_rate": 4.362236618762195e-06, "loss": 0.4008, "step": 18092 }, { "epoch": 0.5545237219566017, "grad_norm": 1.2452158098596378, "learning_rate": 4.361744358091157e-06, "loss": 0.6731, "step": 18093 }, { "epoch": 0.5545543704793429, "grad_norm": 1.2230482431090124, "learning_rate": 4.361252103709018e-06, "loss": 0.6004, "step": 18094 }, { "epoch": 0.5545850190020841, "grad_norm": 1.2825094893367281, "learning_rate": 4.360759855620622e-06, "loss": 0.6437, "step": 18095 }, { "epoch": 0.5546156675248253, "grad_norm": 1.3089350718351405, "learning_rate": 4.360267613830827e-06, "loss": 0.6863, "step": 18096 }, { "epoch": 0.5546463160475665, "grad_norm": 1.4297266967535474, "learning_rate": 4.359775378344478e-06, "loss": 0.5996, "step": 18097 }, { "epoch": 0.5546769645703077, "grad_norm": 1.2650695185625225, "learning_rate": 4.359283149166423e-06, "loss": 0.6911, "step": 18098 }, { "epoch": 0.554707613093049, "grad_norm": 1.2649037380925665, "learning_rate": 4.358790926301518e-06, "loss": 0.6105, "step": 18099 }, { "epoch": 0.5547382616157901, "grad_norm": 1.2367378461411305, "learning_rate": 4.3582987097546095e-06, "loss": 0.5614, "step": 18100 }, { "epoch": 0.5547689101385314, "grad_norm": 1.1292590459762688, "learning_rate": 4.357806499530547e-06, "loss": 0.5974, "step": 18101 }, { "epoch": 0.5547995586612725, "grad_norm": 1.288753388227235, "learning_rate": 4.357314295634182e-06, "loss": 0.6304, "step": 18102 }, { "epoch": 0.5548302071840138, "grad_norm": 1.4371886217954268, "learning_rate": 4.356822098070362e-06, "loss": 0.7127, "step": 18103 }, { "epoch": 0.5548608557067549, "grad_norm": 1.2741940122493935, "learning_rate": 4.356329906843941e-06, "loss": 0.6441, "step": 18104 }, { "epoch": 0.5548915042294962, "grad_norm": 0.46139820266339426, "learning_rate": 4.355837721959766e-06, "loss": 0.4336, "step": 18105 }, { "epoch": 0.5549221527522373, "grad_norm": 1.1468724739641891, "learning_rate": 4.355345543422686e-06, "loss": 0.6465, "step": 18106 }, { "epoch": 0.5549528012749786, "grad_norm": 1.1874207230336986, "learning_rate": 4.354853371237551e-06, "loss": 0.5382, "step": 18107 }, { "epoch": 0.5549834497977197, "grad_norm": 0.47070770504762327, "learning_rate": 4.354361205409212e-06, "loss": 0.4033, "step": 18108 }, { "epoch": 0.555014098320461, "grad_norm": 0.45550921749221746, "learning_rate": 4.353869045942515e-06, "loss": 0.4053, "step": 18109 }, { "epoch": 0.5550447468432022, "grad_norm": 0.43813685656464885, "learning_rate": 4.353376892842313e-06, "loss": 0.395, "step": 18110 }, { "epoch": 0.5550753953659434, "grad_norm": 1.245374892048978, "learning_rate": 4.352884746113454e-06, "loss": 0.5937, "step": 18111 }, { "epoch": 0.5551060438886846, "grad_norm": 1.3021560724327033, "learning_rate": 4.3523926057607866e-06, "loss": 0.6331, "step": 18112 }, { "epoch": 0.5551366924114257, "grad_norm": 0.42294204776086514, "learning_rate": 4.351900471789162e-06, "loss": 0.4008, "step": 18113 }, { "epoch": 0.555167340934167, "grad_norm": 1.0950382727702161, "learning_rate": 4.351408344203425e-06, "loss": 0.5405, "step": 18114 }, { "epoch": 0.5551979894569081, "grad_norm": 1.476519935295313, "learning_rate": 4.350916223008431e-06, "loss": 0.6524, "step": 18115 }, { "epoch": 0.5552286379796494, "grad_norm": 1.1806672009863062, "learning_rate": 4.350424108209024e-06, "loss": 0.5034, "step": 18116 }, { "epoch": 0.5552592865023905, "grad_norm": 1.2035893271323839, "learning_rate": 4.349931999810053e-06, "loss": 0.5022, "step": 18117 }, { "epoch": 0.5552899350251318, "grad_norm": 1.260417770526389, "learning_rate": 4.349439897816371e-06, "loss": 0.7295, "step": 18118 }, { "epoch": 0.555320583547873, "grad_norm": 1.464888775309697, "learning_rate": 4.348947802232823e-06, "loss": 0.6369, "step": 18119 }, { "epoch": 0.5553512320706142, "grad_norm": 1.2777055830311537, "learning_rate": 4.348455713064257e-06, "loss": 0.7237, "step": 18120 }, { "epoch": 0.5553818805933554, "grad_norm": 1.4091801486609188, "learning_rate": 4.347963630315526e-06, "loss": 0.6731, "step": 18121 }, { "epoch": 0.5554125291160966, "grad_norm": 1.4803976129675094, "learning_rate": 4.347471553991475e-06, "loss": 0.6136, "step": 18122 }, { "epoch": 0.5554431776388378, "grad_norm": 1.2507651262800548, "learning_rate": 4.346979484096954e-06, "loss": 0.6337, "step": 18123 }, { "epoch": 0.555473826161579, "grad_norm": 1.3327914287830191, "learning_rate": 4.346487420636812e-06, "loss": 0.5655, "step": 18124 }, { "epoch": 0.5555044746843202, "grad_norm": 0.46804024791888815, "learning_rate": 4.345995363615894e-06, "loss": 0.4059, "step": 18125 }, { "epoch": 0.5555351232070614, "grad_norm": 1.1547294958995136, "learning_rate": 4.345503313039056e-06, "loss": 0.5666, "step": 18126 }, { "epoch": 0.5555657717298026, "grad_norm": 1.3385495708719095, "learning_rate": 4.345011268911138e-06, "loss": 0.6108, "step": 18127 }, { "epoch": 0.5555964202525439, "grad_norm": 0.47461297898029203, "learning_rate": 4.344519231236991e-06, "loss": 0.4239, "step": 18128 }, { "epoch": 0.555627068775285, "grad_norm": 0.4910602967712972, "learning_rate": 4.344027200021465e-06, "loss": 0.4271, "step": 18129 }, { "epoch": 0.5556577172980263, "grad_norm": 1.252160006562998, "learning_rate": 4.343535175269406e-06, "loss": 0.6162, "step": 18130 }, { "epoch": 0.5556883658207674, "grad_norm": 1.2932575912350595, "learning_rate": 4.3430431569856635e-06, "loss": 0.5801, "step": 18131 }, { "epoch": 0.5557190143435087, "grad_norm": 1.3454197828098018, "learning_rate": 4.342551145175085e-06, "loss": 0.6152, "step": 18132 }, { "epoch": 0.5557496628662498, "grad_norm": 1.2929563735875687, "learning_rate": 4.342059139842517e-06, "loss": 0.575, "step": 18133 }, { "epoch": 0.5557803113889911, "grad_norm": 1.2837927320007572, "learning_rate": 4.34156714099281e-06, "loss": 0.6902, "step": 18134 }, { "epoch": 0.5558109599117322, "grad_norm": 1.2941011163952962, "learning_rate": 4.3410751486308116e-06, "loss": 0.6298, "step": 18135 }, { "epoch": 0.5558416084344735, "grad_norm": 1.1839282421188166, "learning_rate": 4.340583162761365e-06, "loss": 0.603, "step": 18136 }, { "epoch": 0.5558722569572146, "grad_norm": 1.2445920341019678, "learning_rate": 4.340091183389324e-06, "loss": 0.6395, "step": 18137 }, { "epoch": 0.5559029054799559, "grad_norm": 1.3270081789016626, "learning_rate": 4.339599210519533e-06, "loss": 0.7329, "step": 18138 }, { "epoch": 0.5559335540026971, "grad_norm": 1.309883768958598, "learning_rate": 4.3391072441568384e-06, "loss": 0.6707, "step": 18139 }, { "epoch": 0.5559642025254383, "grad_norm": 1.3350119587807157, "learning_rate": 4.338615284306091e-06, "loss": 0.6734, "step": 18140 }, { "epoch": 0.5559948510481795, "grad_norm": 0.4731146531503862, "learning_rate": 4.338123330972135e-06, "loss": 0.3965, "step": 18141 }, { "epoch": 0.5560254995709207, "grad_norm": 1.2383476495798094, "learning_rate": 4.337631384159819e-06, "loss": 0.5202, "step": 18142 }, { "epoch": 0.5560561480936619, "grad_norm": 1.2604162705562862, "learning_rate": 4.337139443873992e-06, "loss": 0.5758, "step": 18143 }, { "epoch": 0.556086796616403, "grad_norm": 1.1882402648279904, "learning_rate": 4.3366475101194984e-06, "loss": 0.6684, "step": 18144 }, { "epoch": 0.5561174451391443, "grad_norm": 1.0417520121706958, "learning_rate": 4.336155582901187e-06, "loss": 0.5045, "step": 18145 }, { "epoch": 0.5561480936618854, "grad_norm": 1.4247932175751825, "learning_rate": 4.335663662223907e-06, "loss": 0.7054, "step": 18146 }, { "epoch": 0.5561787421846267, "grad_norm": 0.4528661572742281, "learning_rate": 4.3351717480924986e-06, "loss": 0.4385, "step": 18147 }, { "epoch": 0.5562093907073679, "grad_norm": 1.3335550305788564, "learning_rate": 4.334679840511816e-06, "loss": 0.6258, "step": 18148 }, { "epoch": 0.5562400392301091, "grad_norm": 1.2809101845895647, "learning_rate": 4.3341879394867e-06, "loss": 0.7034, "step": 18149 }, { "epoch": 0.5562706877528503, "grad_norm": 0.434527392607154, "learning_rate": 4.333696045022005e-06, "loss": 0.3926, "step": 18150 }, { "epoch": 0.5563013362755915, "grad_norm": 1.3272544502243633, "learning_rate": 4.333204157122571e-06, "loss": 0.6158, "step": 18151 }, { "epoch": 0.5563319847983327, "grad_norm": 1.2271521589001688, "learning_rate": 4.332712275793246e-06, "loss": 0.5275, "step": 18152 }, { "epoch": 0.5563626333210739, "grad_norm": 1.280821745303445, "learning_rate": 4.332220401038879e-06, "loss": 0.6119, "step": 18153 }, { "epoch": 0.5563932818438151, "grad_norm": 1.2182382999679475, "learning_rate": 4.331728532864315e-06, "loss": 0.6282, "step": 18154 }, { "epoch": 0.5564239303665564, "grad_norm": 1.202092841856029, "learning_rate": 4.3312366712744e-06, "loss": 0.5459, "step": 18155 }, { "epoch": 0.5564545788892975, "grad_norm": 1.339048120879532, "learning_rate": 4.330744816273983e-06, "loss": 0.7141, "step": 18156 }, { "epoch": 0.5564852274120388, "grad_norm": 1.2632492128729302, "learning_rate": 4.330252967867906e-06, "loss": 0.657, "step": 18157 }, { "epoch": 0.5565158759347799, "grad_norm": 1.3639131087500151, "learning_rate": 4.329761126061021e-06, "loss": 0.6369, "step": 18158 }, { "epoch": 0.5565465244575212, "grad_norm": 0.4591309843223946, "learning_rate": 4.329269290858171e-06, "loss": 0.4216, "step": 18159 }, { "epoch": 0.5565771729802623, "grad_norm": 1.165295527412196, "learning_rate": 4.3287774622641995e-06, "loss": 0.6271, "step": 18160 }, { "epoch": 0.5566078215030036, "grad_norm": 1.1378331549427365, "learning_rate": 4.328285640283957e-06, "loss": 0.5757, "step": 18161 }, { "epoch": 0.5566384700257447, "grad_norm": 1.4445299827668685, "learning_rate": 4.327793824922288e-06, "loss": 0.6513, "step": 18162 }, { "epoch": 0.556669118548486, "grad_norm": 0.44421146278698703, "learning_rate": 4.327302016184037e-06, "loss": 0.4232, "step": 18163 }, { "epoch": 0.5566997670712271, "grad_norm": 1.3373461616387072, "learning_rate": 4.326810214074053e-06, "loss": 0.6544, "step": 18164 }, { "epoch": 0.5567304155939684, "grad_norm": 1.362308069028432, "learning_rate": 4.326318418597181e-06, "loss": 0.6764, "step": 18165 }, { "epoch": 0.5567610641167096, "grad_norm": 1.1973440051241737, "learning_rate": 4.325826629758263e-06, "loss": 0.6649, "step": 18166 }, { "epoch": 0.5567917126394508, "grad_norm": 1.4039971402859448, "learning_rate": 4.325334847562151e-06, "loss": 0.6576, "step": 18167 }, { "epoch": 0.556822361162192, "grad_norm": 1.1413993407592624, "learning_rate": 4.324843072013684e-06, "loss": 0.5905, "step": 18168 }, { "epoch": 0.5568530096849332, "grad_norm": 1.331589621034693, "learning_rate": 4.324351303117714e-06, "loss": 0.6045, "step": 18169 }, { "epoch": 0.5568836582076744, "grad_norm": 0.4544868252192489, "learning_rate": 4.3238595408790825e-06, "loss": 0.4034, "step": 18170 }, { "epoch": 0.5569143067304156, "grad_norm": 0.46693658269265265, "learning_rate": 4.323367785302634e-06, "loss": 0.4146, "step": 18171 }, { "epoch": 0.5569449552531568, "grad_norm": 1.2528131942070015, "learning_rate": 4.3228760363932186e-06, "loss": 0.5511, "step": 18172 }, { "epoch": 0.556975603775898, "grad_norm": 1.2470574340667042, "learning_rate": 4.322384294155678e-06, "loss": 0.6527, "step": 18173 }, { "epoch": 0.5570062522986392, "grad_norm": 0.43958216325230043, "learning_rate": 4.321892558594857e-06, "loss": 0.3973, "step": 18174 }, { "epoch": 0.5570369008213804, "grad_norm": 1.4686907749586895, "learning_rate": 4.321400829715604e-06, "loss": 0.7463, "step": 18175 }, { "epoch": 0.5570675493441216, "grad_norm": 1.3403739559581842, "learning_rate": 4.3209091075227605e-06, "loss": 0.7277, "step": 18176 }, { "epoch": 0.5570981978668628, "grad_norm": 1.1225736985978483, "learning_rate": 4.320417392021175e-06, "loss": 0.5879, "step": 18177 }, { "epoch": 0.557128846389604, "grad_norm": 1.287847190772821, "learning_rate": 4.319925683215691e-06, "loss": 0.6713, "step": 18178 }, { "epoch": 0.5571594949123452, "grad_norm": 1.2663048489062012, "learning_rate": 4.319433981111151e-06, "loss": 0.657, "step": 18179 }, { "epoch": 0.5571901434350864, "grad_norm": 1.3188503179537376, "learning_rate": 4.318942285712404e-06, "loss": 0.6364, "step": 18180 }, { "epoch": 0.5572207919578276, "grad_norm": 1.2150239566575753, "learning_rate": 4.3184505970242926e-06, "loss": 0.6369, "step": 18181 }, { "epoch": 0.5572514404805688, "grad_norm": 1.2304156181210777, "learning_rate": 4.317958915051661e-06, "loss": 0.6888, "step": 18182 }, { "epoch": 0.55728208900331, "grad_norm": 1.2047384753135875, "learning_rate": 4.317467239799355e-06, "loss": 0.6225, "step": 18183 }, { "epoch": 0.5573127375260513, "grad_norm": 1.2636455089782352, "learning_rate": 4.31697557127222e-06, "loss": 0.6661, "step": 18184 }, { "epoch": 0.5573433860487924, "grad_norm": 1.2271752127626672, "learning_rate": 4.316483909475098e-06, "loss": 0.6444, "step": 18185 }, { "epoch": 0.5573740345715337, "grad_norm": 1.1551027845684096, "learning_rate": 4.315992254412836e-06, "loss": 0.5857, "step": 18186 }, { "epoch": 0.5574046830942748, "grad_norm": 1.1358066303746879, "learning_rate": 4.315500606090276e-06, "loss": 0.6228, "step": 18187 }, { "epoch": 0.5574353316170161, "grad_norm": 0.49518442734600715, "learning_rate": 4.315008964512265e-06, "loss": 0.3952, "step": 18188 }, { "epoch": 0.5574659801397572, "grad_norm": 0.5075091667108668, "learning_rate": 4.3145173296836475e-06, "loss": 0.4162, "step": 18189 }, { "epoch": 0.5574966286624985, "grad_norm": 0.5111966058574382, "learning_rate": 4.314025701609262e-06, "loss": 0.4037, "step": 18190 }, { "epoch": 0.5575272771852396, "grad_norm": 1.4003609574849707, "learning_rate": 4.3135340802939605e-06, "loss": 0.6362, "step": 18191 }, { "epoch": 0.5575579257079809, "grad_norm": 0.43377786122363154, "learning_rate": 4.313042465742582e-06, "loss": 0.3913, "step": 18192 }, { "epoch": 0.5575885742307221, "grad_norm": 1.3848142322592873, "learning_rate": 4.31255085795997e-06, "loss": 0.6701, "step": 18193 }, { "epoch": 0.5576192227534633, "grad_norm": 0.4770966916499853, "learning_rate": 4.312059256950973e-06, "loss": 0.405, "step": 18194 }, { "epoch": 0.5576498712762045, "grad_norm": 1.167194583547409, "learning_rate": 4.3115676627204305e-06, "loss": 0.654, "step": 18195 }, { "epoch": 0.5576805197989457, "grad_norm": 1.3176950607669815, "learning_rate": 4.311076075273189e-06, "loss": 0.5989, "step": 18196 }, { "epoch": 0.5577111683216869, "grad_norm": 1.2472981137983297, "learning_rate": 4.310584494614091e-06, "loss": 0.6931, "step": 18197 }, { "epoch": 0.5577418168444281, "grad_norm": 1.2437167862679188, "learning_rate": 4.310092920747979e-06, "loss": 0.6727, "step": 18198 }, { "epoch": 0.5577724653671693, "grad_norm": 1.3497727896286982, "learning_rate": 4.3096013536797e-06, "loss": 0.548, "step": 18199 }, { "epoch": 0.5578031138899106, "grad_norm": 1.3181554792039185, "learning_rate": 4.309109793414096e-06, "loss": 0.651, "step": 18200 }, { "epoch": 0.5578337624126517, "grad_norm": 1.2351982072380996, "learning_rate": 4.308618239956006e-06, "loss": 0.6925, "step": 18201 }, { "epoch": 0.557864410935393, "grad_norm": 0.5173699121806611, "learning_rate": 4.308126693310281e-06, "loss": 0.3989, "step": 18202 }, { "epoch": 0.5578950594581341, "grad_norm": 1.2914788633494991, "learning_rate": 4.307635153481759e-06, "loss": 0.7195, "step": 18203 }, { "epoch": 0.5579257079808754, "grad_norm": 1.276098557906688, "learning_rate": 4.307143620475287e-06, "loss": 0.6593, "step": 18204 }, { "epoch": 0.5579563565036165, "grad_norm": 1.3272784737837684, "learning_rate": 4.306652094295705e-06, "loss": 0.66, "step": 18205 }, { "epoch": 0.5579870050263577, "grad_norm": 0.48122817776651794, "learning_rate": 4.306160574947856e-06, "loss": 0.4284, "step": 18206 }, { "epoch": 0.5580176535490989, "grad_norm": 1.3007570431777888, "learning_rate": 4.305669062436586e-06, "loss": 0.6207, "step": 18207 }, { "epoch": 0.5580483020718401, "grad_norm": 1.335773697875705, "learning_rate": 4.305177556766736e-06, "loss": 0.6391, "step": 18208 }, { "epoch": 0.5580789505945813, "grad_norm": 1.1844541259702193, "learning_rate": 4.3046860579431485e-06, "loss": 0.7003, "step": 18209 }, { "epoch": 0.5581095991173225, "grad_norm": 1.400754337060809, "learning_rate": 4.30419456597067e-06, "loss": 0.6735, "step": 18210 }, { "epoch": 0.5581402476400638, "grad_norm": 1.4840752662065164, "learning_rate": 4.303703080854138e-06, "loss": 0.6147, "step": 18211 }, { "epoch": 0.5581708961628049, "grad_norm": 1.4155873549696314, "learning_rate": 4.3032116025983975e-06, "loss": 0.6502, "step": 18212 }, { "epoch": 0.5582015446855462, "grad_norm": 1.0914892987055222, "learning_rate": 4.302720131208292e-06, "loss": 0.5204, "step": 18213 }, { "epoch": 0.5582321932082873, "grad_norm": 0.4720354536144329, "learning_rate": 4.302228666688663e-06, "loss": 0.414, "step": 18214 }, { "epoch": 0.5582628417310286, "grad_norm": 1.276180987273358, "learning_rate": 4.3017372090443545e-06, "loss": 0.6122, "step": 18215 }, { "epoch": 0.5582934902537697, "grad_norm": 1.2482258557857246, "learning_rate": 4.3012457582802076e-06, "loss": 0.479, "step": 18216 }, { "epoch": 0.558324138776511, "grad_norm": 0.4635664215258304, "learning_rate": 4.300754314401064e-06, "loss": 0.4019, "step": 18217 }, { "epoch": 0.5583547872992521, "grad_norm": 1.1081918075342911, "learning_rate": 4.300262877411767e-06, "loss": 0.5691, "step": 18218 }, { "epoch": 0.5583854358219934, "grad_norm": 1.2085155777618077, "learning_rate": 4.299771447317162e-06, "loss": 0.6083, "step": 18219 }, { "epoch": 0.5584160843447346, "grad_norm": 1.2295748447873724, "learning_rate": 4.299280024122084e-06, "loss": 0.662, "step": 18220 }, { "epoch": 0.5584467328674758, "grad_norm": 1.3295817593877828, "learning_rate": 4.298788607831382e-06, "loss": 0.6577, "step": 18221 }, { "epoch": 0.558477381390217, "grad_norm": 1.2696782516964122, "learning_rate": 4.2982971984498924e-06, "loss": 0.6268, "step": 18222 }, { "epoch": 0.5585080299129582, "grad_norm": 1.3351174402656025, "learning_rate": 4.297805795982464e-06, "loss": 0.6097, "step": 18223 }, { "epoch": 0.5585386784356994, "grad_norm": 1.277366558426648, "learning_rate": 4.2973144004339325e-06, "loss": 0.6755, "step": 18224 }, { "epoch": 0.5585693269584406, "grad_norm": 1.2630284761168311, "learning_rate": 4.296823011809142e-06, "loss": 0.7067, "step": 18225 }, { "epoch": 0.5585999754811818, "grad_norm": 1.2152178837718768, "learning_rate": 4.2963316301129345e-06, "loss": 0.6486, "step": 18226 }, { "epoch": 0.558630624003923, "grad_norm": 1.1982315770131686, "learning_rate": 4.295840255350151e-06, "loss": 0.6378, "step": 18227 }, { "epoch": 0.5586612725266642, "grad_norm": 1.3543117889356209, "learning_rate": 4.295348887525633e-06, "loss": 0.6711, "step": 18228 }, { "epoch": 0.5586919210494055, "grad_norm": 1.27811668784372, "learning_rate": 4.294857526644225e-06, "loss": 0.597, "step": 18229 }, { "epoch": 0.5587225695721466, "grad_norm": 1.4443588600970583, "learning_rate": 4.294366172710764e-06, "loss": 0.6801, "step": 18230 }, { "epoch": 0.5587532180948879, "grad_norm": 1.3972553298956394, "learning_rate": 4.293874825730095e-06, "loss": 0.5969, "step": 18231 }, { "epoch": 0.558783866617629, "grad_norm": 1.329421008238242, "learning_rate": 4.293383485707059e-06, "loss": 0.6009, "step": 18232 }, { "epoch": 0.5588145151403703, "grad_norm": 1.520669755931042, "learning_rate": 4.292892152646493e-06, "loss": 0.7432, "step": 18233 }, { "epoch": 0.5588451636631114, "grad_norm": 1.2705857864689272, "learning_rate": 4.292400826553245e-06, "loss": 0.5937, "step": 18234 }, { "epoch": 0.5588758121858527, "grad_norm": 1.1500122205830985, "learning_rate": 4.291909507432151e-06, "loss": 0.6662, "step": 18235 }, { "epoch": 0.5589064607085938, "grad_norm": 1.436674381844574, "learning_rate": 4.291418195288053e-06, "loss": 0.7211, "step": 18236 }, { "epoch": 0.558937109231335, "grad_norm": 1.2960784118976947, "learning_rate": 4.290926890125794e-06, "loss": 0.7569, "step": 18237 }, { "epoch": 0.5589677577540763, "grad_norm": 1.2087407427568804, "learning_rate": 4.290435591950215e-06, "loss": 0.602, "step": 18238 }, { "epoch": 0.5589984062768174, "grad_norm": 1.3278856226997398, "learning_rate": 4.289944300766153e-06, "loss": 0.6844, "step": 18239 }, { "epoch": 0.5590290547995587, "grad_norm": 1.261122596057871, "learning_rate": 4.289453016578453e-06, "loss": 0.5908, "step": 18240 }, { "epoch": 0.5590597033222998, "grad_norm": 1.1822312305040514, "learning_rate": 4.288961739391953e-06, "loss": 0.5876, "step": 18241 }, { "epoch": 0.5590903518450411, "grad_norm": 0.47810458290577185, "learning_rate": 4.2884704692114965e-06, "loss": 0.4068, "step": 18242 }, { "epoch": 0.5591210003677822, "grad_norm": 1.3718905122621659, "learning_rate": 4.287979206041923e-06, "loss": 0.6727, "step": 18243 }, { "epoch": 0.5591516488905235, "grad_norm": 0.4638724817636791, "learning_rate": 4.287487949888069e-06, "loss": 0.4038, "step": 18244 }, { "epoch": 0.5591822974132646, "grad_norm": 1.353871461949097, "learning_rate": 4.286996700754783e-06, "loss": 0.6428, "step": 18245 }, { "epoch": 0.5592129459360059, "grad_norm": 1.2861753672792064, "learning_rate": 4.286505458646899e-06, "loss": 0.6359, "step": 18246 }, { "epoch": 0.559243594458747, "grad_norm": 2.1011642600814566, "learning_rate": 4.286014223569258e-06, "loss": 0.6831, "step": 18247 }, { "epoch": 0.5592742429814883, "grad_norm": 1.132050932350532, "learning_rate": 4.285522995526703e-06, "loss": 0.6595, "step": 18248 }, { "epoch": 0.5593048915042295, "grad_norm": 0.44058078732108635, "learning_rate": 4.285031774524072e-06, "loss": 0.41, "step": 18249 }, { "epoch": 0.5593355400269707, "grad_norm": 1.3454533867744194, "learning_rate": 4.284540560566207e-06, "loss": 0.5943, "step": 18250 }, { "epoch": 0.5593661885497119, "grad_norm": 1.262727934409373, "learning_rate": 4.284049353657946e-06, "loss": 0.6289, "step": 18251 }, { "epoch": 0.5593968370724531, "grad_norm": 1.1602841317500017, "learning_rate": 4.28355815380413e-06, "loss": 0.5604, "step": 18252 }, { "epoch": 0.5594274855951943, "grad_norm": 1.3674687801888292, "learning_rate": 4.283066961009599e-06, "loss": 0.7165, "step": 18253 }, { "epoch": 0.5594581341179355, "grad_norm": 1.1638278285604378, "learning_rate": 4.282575775279194e-06, "loss": 0.598, "step": 18254 }, { "epoch": 0.5594887826406767, "grad_norm": 1.4595349255812784, "learning_rate": 4.282084596617752e-06, "loss": 0.6072, "step": 18255 }, { "epoch": 0.559519431163418, "grad_norm": 1.312507065704752, "learning_rate": 4.281593425030114e-06, "loss": 0.6902, "step": 18256 }, { "epoch": 0.5595500796861591, "grad_norm": 1.2984127481215022, "learning_rate": 4.281102260521119e-06, "loss": 0.5519, "step": 18257 }, { "epoch": 0.5595807282089004, "grad_norm": 1.475469365483119, "learning_rate": 4.280611103095609e-06, "loss": 0.7032, "step": 18258 }, { "epoch": 0.5596113767316415, "grad_norm": 1.3485296786642418, "learning_rate": 4.280119952758422e-06, "loss": 0.665, "step": 18259 }, { "epoch": 0.5596420252543828, "grad_norm": 1.2148569217213507, "learning_rate": 4.279628809514395e-06, "loss": 0.5911, "step": 18260 }, { "epoch": 0.5596726737771239, "grad_norm": 1.2292874618311773, "learning_rate": 4.279137673368371e-06, "loss": 0.5605, "step": 18261 }, { "epoch": 0.5597033222998652, "grad_norm": 1.2841063300218807, "learning_rate": 4.27864654432519e-06, "loss": 0.6609, "step": 18262 }, { "epoch": 0.5597339708226063, "grad_norm": 1.511587348417187, "learning_rate": 4.278155422389685e-06, "loss": 0.6664, "step": 18263 }, { "epoch": 0.5597646193453476, "grad_norm": 1.1505737419053723, "learning_rate": 4.277664307566703e-06, "loss": 0.675, "step": 18264 }, { "epoch": 0.5597952678680888, "grad_norm": 1.4032971400616, "learning_rate": 4.277173199861079e-06, "loss": 0.6746, "step": 18265 }, { "epoch": 0.55982591639083, "grad_norm": 1.350270993775446, "learning_rate": 4.27668209927765e-06, "loss": 0.6822, "step": 18266 }, { "epoch": 0.5598565649135712, "grad_norm": 1.1936162906072603, "learning_rate": 4.2761910058212595e-06, "loss": 0.608, "step": 18267 }, { "epoch": 0.5598872134363123, "grad_norm": 1.2466776580788592, "learning_rate": 4.275699919496742e-06, "loss": 0.6816, "step": 18268 }, { "epoch": 0.5599178619590536, "grad_norm": 1.3684775043928747, "learning_rate": 4.275208840308941e-06, "loss": 0.7123, "step": 18269 }, { "epoch": 0.5599485104817947, "grad_norm": 0.4544138562651929, "learning_rate": 4.274717768262692e-06, "loss": 0.4277, "step": 18270 }, { "epoch": 0.559979159004536, "grad_norm": 0.4572662280396214, "learning_rate": 4.274226703362833e-06, "loss": 0.3941, "step": 18271 }, { "epoch": 0.5600098075272771, "grad_norm": 1.2459604567040123, "learning_rate": 4.273735645614206e-06, "loss": 0.6565, "step": 18272 }, { "epoch": 0.5600404560500184, "grad_norm": 1.278159187138185, "learning_rate": 4.273244595021648e-06, "loss": 0.702, "step": 18273 }, { "epoch": 0.5600711045727595, "grad_norm": 1.2234584646983722, "learning_rate": 4.272753551589993e-06, "loss": 0.6619, "step": 18274 }, { "epoch": 0.5601017530955008, "grad_norm": 1.2307242903469182, "learning_rate": 4.272262515324088e-06, "loss": 0.6546, "step": 18275 }, { "epoch": 0.560132401618242, "grad_norm": 1.0510820351298495, "learning_rate": 4.271771486228762e-06, "loss": 0.5567, "step": 18276 }, { "epoch": 0.5601630501409832, "grad_norm": 0.438853913351453, "learning_rate": 4.2712804643088625e-06, "loss": 0.4068, "step": 18277 }, { "epoch": 0.5601936986637244, "grad_norm": 1.4518311812324782, "learning_rate": 4.2707894495692205e-06, "loss": 0.6209, "step": 18278 }, { "epoch": 0.5602243471864656, "grad_norm": 1.3242439055920574, "learning_rate": 4.270298442014677e-06, "loss": 0.6585, "step": 18279 }, { "epoch": 0.5602549957092068, "grad_norm": 1.171924466171197, "learning_rate": 4.26980744165007e-06, "loss": 0.5697, "step": 18280 }, { "epoch": 0.560285644231948, "grad_norm": 1.2568869235887552, "learning_rate": 4.269316448480237e-06, "loss": 0.7083, "step": 18281 }, { "epoch": 0.5603162927546892, "grad_norm": 1.5229704604541938, "learning_rate": 4.268825462510015e-06, "loss": 0.6526, "step": 18282 }, { "epoch": 0.5603469412774305, "grad_norm": 1.3410171527120456, "learning_rate": 4.268334483744244e-06, "loss": 0.6756, "step": 18283 }, { "epoch": 0.5603775898001716, "grad_norm": 1.351330142075373, "learning_rate": 4.26784351218776e-06, "loss": 0.6875, "step": 18284 }, { "epoch": 0.5604082383229129, "grad_norm": 2.280384769735448, "learning_rate": 4.267352547845401e-06, "loss": 0.6639, "step": 18285 }, { "epoch": 0.560438886845654, "grad_norm": 1.385614738944921, "learning_rate": 4.266861590722007e-06, "loss": 0.6447, "step": 18286 }, { "epoch": 0.5604695353683953, "grad_norm": 1.3002759111099607, "learning_rate": 4.2663706408224094e-06, "loss": 0.7075, "step": 18287 }, { "epoch": 0.5605001838911364, "grad_norm": 1.328466046462302, "learning_rate": 4.265879698151453e-06, "loss": 0.6196, "step": 18288 }, { "epoch": 0.5605308324138777, "grad_norm": 1.9704765496700578, "learning_rate": 4.26538876271397e-06, "loss": 0.6369, "step": 18289 }, { "epoch": 0.5605614809366188, "grad_norm": 1.3115841661280416, "learning_rate": 4.2648978345147995e-06, "loss": 0.6546, "step": 18290 }, { "epoch": 0.5605921294593601, "grad_norm": 1.1002427822762109, "learning_rate": 4.264406913558779e-06, "loss": 0.6214, "step": 18291 }, { "epoch": 0.5606227779821013, "grad_norm": 0.4520325568306245, "learning_rate": 4.263915999850746e-06, "loss": 0.3978, "step": 18292 }, { "epoch": 0.5606534265048425, "grad_norm": 0.450665096303924, "learning_rate": 4.263425093395536e-06, "loss": 0.3928, "step": 18293 }, { "epoch": 0.5606840750275837, "grad_norm": 0.4665425349744901, "learning_rate": 4.2629341941979885e-06, "loss": 0.4052, "step": 18294 }, { "epoch": 0.5607147235503249, "grad_norm": 1.2815653605621233, "learning_rate": 4.262443302262937e-06, "loss": 0.6849, "step": 18295 }, { "epoch": 0.5607453720730661, "grad_norm": 1.3352285984381747, "learning_rate": 4.261952417595222e-06, "loss": 0.6669, "step": 18296 }, { "epoch": 0.5607760205958073, "grad_norm": 1.4191094377462898, "learning_rate": 4.261461540199679e-06, "loss": 0.6438, "step": 18297 }, { "epoch": 0.5608066691185485, "grad_norm": 1.2775064140653078, "learning_rate": 4.2609706700811424e-06, "loss": 0.5407, "step": 18298 }, { "epoch": 0.5608373176412896, "grad_norm": 1.139346064001948, "learning_rate": 4.260479807244452e-06, "loss": 0.5913, "step": 18299 }, { "epoch": 0.5608679661640309, "grad_norm": 0.4577147946707151, "learning_rate": 4.2599889516944435e-06, "loss": 0.3984, "step": 18300 }, { "epoch": 0.560898614686772, "grad_norm": 1.2296765968797778, "learning_rate": 4.259498103435953e-06, "loss": 0.5993, "step": 18301 }, { "epoch": 0.5609292632095133, "grad_norm": 1.3235130235904862, "learning_rate": 4.259007262473817e-06, "loss": 0.5927, "step": 18302 }, { "epoch": 0.5609599117322545, "grad_norm": 0.4680760914661513, "learning_rate": 4.258516428812871e-06, "loss": 0.4026, "step": 18303 }, { "epoch": 0.5609905602549957, "grad_norm": 1.1740580515443622, "learning_rate": 4.258025602457954e-06, "loss": 0.6537, "step": 18304 }, { "epoch": 0.5610212087777369, "grad_norm": 1.275698985319851, "learning_rate": 4.2575347834139e-06, "loss": 0.6065, "step": 18305 }, { "epoch": 0.5610518573004781, "grad_norm": 1.2161098189509576, "learning_rate": 4.257043971685545e-06, "loss": 0.5688, "step": 18306 }, { "epoch": 0.5610825058232193, "grad_norm": 1.433117328471283, "learning_rate": 4.256553167277729e-06, "loss": 0.6479, "step": 18307 }, { "epoch": 0.5611131543459605, "grad_norm": 1.3571629554731295, "learning_rate": 4.256062370195282e-06, "loss": 0.6806, "step": 18308 }, { "epoch": 0.5611438028687017, "grad_norm": 1.2759376119912413, "learning_rate": 4.2555715804430425e-06, "loss": 0.6202, "step": 18309 }, { "epoch": 0.561174451391443, "grad_norm": 1.3344139311021974, "learning_rate": 4.255080798025848e-06, "loss": 0.6503, "step": 18310 }, { "epoch": 0.5612050999141841, "grad_norm": 0.45463081923195464, "learning_rate": 4.2545900229485315e-06, "loss": 0.4248, "step": 18311 }, { "epoch": 0.5612357484369254, "grad_norm": 1.3461239487903822, "learning_rate": 4.254099255215931e-06, "loss": 0.636, "step": 18312 }, { "epoch": 0.5612663969596665, "grad_norm": 1.2303768457219937, "learning_rate": 4.253608494832882e-06, "loss": 0.5477, "step": 18313 }, { "epoch": 0.5612970454824078, "grad_norm": 1.2590695782930863, "learning_rate": 4.253117741804219e-06, "loss": 0.6225, "step": 18314 }, { "epoch": 0.5613276940051489, "grad_norm": 1.2571336841417913, "learning_rate": 4.252626996134778e-06, "loss": 0.6317, "step": 18315 }, { "epoch": 0.5613583425278902, "grad_norm": 1.2255449351415357, "learning_rate": 4.252136257829396e-06, "loss": 0.6924, "step": 18316 }, { "epoch": 0.5613889910506313, "grad_norm": 0.48313681005642883, "learning_rate": 4.251645526892903e-06, "loss": 0.4284, "step": 18317 }, { "epoch": 0.5614196395733726, "grad_norm": 0.45016887636720265, "learning_rate": 4.251154803330142e-06, "loss": 0.3988, "step": 18318 }, { "epoch": 0.5614502880961137, "grad_norm": 1.305332300946407, "learning_rate": 4.250664087145943e-06, "loss": 0.6335, "step": 18319 }, { "epoch": 0.561480936618855, "grad_norm": 1.291447441685951, "learning_rate": 4.250173378345141e-06, "loss": 0.6367, "step": 18320 }, { "epoch": 0.5615115851415962, "grad_norm": 1.2661572585960417, "learning_rate": 4.2496826769325735e-06, "loss": 0.5981, "step": 18321 }, { "epoch": 0.5615422336643374, "grad_norm": 1.3023625870153714, "learning_rate": 4.249191982913074e-06, "loss": 0.64, "step": 18322 }, { "epoch": 0.5615728821870786, "grad_norm": 1.274476736071588, "learning_rate": 4.248701296291479e-06, "loss": 0.6724, "step": 18323 }, { "epoch": 0.5616035307098198, "grad_norm": 1.133059060113983, "learning_rate": 4.248210617072623e-06, "loss": 0.5972, "step": 18324 }, { "epoch": 0.561634179232561, "grad_norm": 1.1745685216676907, "learning_rate": 4.247719945261338e-06, "loss": 0.6597, "step": 18325 }, { "epoch": 0.5616648277553022, "grad_norm": 1.198848797463691, "learning_rate": 4.247229280862463e-06, "loss": 0.6032, "step": 18326 }, { "epoch": 0.5616954762780434, "grad_norm": 1.393267459761848, "learning_rate": 4.246738623880831e-06, "loss": 0.6954, "step": 18327 }, { "epoch": 0.5617261248007847, "grad_norm": 1.264223111517761, "learning_rate": 4.246247974321273e-06, "loss": 0.7008, "step": 18328 }, { "epoch": 0.5617567733235258, "grad_norm": 1.3500420375115454, "learning_rate": 4.245757332188629e-06, "loss": 0.6376, "step": 18329 }, { "epoch": 0.561787421846267, "grad_norm": 1.47282298432716, "learning_rate": 4.245266697487729e-06, "loss": 0.6692, "step": 18330 }, { "epoch": 0.5618180703690082, "grad_norm": 1.4086585779364065, "learning_rate": 4.244776070223412e-06, "loss": 0.6891, "step": 18331 }, { "epoch": 0.5618487188917494, "grad_norm": 1.2957229670932304, "learning_rate": 4.244285450400508e-06, "loss": 0.7012, "step": 18332 }, { "epoch": 0.5618793674144906, "grad_norm": 1.281655708633364, "learning_rate": 4.2437948380238525e-06, "loss": 0.6396, "step": 18333 }, { "epoch": 0.5619100159372318, "grad_norm": 0.4713944806437629, "learning_rate": 4.2433042330982805e-06, "loss": 0.4103, "step": 18334 }, { "epoch": 0.561940664459973, "grad_norm": 1.1852584677451246, "learning_rate": 4.242813635628626e-06, "loss": 0.5291, "step": 18335 }, { "epoch": 0.5619713129827142, "grad_norm": 0.4829368735441589, "learning_rate": 4.242323045619721e-06, "loss": 0.4058, "step": 18336 }, { "epoch": 0.5620019615054554, "grad_norm": 1.2292818609138625, "learning_rate": 4.241832463076402e-06, "loss": 0.6597, "step": 18337 }, { "epoch": 0.5620326100281966, "grad_norm": 1.514882151783803, "learning_rate": 4.241341888003501e-06, "loss": 0.673, "step": 18338 }, { "epoch": 0.5620632585509379, "grad_norm": 1.3789855973963254, "learning_rate": 4.240851320405853e-06, "loss": 0.6489, "step": 18339 }, { "epoch": 0.562093907073679, "grad_norm": 1.13507900070802, "learning_rate": 4.240360760288293e-06, "loss": 0.5567, "step": 18340 }, { "epoch": 0.5621245555964203, "grad_norm": 1.4595140943777192, "learning_rate": 4.239870207655648e-06, "loss": 0.6314, "step": 18341 }, { "epoch": 0.5621552041191614, "grad_norm": 1.3372887079578673, "learning_rate": 4.239379662512761e-06, "loss": 0.6322, "step": 18342 }, { "epoch": 0.5621858526419027, "grad_norm": 1.3506876984402323, "learning_rate": 4.238889124864461e-06, "loss": 0.6747, "step": 18343 }, { "epoch": 0.5622165011646438, "grad_norm": 1.4518406645642823, "learning_rate": 4.238398594715577e-06, "loss": 0.7643, "step": 18344 }, { "epoch": 0.5622471496873851, "grad_norm": 1.3681441483105319, "learning_rate": 4.23790807207095e-06, "loss": 0.6461, "step": 18345 }, { "epoch": 0.5622777982101262, "grad_norm": 1.2176298021494492, "learning_rate": 4.237417556935409e-06, "loss": 0.5948, "step": 18346 }, { "epoch": 0.5623084467328675, "grad_norm": 1.245535275151408, "learning_rate": 4.236927049313786e-06, "loss": 0.584, "step": 18347 }, { "epoch": 0.5623390952556087, "grad_norm": 1.2586853435255896, "learning_rate": 4.236436549210918e-06, "loss": 0.5509, "step": 18348 }, { "epoch": 0.5623697437783499, "grad_norm": 1.1920767147927827, "learning_rate": 4.235946056631635e-06, "loss": 0.6387, "step": 18349 }, { "epoch": 0.5624003923010911, "grad_norm": 1.1793027577468398, "learning_rate": 4.2354555715807735e-06, "loss": 0.6179, "step": 18350 }, { "epoch": 0.5624310408238323, "grad_norm": 1.5976918744473687, "learning_rate": 4.2349650940631615e-06, "loss": 0.6433, "step": 18351 }, { "epoch": 0.5624616893465735, "grad_norm": 1.1169225073667677, "learning_rate": 4.2344746240836345e-06, "loss": 0.6308, "step": 18352 }, { "epoch": 0.5624923378693147, "grad_norm": 1.1777593029913578, "learning_rate": 4.233984161647025e-06, "loss": 0.657, "step": 18353 }, { "epoch": 0.5625229863920559, "grad_norm": 1.248002870004235, "learning_rate": 4.233493706758166e-06, "loss": 0.6423, "step": 18354 }, { "epoch": 0.5625536349147972, "grad_norm": 0.5063842570569805, "learning_rate": 4.2330032594218885e-06, "loss": 0.4176, "step": 18355 }, { "epoch": 0.5625842834375383, "grad_norm": 1.4145983113881955, "learning_rate": 4.2325128196430265e-06, "loss": 0.6607, "step": 18356 }, { "epoch": 0.5626149319602796, "grad_norm": 1.3445633822556822, "learning_rate": 4.232022387426412e-06, "loss": 0.6566, "step": 18357 }, { "epoch": 0.5626455804830207, "grad_norm": 1.2802382904086747, "learning_rate": 4.231531962776878e-06, "loss": 0.602, "step": 18358 }, { "epoch": 0.562676229005762, "grad_norm": 1.1449143917801563, "learning_rate": 4.231041545699257e-06, "loss": 0.5944, "step": 18359 }, { "epoch": 0.5627068775285031, "grad_norm": 1.2464802956036916, "learning_rate": 4.230551136198377e-06, "loss": 0.6588, "step": 18360 }, { "epoch": 0.5627375260512444, "grad_norm": 1.3911447862596478, "learning_rate": 4.230060734279078e-06, "loss": 0.6299, "step": 18361 }, { "epoch": 0.5627681745739855, "grad_norm": 1.1998241514120545, "learning_rate": 4.229570339946186e-06, "loss": 0.6808, "step": 18362 }, { "epoch": 0.5627988230967267, "grad_norm": 1.24887119529753, "learning_rate": 4.229079953204533e-06, "loss": 0.6396, "step": 18363 }, { "epoch": 0.562829471619468, "grad_norm": 1.2988285687207013, "learning_rate": 4.228589574058954e-06, "loss": 0.7192, "step": 18364 }, { "epoch": 0.5628601201422091, "grad_norm": 0.48209192577860915, "learning_rate": 4.228099202514279e-06, "loss": 0.3763, "step": 18365 }, { "epoch": 0.5628907686649504, "grad_norm": 1.2958616306200177, "learning_rate": 4.2276088385753396e-06, "loss": 0.6625, "step": 18366 }, { "epoch": 0.5629214171876915, "grad_norm": 1.2926117482659636, "learning_rate": 4.227118482246968e-06, "loss": 0.6253, "step": 18367 }, { "epoch": 0.5629520657104328, "grad_norm": 1.1799198779187219, "learning_rate": 4.226628133533996e-06, "loss": 0.6432, "step": 18368 }, { "epoch": 0.5629827142331739, "grad_norm": 1.0413680160163035, "learning_rate": 4.226137792441254e-06, "loss": 0.617, "step": 18369 }, { "epoch": 0.5630133627559152, "grad_norm": 1.1177412768775092, "learning_rate": 4.225647458973578e-06, "loss": 0.679, "step": 18370 }, { "epoch": 0.5630440112786563, "grad_norm": 1.2070313203121317, "learning_rate": 4.22515713313579e-06, "loss": 0.6117, "step": 18371 }, { "epoch": 0.5630746598013976, "grad_norm": 1.2671257262984714, "learning_rate": 4.224666814932731e-06, "loss": 0.6845, "step": 18372 }, { "epoch": 0.5631053083241387, "grad_norm": 1.2073858273481768, "learning_rate": 4.224176504369228e-06, "loss": 0.6317, "step": 18373 }, { "epoch": 0.56313595684688, "grad_norm": 1.3811986295902863, "learning_rate": 4.223686201450111e-06, "loss": 0.6433, "step": 18374 }, { "epoch": 0.5631666053696212, "grad_norm": 1.1824290242663393, "learning_rate": 4.223195906180213e-06, "loss": 0.6365, "step": 18375 }, { "epoch": 0.5631972538923624, "grad_norm": 1.3178871685745295, "learning_rate": 4.222705618564364e-06, "loss": 0.6559, "step": 18376 }, { "epoch": 0.5632279024151036, "grad_norm": 1.263800979209137, "learning_rate": 4.222215338607396e-06, "loss": 0.6082, "step": 18377 }, { "epoch": 0.5632585509378448, "grad_norm": 1.4829008847473424, "learning_rate": 4.22172506631414e-06, "loss": 0.6694, "step": 18378 }, { "epoch": 0.563289199460586, "grad_norm": 1.371059719580686, "learning_rate": 4.221234801689424e-06, "loss": 0.6988, "step": 18379 }, { "epoch": 0.5633198479833272, "grad_norm": 1.1030687013250966, "learning_rate": 4.220744544738082e-06, "loss": 0.5939, "step": 18380 }, { "epoch": 0.5633504965060684, "grad_norm": 1.4861666526458572, "learning_rate": 4.220254295464945e-06, "loss": 0.7595, "step": 18381 }, { "epoch": 0.5633811450288096, "grad_norm": 0.4674579276517868, "learning_rate": 4.219764053874838e-06, "loss": 0.424, "step": 18382 }, { "epoch": 0.5634117935515508, "grad_norm": 1.1331659467769577, "learning_rate": 4.2192738199726e-06, "loss": 0.6025, "step": 18383 }, { "epoch": 0.5634424420742921, "grad_norm": 1.2160556910910503, "learning_rate": 4.2187835937630524e-06, "loss": 0.6501, "step": 18384 }, { "epoch": 0.5634730905970332, "grad_norm": 1.2291196697411955, "learning_rate": 4.218293375251034e-06, "loss": 0.6076, "step": 18385 }, { "epoch": 0.5635037391197745, "grad_norm": 1.3077198600363817, "learning_rate": 4.217803164441369e-06, "loss": 0.6155, "step": 18386 }, { "epoch": 0.5635343876425156, "grad_norm": 1.23513712995857, "learning_rate": 4.217312961338889e-06, "loss": 0.546, "step": 18387 }, { "epoch": 0.5635650361652569, "grad_norm": 0.4708525055424881, "learning_rate": 4.216822765948425e-06, "loss": 0.4081, "step": 18388 }, { "epoch": 0.563595684687998, "grad_norm": 1.4221950519178432, "learning_rate": 4.216332578274808e-06, "loss": 0.7178, "step": 18389 }, { "epoch": 0.5636263332107393, "grad_norm": 0.45292850608411417, "learning_rate": 4.215842398322865e-06, "loss": 0.4078, "step": 18390 }, { "epoch": 0.5636569817334804, "grad_norm": 0.4428508372813129, "learning_rate": 4.215352226097428e-06, "loss": 0.3957, "step": 18391 }, { "epoch": 0.5636876302562217, "grad_norm": 1.1518386222105177, "learning_rate": 4.214862061603328e-06, "loss": 0.67, "step": 18392 }, { "epoch": 0.5637182787789629, "grad_norm": 1.2771023498478677, "learning_rate": 4.214371904845389e-06, "loss": 0.6465, "step": 18393 }, { "epoch": 0.563748927301704, "grad_norm": 1.288107790726132, "learning_rate": 4.213881755828449e-06, "loss": 0.5938, "step": 18394 }, { "epoch": 0.5637795758244453, "grad_norm": 1.2169715424494596, "learning_rate": 4.2133916145573295e-06, "loss": 0.5412, "step": 18395 }, { "epoch": 0.5638102243471864, "grad_norm": 1.1160406054504035, "learning_rate": 4.212901481036866e-06, "loss": 0.6333, "step": 18396 }, { "epoch": 0.5638408728699277, "grad_norm": 1.2354844830950595, "learning_rate": 4.212411355271885e-06, "loss": 0.6358, "step": 18397 }, { "epoch": 0.5638715213926688, "grad_norm": 1.25366349995579, "learning_rate": 4.211921237267216e-06, "loss": 0.6643, "step": 18398 }, { "epoch": 0.5639021699154101, "grad_norm": 1.344183647998169, "learning_rate": 4.2114311270276895e-06, "loss": 0.6503, "step": 18399 }, { "epoch": 0.5639328184381512, "grad_norm": 1.3063099766200519, "learning_rate": 4.210941024558133e-06, "loss": 0.5802, "step": 18400 }, { "epoch": 0.5639634669608925, "grad_norm": 1.143717367894535, "learning_rate": 4.210450929863376e-06, "loss": 0.7106, "step": 18401 }, { "epoch": 0.5639941154836337, "grad_norm": 1.3594476490614853, "learning_rate": 4.20996084294825e-06, "loss": 0.6807, "step": 18402 }, { "epoch": 0.5640247640063749, "grad_norm": 1.2916554675656475, "learning_rate": 4.20947076381758e-06, "loss": 0.5806, "step": 18403 }, { "epoch": 0.5640554125291161, "grad_norm": 1.2164762381673984, "learning_rate": 4.208980692476199e-06, "loss": 0.551, "step": 18404 }, { "epoch": 0.5640860610518573, "grad_norm": 0.4861891338821013, "learning_rate": 4.2084906289289325e-06, "loss": 0.4066, "step": 18405 }, { "epoch": 0.5641167095745985, "grad_norm": 1.3027514316640632, "learning_rate": 4.20800057318061e-06, "loss": 0.6254, "step": 18406 }, { "epoch": 0.5641473580973397, "grad_norm": 1.1791348405567552, "learning_rate": 4.20751052523606e-06, "loss": 0.6111, "step": 18407 }, { "epoch": 0.5641780066200809, "grad_norm": 1.217294277832283, "learning_rate": 4.207020485100113e-06, "loss": 0.6629, "step": 18408 }, { "epoch": 0.5642086551428221, "grad_norm": 1.3423129277306625, "learning_rate": 4.206530452777594e-06, "loss": 0.5712, "step": 18409 }, { "epoch": 0.5642393036655633, "grad_norm": 1.199714394919604, "learning_rate": 4.206040428273336e-06, "loss": 0.6003, "step": 18410 }, { "epoch": 0.5642699521883046, "grad_norm": 0.4596325533799724, "learning_rate": 4.205550411592162e-06, "loss": 0.4032, "step": 18411 }, { "epoch": 0.5643006007110457, "grad_norm": 1.249828013864655, "learning_rate": 4.205060402738905e-06, "loss": 0.6226, "step": 18412 }, { "epoch": 0.564331249233787, "grad_norm": 1.4043455268123832, "learning_rate": 4.204570401718392e-06, "loss": 0.6734, "step": 18413 }, { "epoch": 0.5643618977565281, "grad_norm": 1.1502510804425545, "learning_rate": 4.204080408535448e-06, "loss": 0.6258, "step": 18414 }, { "epoch": 0.5643925462792694, "grad_norm": 1.16681469445289, "learning_rate": 4.203590423194905e-06, "loss": 0.5955, "step": 18415 }, { "epoch": 0.5644231948020105, "grad_norm": 1.2315839356877143, "learning_rate": 4.20310044570159e-06, "loss": 0.6174, "step": 18416 }, { "epoch": 0.5644538433247518, "grad_norm": 1.2398481718486527, "learning_rate": 4.202610476060328e-06, "loss": 0.5902, "step": 18417 }, { "epoch": 0.5644844918474929, "grad_norm": 1.168734576600318, "learning_rate": 4.202120514275951e-06, "loss": 0.6281, "step": 18418 }, { "epoch": 0.5645151403702342, "grad_norm": 0.483989115542233, "learning_rate": 4.2016305603532835e-06, "loss": 0.4211, "step": 18419 }, { "epoch": 0.5645457888929754, "grad_norm": 1.221840828888475, "learning_rate": 4.201140614297155e-06, "loss": 0.71, "step": 18420 }, { "epoch": 0.5645764374157166, "grad_norm": 0.45644866133659295, "learning_rate": 4.200650676112392e-06, "loss": 0.3977, "step": 18421 }, { "epoch": 0.5646070859384578, "grad_norm": 1.2842939900894241, "learning_rate": 4.200160745803821e-06, "loss": 0.7017, "step": 18422 }, { "epoch": 0.564637734461199, "grad_norm": 0.4440450866483801, "learning_rate": 4.199670823376273e-06, "loss": 0.3928, "step": 18423 }, { "epoch": 0.5646683829839402, "grad_norm": 1.2367756824959062, "learning_rate": 4.199180908834573e-06, "loss": 0.5051, "step": 18424 }, { "epoch": 0.5646990315066813, "grad_norm": 1.139220080564126, "learning_rate": 4.198691002183547e-06, "loss": 0.5828, "step": 18425 }, { "epoch": 0.5647296800294226, "grad_norm": 1.3354268889284293, "learning_rate": 4.198201103428025e-06, "loss": 0.6984, "step": 18426 }, { "epoch": 0.5647603285521637, "grad_norm": 1.315540532069587, "learning_rate": 4.197711212572834e-06, "loss": 0.5505, "step": 18427 }, { "epoch": 0.564790977074905, "grad_norm": 1.207073588529134, "learning_rate": 4.197221329622796e-06, "loss": 0.5483, "step": 18428 }, { "epoch": 0.5648216255976461, "grad_norm": 1.256731127021004, "learning_rate": 4.196731454582744e-06, "loss": 0.6107, "step": 18429 }, { "epoch": 0.5648522741203874, "grad_norm": 1.5437918700212687, "learning_rate": 4.196241587457501e-06, "loss": 0.616, "step": 18430 }, { "epoch": 0.5648829226431286, "grad_norm": 1.2233724992383952, "learning_rate": 4.1957517282518965e-06, "loss": 0.6452, "step": 18431 }, { "epoch": 0.5649135711658698, "grad_norm": 1.3070238376384302, "learning_rate": 4.195261876970756e-06, "loss": 0.6506, "step": 18432 }, { "epoch": 0.564944219688611, "grad_norm": 1.6222006864770284, "learning_rate": 4.1947720336189055e-06, "loss": 0.6852, "step": 18433 }, { "epoch": 0.5649748682113522, "grad_norm": 1.4030148948698087, "learning_rate": 4.1942821982011735e-06, "loss": 0.6835, "step": 18434 }, { "epoch": 0.5650055167340934, "grad_norm": 1.2861036599508608, "learning_rate": 4.193792370722386e-06, "loss": 0.586, "step": 18435 }, { "epoch": 0.5650361652568346, "grad_norm": 1.2125457728437827, "learning_rate": 4.193302551187364e-06, "loss": 0.6043, "step": 18436 }, { "epoch": 0.5650668137795758, "grad_norm": 1.3157569632510062, "learning_rate": 4.192812739600942e-06, "loss": 0.6531, "step": 18437 }, { "epoch": 0.565097462302317, "grad_norm": 1.345611878314366, "learning_rate": 4.1923229359679405e-06, "loss": 0.6342, "step": 18438 }, { "epoch": 0.5651281108250582, "grad_norm": 1.5482544381231214, "learning_rate": 4.191833140293191e-06, "loss": 0.7327, "step": 18439 }, { "epoch": 0.5651587593477995, "grad_norm": 1.1883175605550236, "learning_rate": 4.191343352581514e-06, "loss": 0.6433, "step": 18440 }, { "epoch": 0.5651894078705406, "grad_norm": 1.532546438649704, "learning_rate": 4.190853572837737e-06, "loss": 0.738, "step": 18441 }, { "epoch": 0.5652200563932819, "grad_norm": 1.3633416235085973, "learning_rate": 4.1903638010666895e-06, "loss": 0.6126, "step": 18442 }, { "epoch": 0.565250704916023, "grad_norm": 1.266159183074992, "learning_rate": 4.189874037273193e-06, "loss": 0.6267, "step": 18443 }, { "epoch": 0.5652813534387643, "grad_norm": 1.3971481051601886, "learning_rate": 4.189384281462074e-06, "loss": 0.7167, "step": 18444 }, { "epoch": 0.5653120019615054, "grad_norm": 1.1847717047961621, "learning_rate": 4.188894533638161e-06, "loss": 0.6125, "step": 18445 }, { "epoch": 0.5653426504842467, "grad_norm": 1.150125026416736, "learning_rate": 4.1884047938062774e-06, "loss": 0.5919, "step": 18446 }, { "epoch": 0.5653732990069879, "grad_norm": 0.4820309626268444, "learning_rate": 4.187915061971248e-06, "loss": 0.4064, "step": 18447 }, { "epoch": 0.5654039475297291, "grad_norm": 1.296933119250947, "learning_rate": 4.1874253381379e-06, "loss": 0.6436, "step": 18448 }, { "epoch": 0.5654345960524703, "grad_norm": 1.2961485494512615, "learning_rate": 4.186935622311057e-06, "loss": 0.6872, "step": 18449 }, { "epoch": 0.5654652445752115, "grad_norm": 1.4768392418926324, "learning_rate": 4.186445914495546e-06, "loss": 0.643, "step": 18450 }, { "epoch": 0.5654958930979527, "grad_norm": 1.2779574175123813, "learning_rate": 4.1859562146961925e-06, "loss": 0.6775, "step": 18451 }, { "epoch": 0.5655265416206939, "grad_norm": 1.1626816794865171, "learning_rate": 4.185466522917819e-06, "loss": 0.4866, "step": 18452 }, { "epoch": 0.5655571901434351, "grad_norm": 1.1120650282039461, "learning_rate": 4.184976839165254e-06, "loss": 0.6596, "step": 18453 }, { "epoch": 0.5655878386661763, "grad_norm": 1.383357367620509, "learning_rate": 4.1844871634433206e-06, "loss": 0.6885, "step": 18454 }, { "epoch": 0.5656184871889175, "grad_norm": 1.0332597758122102, "learning_rate": 4.183997495756841e-06, "loss": 0.4942, "step": 18455 }, { "epoch": 0.5656491357116586, "grad_norm": 1.3389173911022354, "learning_rate": 4.183507836110646e-06, "loss": 0.6473, "step": 18456 }, { "epoch": 0.5656797842343999, "grad_norm": 1.3838910344423914, "learning_rate": 4.183018184509555e-06, "loss": 0.626, "step": 18457 }, { "epoch": 0.5657104327571411, "grad_norm": 1.376174944375238, "learning_rate": 4.182528540958397e-06, "loss": 0.6796, "step": 18458 }, { "epoch": 0.5657410812798823, "grad_norm": 1.3034730964985388, "learning_rate": 4.182038905461994e-06, "loss": 0.5392, "step": 18459 }, { "epoch": 0.5657717298026235, "grad_norm": 2.620203219648077, "learning_rate": 4.1815492780251695e-06, "loss": 0.7164, "step": 18460 }, { "epoch": 0.5658023783253647, "grad_norm": 1.1960855826825116, "learning_rate": 4.181059658652751e-06, "loss": 0.5426, "step": 18461 }, { "epoch": 0.5658330268481059, "grad_norm": 1.2948357151321128, "learning_rate": 4.18057004734956e-06, "loss": 0.5886, "step": 18462 }, { "epoch": 0.5658636753708471, "grad_norm": 1.243290931010604, "learning_rate": 4.180080444120422e-06, "loss": 0.6093, "step": 18463 }, { "epoch": 0.5658943238935883, "grad_norm": 1.3376834707276024, "learning_rate": 4.179590848970162e-06, "loss": 0.6895, "step": 18464 }, { "epoch": 0.5659249724163296, "grad_norm": 1.2120818923515286, "learning_rate": 4.179101261903602e-06, "loss": 0.5443, "step": 18465 }, { "epoch": 0.5659556209390707, "grad_norm": 1.2596161697466999, "learning_rate": 4.178611682925569e-06, "loss": 0.541, "step": 18466 }, { "epoch": 0.565986269461812, "grad_norm": 1.2118588896513058, "learning_rate": 4.178122112040886e-06, "loss": 0.5761, "step": 18467 }, { "epoch": 0.5660169179845531, "grad_norm": 1.1776243173633332, "learning_rate": 4.177632549254372e-06, "loss": 0.7139, "step": 18468 }, { "epoch": 0.5660475665072944, "grad_norm": 1.2901337628912906, "learning_rate": 4.177142994570859e-06, "loss": 0.602, "step": 18469 }, { "epoch": 0.5660782150300355, "grad_norm": 1.6230201228267098, "learning_rate": 4.176653447995165e-06, "loss": 0.7336, "step": 18470 }, { "epoch": 0.5661088635527768, "grad_norm": 1.1627730421309768, "learning_rate": 4.176163909532115e-06, "loss": 0.6564, "step": 18471 }, { "epoch": 0.5661395120755179, "grad_norm": 1.2448614984777728, "learning_rate": 4.175674379186534e-06, "loss": 0.6356, "step": 18472 }, { "epoch": 0.5661701605982592, "grad_norm": 1.1849062327434523, "learning_rate": 4.175184856963243e-06, "loss": 0.5906, "step": 18473 }, { "epoch": 0.5662008091210003, "grad_norm": 1.1684316380565718, "learning_rate": 4.174695342867066e-06, "loss": 0.6452, "step": 18474 }, { "epoch": 0.5662314576437416, "grad_norm": 0.46850126701458744, "learning_rate": 4.174205836902828e-06, "loss": 0.4194, "step": 18475 }, { "epoch": 0.5662621061664828, "grad_norm": 1.389294103984766, "learning_rate": 4.173716339075351e-06, "loss": 0.8014, "step": 18476 }, { "epoch": 0.566292754689224, "grad_norm": 1.331414009703959, "learning_rate": 4.1732268493894586e-06, "loss": 0.6626, "step": 18477 }, { "epoch": 0.5663234032119652, "grad_norm": 1.2122428649030228, "learning_rate": 4.172737367849975e-06, "loss": 0.6087, "step": 18478 }, { "epoch": 0.5663540517347064, "grad_norm": 1.3920470015912936, "learning_rate": 4.1722478944617184e-06, "loss": 0.6345, "step": 18479 }, { "epoch": 0.5663847002574476, "grad_norm": 0.44901888320918165, "learning_rate": 4.171758429229518e-06, "loss": 0.4065, "step": 18480 }, { "epoch": 0.5664153487801888, "grad_norm": 1.309342055966583, "learning_rate": 4.171268972158193e-06, "loss": 0.6802, "step": 18481 }, { "epoch": 0.56644599730293, "grad_norm": 1.2867610533034024, "learning_rate": 4.170779523252565e-06, "loss": 0.6707, "step": 18482 }, { "epoch": 0.5664766458256713, "grad_norm": 0.437884821960547, "learning_rate": 4.170290082517461e-06, "loss": 0.4114, "step": 18483 }, { "epoch": 0.5665072943484124, "grad_norm": 0.4467384289489539, "learning_rate": 4.169800649957699e-06, "loss": 0.4124, "step": 18484 }, { "epoch": 0.5665379428711537, "grad_norm": 1.0659542575736431, "learning_rate": 4.1693112255781055e-06, "loss": 0.5765, "step": 18485 }, { "epoch": 0.5665685913938948, "grad_norm": 0.4675970414367316, "learning_rate": 4.1688218093835005e-06, "loss": 0.4098, "step": 18486 }, { "epoch": 0.566599239916636, "grad_norm": 1.2680611058707956, "learning_rate": 4.1683324013787056e-06, "loss": 0.6706, "step": 18487 }, { "epoch": 0.5666298884393772, "grad_norm": 1.305069872978865, "learning_rate": 4.167843001568545e-06, "loss": 0.6592, "step": 18488 }, { "epoch": 0.5666605369621184, "grad_norm": 1.5115574307587516, "learning_rate": 4.1673536099578425e-06, "loss": 0.6589, "step": 18489 }, { "epoch": 0.5666911854848596, "grad_norm": 1.204857332242769, "learning_rate": 4.1668642265514145e-06, "loss": 0.5991, "step": 18490 }, { "epoch": 0.5667218340076008, "grad_norm": 1.524833924760383, "learning_rate": 4.166374851354089e-06, "loss": 0.6907, "step": 18491 }, { "epoch": 0.566752482530342, "grad_norm": 1.2817995595552298, "learning_rate": 4.165885484370684e-06, "loss": 0.7369, "step": 18492 }, { "epoch": 0.5667831310530832, "grad_norm": 1.2178490647857498, "learning_rate": 4.1653961256060235e-06, "loss": 0.5743, "step": 18493 }, { "epoch": 0.5668137795758245, "grad_norm": 1.3046760593091575, "learning_rate": 4.164906775064929e-06, "loss": 0.6293, "step": 18494 }, { "epoch": 0.5668444280985656, "grad_norm": 1.1414247742978814, "learning_rate": 4.1644174327522195e-06, "loss": 0.5771, "step": 18495 }, { "epoch": 0.5668750766213069, "grad_norm": 1.2798340153498065, "learning_rate": 4.1639280986727205e-06, "loss": 0.6415, "step": 18496 }, { "epoch": 0.566905725144048, "grad_norm": 1.2740892920093894, "learning_rate": 4.163438772831251e-06, "loss": 0.6569, "step": 18497 }, { "epoch": 0.5669363736667893, "grad_norm": 0.4896544612788001, "learning_rate": 4.162949455232634e-06, "loss": 0.4129, "step": 18498 }, { "epoch": 0.5669670221895304, "grad_norm": 0.4567512701873226, "learning_rate": 4.162460145881691e-06, "loss": 0.4216, "step": 18499 }, { "epoch": 0.5669976707122717, "grad_norm": 1.2547216379568678, "learning_rate": 4.161970844783242e-06, "loss": 0.6765, "step": 18500 }, { "epoch": 0.5670283192350128, "grad_norm": 1.1652263117011121, "learning_rate": 4.161481551942107e-06, "loss": 0.6202, "step": 18501 }, { "epoch": 0.5670589677577541, "grad_norm": 0.4633352335239677, "learning_rate": 4.16099226736311e-06, "loss": 0.4156, "step": 18502 }, { "epoch": 0.5670896162804953, "grad_norm": 1.4980261852057726, "learning_rate": 4.160502991051071e-06, "loss": 0.6702, "step": 18503 }, { "epoch": 0.5671202648032365, "grad_norm": 1.1551906533861611, "learning_rate": 4.1600137230108106e-06, "loss": 0.5967, "step": 18504 }, { "epoch": 0.5671509133259777, "grad_norm": 1.3191841352444376, "learning_rate": 4.159524463247151e-06, "loss": 0.6023, "step": 18505 }, { "epoch": 0.5671815618487189, "grad_norm": 1.5824861259967256, "learning_rate": 4.159035211764909e-06, "loss": 0.5593, "step": 18506 }, { "epoch": 0.5672122103714601, "grad_norm": 1.1791372931039914, "learning_rate": 4.1585459685689105e-06, "loss": 0.7343, "step": 18507 }, { "epoch": 0.5672428588942013, "grad_norm": 1.4580879419527948, "learning_rate": 4.158056733663975e-06, "loss": 0.5734, "step": 18508 }, { "epoch": 0.5672735074169425, "grad_norm": 1.146153665718577, "learning_rate": 4.157567507054919e-06, "loss": 0.6185, "step": 18509 }, { "epoch": 0.5673041559396838, "grad_norm": 1.3836533054262448, "learning_rate": 4.1570782887465685e-06, "loss": 0.7136, "step": 18510 }, { "epoch": 0.5673348044624249, "grad_norm": 1.419366877599907, "learning_rate": 4.156589078743738e-06, "loss": 0.7216, "step": 18511 }, { "epoch": 0.5673654529851662, "grad_norm": 1.2107050887276674, "learning_rate": 4.156099877051254e-06, "loss": 0.5805, "step": 18512 }, { "epoch": 0.5673961015079073, "grad_norm": 1.2837531248200105, "learning_rate": 4.155610683673934e-06, "loss": 0.6075, "step": 18513 }, { "epoch": 0.5674267500306486, "grad_norm": 1.208393740232266, "learning_rate": 4.155121498616596e-06, "loss": 0.5901, "step": 18514 }, { "epoch": 0.5674573985533897, "grad_norm": 0.48458695500937293, "learning_rate": 4.154632321884063e-06, "loss": 0.418, "step": 18515 }, { "epoch": 0.567488047076131, "grad_norm": 1.1843515435055767, "learning_rate": 4.154143153481155e-06, "loss": 0.6016, "step": 18516 }, { "epoch": 0.5675186955988721, "grad_norm": 1.2240228982420682, "learning_rate": 4.15365399341269e-06, "loss": 0.5552, "step": 18517 }, { "epoch": 0.5675493441216133, "grad_norm": 1.2638291972154732, "learning_rate": 4.153164841683488e-06, "loss": 0.5867, "step": 18518 }, { "epoch": 0.5675799926443545, "grad_norm": 1.1746625979655867, "learning_rate": 4.152675698298371e-06, "loss": 0.7079, "step": 18519 }, { "epoch": 0.5676106411670957, "grad_norm": 1.312276780571209, "learning_rate": 4.152186563262155e-06, "loss": 0.5944, "step": 18520 }, { "epoch": 0.567641289689837, "grad_norm": 1.252675989194333, "learning_rate": 4.1516974365796645e-06, "loss": 0.609, "step": 18521 }, { "epoch": 0.5676719382125781, "grad_norm": 1.2164481094540203, "learning_rate": 4.151208318255713e-06, "loss": 0.5907, "step": 18522 }, { "epoch": 0.5677025867353194, "grad_norm": 1.4584119887304146, "learning_rate": 4.150719208295127e-06, "loss": 0.603, "step": 18523 }, { "epoch": 0.5677332352580605, "grad_norm": 1.2927358437512892, "learning_rate": 4.15023010670272e-06, "loss": 0.5925, "step": 18524 }, { "epoch": 0.5677638837808018, "grad_norm": 0.4604518112801797, "learning_rate": 4.149741013483312e-06, "loss": 0.402, "step": 18525 }, { "epoch": 0.5677945323035429, "grad_norm": 1.1735846650289914, "learning_rate": 4.149251928641725e-06, "loss": 0.576, "step": 18526 }, { "epoch": 0.5678251808262842, "grad_norm": 1.3450048940816308, "learning_rate": 4.1487628521827765e-06, "loss": 0.6548, "step": 18527 }, { "epoch": 0.5678558293490253, "grad_norm": 1.1145418474238833, "learning_rate": 4.1482737841112835e-06, "loss": 0.6541, "step": 18528 }, { "epoch": 0.5678864778717666, "grad_norm": 1.343776174619622, "learning_rate": 4.1477847244320685e-06, "loss": 0.6681, "step": 18529 }, { "epoch": 0.5679171263945078, "grad_norm": 0.4404952645592111, "learning_rate": 4.147295673149947e-06, "loss": 0.4044, "step": 18530 }, { "epoch": 0.567947774917249, "grad_norm": 1.3933645806141624, "learning_rate": 4.146806630269741e-06, "loss": 0.6299, "step": 18531 }, { "epoch": 0.5679784234399902, "grad_norm": 1.400347758592259, "learning_rate": 4.1463175957962686e-06, "loss": 0.7461, "step": 18532 }, { "epoch": 0.5680090719627314, "grad_norm": 1.3745296076263658, "learning_rate": 4.1458285697343445e-06, "loss": 0.6596, "step": 18533 }, { "epoch": 0.5680397204854726, "grad_norm": 1.37622652671889, "learning_rate": 4.145339552088793e-06, "loss": 0.6713, "step": 18534 }, { "epoch": 0.5680703690082138, "grad_norm": 0.4378397692406552, "learning_rate": 4.144850542864428e-06, "loss": 0.4072, "step": 18535 }, { "epoch": 0.568101017530955, "grad_norm": 1.205175359365213, "learning_rate": 4.144361542066069e-06, "loss": 0.5988, "step": 18536 }, { "epoch": 0.5681316660536962, "grad_norm": 1.2009988653564825, "learning_rate": 4.143872549698535e-06, "loss": 0.5923, "step": 18537 }, { "epoch": 0.5681623145764374, "grad_norm": 1.164360557355893, "learning_rate": 4.143383565766643e-06, "loss": 0.6876, "step": 18538 }, { "epoch": 0.5681929630991787, "grad_norm": 1.3232269967813697, "learning_rate": 4.1428945902752135e-06, "loss": 0.506, "step": 18539 }, { "epoch": 0.5682236116219198, "grad_norm": 1.1304518465513083, "learning_rate": 4.142405623229062e-06, "loss": 0.6196, "step": 18540 }, { "epoch": 0.5682542601446611, "grad_norm": 1.3342169661319778, "learning_rate": 4.141916664633008e-06, "loss": 0.7113, "step": 18541 }, { "epoch": 0.5682849086674022, "grad_norm": 1.250968024082305, "learning_rate": 4.141427714491868e-06, "loss": 0.6057, "step": 18542 }, { "epoch": 0.5683155571901435, "grad_norm": 1.315581942219145, "learning_rate": 4.1409387728104615e-06, "loss": 0.5637, "step": 18543 }, { "epoch": 0.5683462057128846, "grad_norm": 1.2396985764891595, "learning_rate": 4.1404498395936035e-06, "loss": 0.6392, "step": 18544 }, { "epoch": 0.5683768542356259, "grad_norm": 1.2196639684357153, "learning_rate": 4.1399609148461135e-06, "loss": 0.6745, "step": 18545 }, { "epoch": 0.568407502758367, "grad_norm": 1.3740429960099596, "learning_rate": 4.13947199857281e-06, "loss": 0.6424, "step": 18546 }, { "epoch": 0.5684381512811083, "grad_norm": 1.077547557727208, "learning_rate": 4.138983090778507e-06, "loss": 0.6507, "step": 18547 }, { "epoch": 0.5684687998038495, "grad_norm": 1.1411702756901108, "learning_rate": 4.1384941914680256e-06, "loss": 0.5675, "step": 18548 }, { "epoch": 0.5684994483265906, "grad_norm": 0.4558203714304338, "learning_rate": 4.1380053006461804e-06, "loss": 0.3888, "step": 18549 }, { "epoch": 0.5685300968493319, "grad_norm": 1.2061832013562352, "learning_rate": 4.13751641831779e-06, "loss": 0.7217, "step": 18550 }, { "epoch": 0.568560745372073, "grad_norm": 1.2630598937454296, "learning_rate": 4.137027544487672e-06, "loss": 0.6829, "step": 18551 }, { "epoch": 0.5685913938948143, "grad_norm": 1.1819228403677766, "learning_rate": 4.136538679160639e-06, "loss": 0.6624, "step": 18552 }, { "epoch": 0.5686220424175554, "grad_norm": 1.1994242852836323, "learning_rate": 4.136049822341516e-06, "loss": 0.6163, "step": 18553 }, { "epoch": 0.5686526909402967, "grad_norm": 1.1695332060302177, "learning_rate": 4.135560974035112e-06, "loss": 0.5531, "step": 18554 }, { "epoch": 0.5686833394630378, "grad_norm": 1.3194739333999133, "learning_rate": 4.135072134246247e-06, "loss": 0.6431, "step": 18555 }, { "epoch": 0.5687139879857791, "grad_norm": 1.264120558135401, "learning_rate": 4.134583302979739e-06, "loss": 0.6085, "step": 18556 }, { "epoch": 0.5687446365085203, "grad_norm": 0.43804302821390706, "learning_rate": 4.134094480240402e-06, "loss": 0.4008, "step": 18557 }, { "epoch": 0.5687752850312615, "grad_norm": 1.2418647587266682, "learning_rate": 4.1336056660330535e-06, "loss": 0.6765, "step": 18558 }, { "epoch": 0.5688059335540027, "grad_norm": 0.451688938807477, "learning_rate": 4.133116860362511e-06, "loss": 0.4284, "step": 18559 }, { "epoch": 0.5688365820767439, "grad_norm": 1.2054412315146006, "learning_rate": 4.132628063233589e-06, "loss": 0.5958, "step": 18560 }, { "epoch": 0.5688672305994851, "grad_norm": 0.455694747571661, "learning_rate": 4.132139274651105e-06, "loss": 0.3904, "step": 18561 }, { "epoch": 0.5688978791222263, "grad_norm": 1.3248946180107655, "learning_rate": 4.131650494619876e-06, "loss": 0.6791, "step": 18562 }, { "epoch": 0.5689285276449675, "grad_norm": 1.2952340432811793, "learning_rate": 4.1311617231447136e-06, "loss": 0.6489, "step": 18563 }, { "epoch": 0.5689591761677087, "grad_norm": 0.4361310366363082, "learning_rate": 4.130672960230441e-06, "loss": 0.4229, "step": 18564 }, { "epoch": 0.5689898246904499, "grad_norm": 0.4379670865476519, "learning_rate": 4.130184205881866e-06, "loss": 0.3791, "step": 18565 }, { "epoch": 0.5690204732131912, "grad_norm": 1.3101320424204286, "learning_rate": 4.129695460103813e-06, "loss": 0.617, "step": 18566 }, { "epoch": 0.5690511217359323, "grad_norm": 1.294110897601432, "learning_rate": 4.12920672290109e-06, "loss": 0.6413, "step": 18567 }, { "epoch": 0.5690817702586736, "grad_norm": 1.4167185695700346, "learning_rate": 4.128717994278517e-06, "loss": 0.5943, "step": 18568 }, { "epoch": 0.5691124187814147, "grad_norm": 1.2772211081653575, "learning_rate": 4.1282292742409095e-06, "loss": 0.586, "step": 18569 }, { "epoch": 0.569143067304156, "grad_norm": 1.4497021138870572, "learning_rate": 4.127740562793081e-06, "loss": 0.5793, "step": 18570 }, { "epoch": 0.5691737158268971, "grad_norm": 1.322879285074186, "learning_rate": 4.127251859939847e-06, "loss": 0.6278, "step": 18571 }, { "epoch": 0.5692043643496384, "grad_norm": 1.2871019631193243, "learning_rate": 4.126763165686025e-06, "loss": 0.7035, "step": 18572 }, { "epoch": 0.5692350128723795, "grad_norm": 1.174619278653549, "learning_rate": 4.12627448003643e-06, "loss": 0.6207, "step": 18573 }, { "epoch": 0.5692656613951208, "grad_norm": 0.48025897628964315, "learning_rate": 4.125785802995874e-06, "loss": 0.373, "step": 18574 }, { "epoch": 0.569296309917862, "grad_norm": 1.3974454153118103, "learning_rate": 4.125297134569176e-06, "loss": 0.693, "step": 18575 }, { "epoch": 0.5693269584406032, "grad_norm": 1.330647439417715, "learning_rate": 4.124808474761146e-06, "loss": 0.6592, "step": 18576 }, { "epoch": 0.5693576069633444, "grad_norm": 1.3876369999063314, "learning_rate": 4.124319823576606e-06, "loss": 0.6863, "step": 18577 }, { "epoch": 0.5693882554860856, "grad_norm": 1.2848238952594182, "learning_rate": 4.123831181020365e-06, "loss": 0.599, "step": 18578 }, { "epoch": 0.5694189040088268, "grad_norm": 1.2992387312349323, "learning_rate": 4.123342547097239e-06, "loss": 0.6179, "step": 18579 }, { "epoch": 0.5694495525315679, "grad_norm": 1.3140889197995576, "learning_rate": 4.122853921812044e-06, "loss": 0.6098, "step": 18580 }, { "epoch": 0.5694802010543092, "grad_norm": 1.4795386429517534, "learning_rate": 4.122365305169593e-06, "loss": 0.6248, "step": 18581 }, { "epoch": 0.5695108495770503, "grad_norm": 1.379019507309203, "learning_rate": 4.121876697174701e-06, "loss": 0.5886, "step": 18582 }, { "epoch": 0.5695414980997916, "grad_norm": 1.3070875682577703, "learning_rate": 4.121388097832184e-06, "loss": 0.6384, "step": 18583 }, { "epoch": 0.5695721466225327, "grad_norm": 1.3837991368853877, "learning_rate": 4.120899507146853e-06, "loss": 0.7277, "step": 18584 }, { "epoch": 0.569602795145274, "grad_norm": 1.225005484236741, "learning_rate": 4.1204109251235255e-06, "loss": 0.5164, "step": 18585 }, { "epoch": 0.5696334436680152, "grad_norm": 1.2899932591567733, "learning_rate": 4.119922351767016e-06, "loss": 0.6359, "step": 18586 }, { "epoch": 0.5696640921907564, "grad_norm": 0.4456129652217708, "learning_rate": 4.119433787082133e-06, "loss": 0.3903, "step": 18587 }, { "epoch": 0.5696947407134976, "grad_norm": 1.3817035844122247, "learning_rate": 4.118945231073697e-06, "loss": 0.6516, "step": 18588 }, { "epoch": 0.5697253892362388, "grad_norm": 1.4026029126275399, "learning_rate": 4.118456683746518e-06, "loss": 0.6781, "step": 18589 }, { "epoch": 0.56975603775898, "grad_norm": 1.3541354893606763, "learning_rate": 4.11796814510541e-06, "loss": 0.6096, "step": 18590 }, { "epoch": 0.5697866862817212, "grad_norm": 1.2037113430306443, "learning_rate": 4.1174796151551885e-06, "loss": 0.7036, "step": 18591 }, { "epoch": 0.5698173348044624, "grad_norm": 1.1380655611353319, "learning_rate": 4.116991093900665e-06, "loss": 0.6078, "step": 18592 }, { "epoch": 0.5698479833272037, "grad_norm": 1.344697021985336, "learning_rate": 4.116502581346655e-06, "loss": 0.6022, "step": 18593 }, { "epoch": 0.5698786318499448, "grad_norm": 1.2657763102150363, "learning_rate": 4.116014077497972e-06, "loss": 0.7228, "step": 18594 }, { "epoch": 0.5699092803726861, "grad_norm": 1.2580290957375653, "learning_rate": 4.115525582359427e-06, "loss": 0.5866, "step": 18595 }, { "epoch": 0.5699399288954272, "grad_norm": 1.4009223866877587, "learning_rate": 4.115037095935837e-06, "loss": 0.6924, "step": 18596 }, { "epoch": 0.5699705774181685, "grad_norm": 1.196246071000293, "learning_rate": 4.114548618232012e-06, "loss": 0.7849, "step": 18597 }, { "epoch": 0.5700012259409096, "grad_norm": 1.2882405255947376, "learning_rate": 4.114060149252764e-06, "loss": 0.6412, "step": 18598 }, { "epoch": 0.5700318744636509, "grad_norm": 1.2660258488148723, "learning_rate": 4.1135716890029096e-06, "loss": 0.6692, "step": 18599 }, { "epoch": 0.570062522986392, "grad_norm": 1.0645548941897909, "learning_rate": 4.1130832374872605e-06, "loss": 0.5915, "step": 18600 }, { "epoch": 0.5700931715091333, "grad_norm": 1.3677673630669271, "learning_rate": 4.112594794710628e-06, "loss": 0.6183, "step": 18601 }, { "epoch": 0.5701238200318745, "grad_norm": 1.1990337364091863, "learning_rate": 4.1121063606778264e-06, "loss": 0.5757, "step": 18602 }, { "epoch": 0.5701544685546157, "grad_norm": 1.2948841061929133, "learning_rate": 4.111617935393668e-06, "loss": 0.4871, "step": 18603 }, { "epoch": 0.5701851170773569, "grad_norm": 1.3880756931715896, "learning_rate": 4.1111295188629665e-06, "loss": 0.6795, "step": 18604 }, { "epoch": 0.5702157656000981, "grad_norm": 1.1523888509814115, "learning_rate": 4.110641111090533e-06, "loss": 0.5497, "step": 18605 }, { "epoch": 0.5702464141228393, "grad_norm": 1.1942362191328362, "learning_rate": 4.110152712081178e-06, "loss": 0.6158, "step": 18606 }, { "epoch": 0.5702770626455805, "grad_norm": 1.3468589341227555, "learning_rate": 4.109664321839719e-06, "loss": 0.6537, "step": 18607 }, { "epoch": 0.5703077111683217, "grad_norm": 1.2968195905864923, "learning_rate": 4.109175940370965e-06, "loss": 0.6973, "step": 18608 }, { "epoch": 0.570338359691063, "grad_norm": 0.46663546360311475, "learning_rate": 4.108687567679726e-06, "loss": 0.4234, "step": 18609 }, { "epoch": 0.5703690082138041, "grad_norm": 1.2042711457325281, "learning_rate": 4.108199203770818e-06, "loss": 0.6607, "step": 18610 }, { "epoch": 0.5703996567365452, "grad_norm": 1.1488168485825925, "learning_rate": 4.10771084864905e-06, "loss": 0.6731, "step": 18611 }, { "epoch": 0.5704303052592865, "grad_norm": 0.43665170983635027, "learning_rate": 4.107222502319237e-06, "loss": 0.3874, "step": 18612 }, { "epoch": 0.5704609537820277, "grad_norm": 1.546579626375882, "learning_rate": 4.106734164786189e-06, "loss": 0.667, "step": 18613 }, { "epoch": 0.5704916023047689, "grad_norm": 1.3761886354613644, "learning_rate": 4.106245836054717e-06, "loss": 0.6646, "step": 18614 }, { "epoch": 0.5705222508275101, "grad_norm": 1.3081677159116238, "learning_rate": 4.1057575161296335e-06, "loss": 0.6559, "step": 18615 }, { "epoch": 0.5705528993502513, "grad_norm": 1.1843218708298509, "learning_rate": 4.105269205015753e-06, "loss": 0.6046, "step": 18616 }, { "epoch": 0.5705835478729925, "grad_norm": 1.135322727267542, "learning_rate": 4.104780902717879e-06, "loss": 0.6737, "step": 18617 }, { "epoch": 0.5706141963957337, "grad_norm": 1.1328087881629472, "learning_rate": 4.104292609240831e-06, "loss": 0.4919, "step": 18618 }, { "epoch": 0.5706448449184749, "grad_norm": 1.3787771196288379, "learning_rate": 4.103804324589414e-06, "loss": 0.5894, "step": 18619 }, { "epoch": 0.5706754934412162, "grad_norm": 1.2811818325471738, "learning_rate": 4.103316048768447e-06, "loss": 0.5891, "step": 18620 }, { "epoch": 0.5707061419639573, "grad_norm": 0.5091264430653103, "learning_rate": 4.102827781782734e-06, "loss": 0.4136, "step": 18621 }, { "epoch": 0.5707367904866986, "grad_norm": 1.2649352179907933, "learning_rate": 4.102339523637087e-06, "loss": 0.6685, "step": 18622 }, { "epoch": 0.5707674390094397, "grad_norm": 1.4350395075312996, "learning_rate": 4.10185127433632e-06, "loss": 0.6291, "step": 18623 }, { "epoch": 0.570798087532181, "grad_norm": 1.2703638373655692, "learning_rate": 4.101363033885242e-06, "loss": 0.591, "step": 18624 }, { "epoch": 0.5708287360549221, "grad_norm": 1.3841733111924326, "learning_rate": 4.100874802288664e-06, "loss": 0.6647, "step": 18625 }, { "epoch": 0.5708593845776634, "grad_norm": 1.2751630335965067, "learning_rate": 4.100386579551397e-06, "loss": 0.6123, "step": 18626 }, { "epoch": 0.5708900331004045, "grad_norm": 0.4992750234116233, "learning_rate": 4.099898365678252e-06, "loss": 0.4014, "step": 18627 }, { "epoch": 0.5709206816231458, "grad_norm": 1.545085133050731, "learning_rate": 4.099410160674035e-06, "loss": 0.6702, "step": 18628 }, { "epoch": 0.570951330145887, "grad_norm": 1.1109516926605374, "learning_rate": 4.098921964543563e-06, "loss": 0.5137, "step": 18629 }, { "epoch": 0.5709819786686282, "grad_norm": 1.1325769356470081, "learning_rate": 4.0984337772916415e-06, "loss": 0.566, "step": 18630 }, { "epoch": 0.5710126271913694, "grad_norm": 1.3291814513718756, "learning_rate": 4.097945598923085e-06, "loss": 0.6848, "step": 18631 }, { "epoch": 0.5710432757141106, "grad_norm": 1.27191525307467, "learning_rate": 4.0974574294427016e-06, "loss": 0.6841, "step": 18632 }, { "epoch": 0.5710739242368518, "grad_norm": 0.43702689507273473, "learning_rate": 4.096969268855299e-06, "loss": 0.386, "step": 18633 }, { "epoch": 0.571104572759593, "grad_norm": 1.198400068194337, "learning_rate": 4.09648111716569e-06, "loss": 0.632, "step": 18634 }, { "epoch": 0.5711352212823342, "grad_norm": 1.1992619369790058, "learning_rate": 4.095992974378684e-06, "loss": 0.6665, "step": 18635 }, { "epoch": 0.5711658698050754, "grad_norm": 1.2616956413452107, "learning_rate": 4.09550484049909e-06, "loss": 0.6697, "step": 18636 }, { "epoch": 0.5711965183278166, "grad_norm": 1.24373365941172, "learning_rate": 4.0950167155317185e-06, "loss": 0.5204, "step": 18637 }, { "epoch": 0.5712271668505579, "grad_norm": 1.2004933794322783, "learning_rate": 4.094528599481379e-06, "loss": 0.5993, "step": 18638 }, { "epoch": 0.571257815373299, "grad_norm": 1.2181256330992076, "learning_rate": 4.0940404923528824e-06, "loss": 0.5723, "step": 18639 }, { "epoch": 0.5712884638960403, "grad_norm": 1.3322231386932728, "learning_rate": 4.093552394151034e-06, "loss": 0.6465, "step": 18640 }, { "epoch": 0.5713191124187814, "grad_norm": 1.4161922233034494, "learning_rate": 4.0930643048806465e-06, "loss": 0.6526, "step": 18641 }, { "epoch": 0.5713497609415226, "grad_norm": 1.2723538996239538, "learning_rate": 4.092576224546529e-06, "loss": 0.5482, "step": 18642 }, { "epoch": 0.5713804094642638, "grad_norm": 1.125988529131597, "learning_rate": 4.092088153153491e-06, "loss": 0.5441, "step": 18643 }, { "epoch": 0.571411057987005, "grad_norm": 1.2616726792253217, "learning_rate": 4.091600090706338e-06, "loss": 0.5902, "step": 18644 }, { "epoch": 0.5714417065097462, "grad_norm": 1.105824145327887, "learning_rate": 4.091112037209885e-06, "loss": 0.5956, "step": 18645 }, { "epoch": 0.5714723550324874, "grad_norm": 1.2717814926862339, "learning_rate": 4.090623992668934e-06, "loss": 0.564, "step": 18646 }, { "epoch": 0.5715030035552287, "grad_norm": 1.8862570832774441, "learning_rate": 4.0901359570883006e-06, "loss": 0.6455, "step": 18647 }, { "epoch": 0.5715336520779698, "grad_norm": 1.2388064249512794, "learning_rate": 4.089647930472791e-06, "loss": 0.5799, "step": 18648 }, { "epoch": 0.5715643006007111, "grad_norm": 1.12560574501325, "learning_rate": 4.089159912827209e-06, "loss": 0.5831, "step": 18649 }, { "epoch": 0.5715949491234522, "grad_norm": 1.1859720988909204, "learning_rate": 4.088671904156372e-06, "loss": 0.5766, "step": 18650 }, { "epoch": 0.5716255976461935, "grad_norm": 1.3453411986702548, "learning_rate": 4.088183904465081e-06, "loss": 0.7485, "step": 18651 }, { "epoch": 0.5716562461689346, "grad_norm": 1.3577752164331607, "learning_rate": 4.0876959137581484e-06, "loss": 0.6074, "step": 18652 }, { "epoch": 0.5716868946916759, "grad_norm": 1.3285888786492606, "learning_rate": 4.0872079320403814e-06, "loss": 0.647, "step": 18653 }, { "epoch": 0.571717543214417, "grad_norm": 0.501501485911276, "learning_rate": 4.086719959316588e-06, "loss": 0.4061, "step": 18654 }, { "epoch": 0.5717481917371583, "grad_norm": 1.357855607710619, "learning_rate": 4.0862319955915755e-06, "loss": 0.6836, "step": 18655 }, { "epoch": 0.5717788402598994, "grad_norm": 0.45306939823361553, "learning_rate": 4.085744040870155e-06, "loss": 0.4077, "step": 18656 }, { "epoch": 0.5718094887826407, "grad_norm": 0.4434657445302692, "learning_rate": 4.08525609515713e-06, "loss": 0.3746, "step": 18657 }, { "epoch": 0.5718401373053819, "grad_norm": 1.3640601209979863, "learning_rate": 4.084768158457313e-06, "loss": 0.6024, "step": 18658 }, { "epoch": 0.5718707858281231, "grad_norm": 1.477727847261263, "learning_rate": 4.08428023077551e-06, "loss": 0.6777, "step": 18659 }, { "epoch": 0.5719014343508643, "grad_norm": 1.1750453328474564, "learning_rate": 4.0837923121165245e-06, "loss": 0.5971, "step": 18660 }, { "epoch": 0.5719320828736055, "grad_norm": 0.4597898082503704, "learning_rate": 4.083304402485171e-06, "loss": 0.4216, "step": 18661 }, { "epoch": 0.5719627313963467, "grad_norm": 1.4003508441413457, "learning_rate": 4.082816501886254e-06, "loss": 0.5826, "step": 18662 }, { "epoch": 0.5719933799190879, "grad_norm": 1.5780440236809925, "learning_rate": 4.082328610324579e-06, "loss": 0.7497, "step": 18663 }, { "epoch": 0.5720240284418291, "grad_norm": 1.305303430040089, "learning_rate": 4.0818407278049545e-06, "loss": 0.5784, "step": 18664 }, { "epoch": 0.5720546769645704, "grad_norm": 1.2980915594060043, "learning_rate": 4.081352854332189e-06, "loss": 0.6691, "step": 18665 }, { "epoch": 0.5720853254873115, "grad_norm": 1.35686039633046, "learning_rate": 4.08086498991109e-06, "loss": 0.7099, "step": 18666 }, { "epoch": 0.5721159740100528, "grad_norm": 1.240275750729277, "learning_rate": 4.080377134546462e-06, "loss": 0.6724, "step": 18667 }, { "epoch": 0.5721466225327939, "grad_norm": 1.2901728379440038, "learning_rate": 4.0798892882431135e-06, "loss": 0.6537, "step": 18668 }, { "epoch": 0.5721772710555352, "grad_norm": 1.6438400325216571, "learning_rate": 4.079401451005852e-06, "loss": 0.6895, "step": 18669 }, { "epoch": 0.5722079195782763, "grad_norm": 1.4850966594871393, "learning_rate": 4.078913622839485e-06, "loss": 0.6573, "step": 18670 }, { "epoch": 0.5722385681010176, "grad_norm": 1.3826799863289652, "learning_rate": 4.078425803748813e-06, "loss": 0.6769, "step": 18671 }, { "epoch": 0.5722692166237587, "grad_norm": 0.47738412315777334, "learning_rate": 4.077937993738652e-06, "loss": 0.419, "step": 18672 }, { "epoch": 0.5722998651464999, "grad_norm": 1.1770993379056978, "learning_rate": 4.0774501928138005e-06, "loss": 0.6189, "step": 18673 }, { "epoch": 0.5723305136692411, "grad_norm": 1.3082989745386446, "learning_rate": 4.076962400979071e-06, "loss": 0.7157, "step": 18674 }, { "epoch": 0.5723611621919823, "grad_norm": 0.46188515085873605, "learning_rate": 4.076474618239266e-06, "loss": 0.4251, "step": 18675 }, { "epoch": 0.5723918107147236, "grad_norm": 1.4270702991901505, "learning_rate": 4.0759868445991925e-06, "loss": 0.6047, "step": 18676 }, { "epoch": 0.5724224592374647, "grad_norm": 1.3152684344104797, "learning_rate": 4.075499080063658e-06, "loss": 0.6528, "step": 18677 }, { "epoch": 0.572453107760206, "grad_norm": 1.3736851421559741, "learning_rate": 4.075011324637468e-06, "loss": 0.7404, "step": 18678 }, { "epoch": 0.5724837562829471, "grad_norm": 1.2700541831008456, "learning_rate": 4.074523578325426e-06, "loss": 0.6107, "step": 18679 }, { "epoch": 0.5725144048056884, "grad_norm": 1.250381469546012, "learning_rate": 4.0740358411323415e-06, "loss": 0.6659, "step": 18680 }, { "epoch": 0.5725450533284295, "grad_norm": 1.3425537131829104, "learning_rate": 4.07354811306302e-06, "loss": 0.6316, "step": 18681 }, { "epoch": 0.5725757018511708, "grad_norm": 1.267786335085075, "learning_rate": 4.0730603941222626e-06, "loss": 0.5994, "step": 18682 }, { "epoch": 0.5726063503739119, "grad_norm": 1.3130169135748393, "learning_rate": 4.072572684314881e-06, "loss": 0.616, "step": 18683 }, { "epoch": 0.5726369988966532, "grad_norm": 0.43671943696244125, "learning_rate": 4.072084983645677e-06, "loss": 0.4046, "step": 18684 }, { "epoch": 0.5726676474193944, "grad_norm": 1.1670961391292325, "learning_rate": 4.071597292119457e-06, "loss": 0.4846, "step": 18685 }, { "epoch": 0.5726982959421356, "grad_norm": 1.2946829402981848, "learning_rate": 4.071109609741027e-06, "loss": 0.643, "step": 18686 }, { "epoch": 0.5727289444648768, "grad_norm": 1.3382298293705739, "learning_rate": 4.070621936515191e-06, "loss": 0.6501, "step": 18687 }, { "epoch": 0.572759592987618, "grad_norm": 1.1659245696706744, "learning_rate": 4.070134272446755e-06, "loss": 0.6494, "step": 18688 }, { "epoch": 0.5727902415103592, "grad_norm": 1.2266659599779506, "learning_rate": 4.069646617540525e-06, "loss": 0.5904, "step": 18689 }, { "epoch": 0.5728208900331004, "grad_norm": 0.45685483736166155, "learning_rate": 4.069158971801304e-06, "loss": 0.3992, "step": 18690 }, { "epoch": 0.5728515385558416, "grad_norm": 1.3179784613444852, "learning_rate": 4.068671335233898e-06, "loss": 0.6745, "step": 18691 }, { "epoch": 0.5728821870785828, "grad_norm": 0.5398074235430057, "learning_rate": 4.06818370784311e-06, "loss": 0.3833, "step": 18692 }, { "epoch": 0.572912835601324, "grad_norm": 1.5621247477813605, "learning_rate": 4.067696089633749e-06, "loss": 0.7126, "step": 18693 }, { "epoch": 0.5729434841240653, "grad_norm": 1.2063343781505327, "learning_rate": 4.067208480610617e-06, "loss": 0.6045, "step": 18694 }, { "epoch": 0.5729741326468064, "grad_norm": 1.3389627055254858, "learning_rate": 4.066720880778516e-06, "loss": 0.7198, "step": 18695 }, { "epoch": 0.5730047811695477, "grad_norm": 0.452473423033942, "learning_rate": 4.0662332901422545e-06, "loss": 0.4072, "step": 18696 }, { "epoch": 0.5730354296922888, "grad_norm": 1.2525198108768403, "learning_rate": 4.065745708706636e-06, "loss": 0.7199, "step": 18697 }, { "epoch": 0.5730660782150301, "grad_norm": 1.338018924743876, "learning_rate": 4.065258136476462e-06, "loss": 0.6084, "step": 18698 }, { "epoch": 0.5730967267377712, "grad_norm": 1.1369942165848168, "learning_rate": 4.06477057345654e-06, "loss": 0.5518, "step": 18699 }, { "epoch": 0.5731273752605125, "grad_norm": 1.119704004812326, "learning_rate": 4.064283019651674e-06, "loss": 0.6403, "step": 18700 }, { "epoch": 0.5731580237832536, "grad_norm": 0.4326315476800385, "learning_rate": 4.063795475066664e-06, "loss": 0.3855, "step": 18701 }, { "epoch": 0.5731886723059949, "grad_norm": 1.4009476766897087, "learning_rate": 4.063307939706319e-06, "loss": 0.6578, "step": 18702 }, { "epoch": 0.5732193208287361, "grad_norm": 1.3204183811968413, "learning_rate": 4.062820413575438e-06, "loss": 0.5985, "step": 18703 }, { "epoch": 0.5732499693514772, "grad_norm": 1.3446944989914609, "learning_rate": 4.062332896678831e-06, "loss": 0.5663, "step": 18704 }, { "epoch": 0.5732806178742185, "grad_norm": 1.2483552311190937, "learning_rate": 4.061845389021296e-06, "loss": 0.5859, "step": 18705 }, { "epoch": 0.5733112663969596, "grad_norm": 0.45348393756352595, "learning_rate": 4.061357890607638e-06, "loss": 0.4061, "step": 18706 }, { "epoch": 0.5733419149197009, "grad_norm": 0.4570117106747504, "learning_rate": 4.060870401442661e-06, "loss": 0.3922, "step": 18707 }, { "epoch": 0.573372563442442, "grad_norm": 0.438569104686788, "learning_rate": 4.060382921531169e-06, "loss": 0.3939, "step": 18708 }, { "epoch": 0.5734032119651833, "grad_norm": 1.3085138452206224, "learning_rate": 4.059895450877963e-06, "loss": 0.6118, "step": 18709 }, { "epoch": 0.5734338604879244, "grad_norm": 1.3771385100254758, "learning_rate": 4.059407989487849e-06, "loss": 0.6152, "step": 18710 }, { "epoch": 0.5734645090106657, "grad_norm": 1.2743439796102283, "learning_rate": 4.058920537365627e-06, "loss": 0.5398, "step": 18711 }, { "epoch": 0.5734951575334069, "grad_norm": 1.2300464393421882, "learning_rate": 4.058433094516105e-06, "loss": 0.59, "step": 18712 }, { "epoch": 0.5735258060561481, "grad_norm": 1.094361560831324, "learning_rate": 4.057945660944081e-06, "loss": 0.6273, "step": 18713 }, { "epoch": 0.5735564545788893, "grad_norm": 0.45002163560743136, "learning_rate": 4.057458236654358e-06, "loss": 0.3883, "step": 18714 }, { "epoch": 0.5735871031016305, "grad_norm": 1.2328617976845428, "learning_rate": 4.056970821651742e-06, "loss": 0.6647, "step": 18715 }, { "epoch": 0.5736177516243717, "grad_norm": 1.2788366937863438, "learning_rate": 4.056483415941033e-06, "loss": 0.5979, "step": 18716 }, { "epoch": 0.5736484001471129, "grad_norm": 1.4344041973127435, "learning_rate": 4.055996019527034e-06, "loss": 0.6886, "step": 18717 }, { "epoch": 0.5736790486698541, "grad_norm": 1.3550042992776794, "learning_rate": 4.0555086324145484e-06, "loss": 0.6339, "step": 18718 }, { "epoch": 0.5737096971925953, "grad_norm": 1.3381838054580208, "learning_rate": 4.055021254608377e-06, "loss": 0.6514, "step": 18719 }, { "epoch": 0.5737403457153365, "grad_norm": 1.5039863042199115, "learning_rate": 4.054533886113324e-06, "loss": 0.7136, "step": 18720 }, { "epoch": 0.5737709942380778, "grad_norm": 1.1308072429487355, "learning_rate": 4.05404652693419e-06, "loss": 0.6526, "step": 18721 }, { "epoch": 0.5738016427608189, "grad_norm": 1.2941860651727564, "learning_rate": 4.053559177075777e-06, "loss": 0.7112, "step": 18722 }, { "epoch": 0.5738322912835602, "grad_norm": 1.1312032035389183, "learning_rate": 4.053071836542889e-06, "loss": 0.6174, "step": 18723 }, { "epoch": 0.5738629398063013, "grad_norm": 1.2843587334841038, "learning_rate": 4.052584505340327e-06, "loss": 0.5573, "step": 18724 }, { "epoch": 0.5738935883290426, "grad_norm": 1.229841445522982, "learning_rate": 4.052097183472889e-06, "loss": 0.5763, "step": 18725 }, { "epoch": 0.5739242368517837, "grad_norm": 1.233208351931889, "learning_rate": 4.0516098709453835e-06, "loss": 0.5823, "step": 18726 }, { "epoch": 0.573954885374525, "grad_norm": 1.1449376808622085, "learning_rate": 4.051122567762608e-06, "loss": 0.6112, "step": 18727 }, { "epoch": 0.5739855338972661, "grad_norm": 1.2048276681871821, "learning_rate": 4.050635273929362e-06, "loss": 0.5855, "step": 18728 }, { "epoch": 0.5740161824200074, "grad_norm": 1.307426173625045, "learning_rate": 4.050147989450452e-06, "loss": 0.6532, "step": 18729 }, { "epoch": 0.5740468309427486, "grad_norm": 1.3839477406904204, "learning_rate": 4.049660714330676e-06, "loss": 0.6203, "step": 18730 }, { "epoch": 0.5740774794654898, "grad_norm": 1.1627417299026699, "learning_rate": 4.049173448574836e-06, "loss": 0.5786, "step": 18731 }, { "epoch": 0.574108127988231, "grad_norm": 1.1722336827618336, "learning_rate": 4.0486861921877345e-06, "loss": 0.6074, "step": 18732 }, { "epoch": 0.5741387765109722, "grad_norm": 0.53364183782232, "learning_rate": 4.048198945174169e-06, "loss": 0.4128, "step": 18733 }, { "epoch": 0.5741694250337134, "grad_norm": 1.3170355013645862, "learning_rate": 4.047711707538945e-06, "loss": 0.6875, "step": 18734 }, { "epoch": 0.5742000735564545, "grad_norm": 1.1405349417470025, "learning_rate": 4.047224479286862e-06, "loss": 0.6416, "step": 18735 }, { "epoch": 0.5742307220791958, "grad_norm": 1.2587334065206102, "learning_rate": 4.0467372604227175e-06, "loss": 0.5962, "step": 18736 }, { "epoch": 0.5742613706019369, "grad_norm": 1.226745420285116, "learning_rate": 4.046250050951316e-06, "loss": 0.7091, "step": 18737 }, { "epoch": 0.5742920191246782, "grad_norm": 1.2286383777916003, "learning_rate": 4.045762850877456e-06, "loss": 0.6154, "step": 18738 }, { "epoch": 0.5743226676474193, "grad_norm": 1.453661243407025, "learning_rate": 4.045275660205939e-06, "loss": 0.6764, "step": 18739 }, { "epoch": 0.5743533161701606, "grad_norm": 1.321278493988205, "learning_rate": 4.044788478941566e-06, "loss": 0.6754, "step": 18740 }, { "epoch": 0.5743839646929018, "grad_norm": 1.3992363536462513, "learning_rate": 4.044301307089134e-06, "loss": 0.6446, "step": 18741 }, { "epoch": 0.574414613215643, "grad_norm": 1.1897801872483862, "learning_rate": 4.043814144653449e-06, "loss": 0.548, "step": 18742 }, { "epoch": 0.5744452617383842, "grad_norm": 0.4379394836935086, "learning_rate": 4.043326991639308e-06, "loss": 0.398, "step": 18743 }, { "epoch": 0.5744759102611254, "grad_norm": 0.4445134373182676, "learning_rate": 4.0428398480515074e-06, "loss": 0.3937, "step": 18744 }, { "epoch": 0.5745065587838666, "grad_norm": 1.2793035410702598, "learning_rate": 4.042352713894854e-06, "loss": 0.6929, "step": 18745 }, { "epoch": 0.5745372073066078, "grad_norm": 0.4463358245552104, "learning_rate": 4.041865589174141e-06, "loss": 0.4072, "step": 18746 }, { "epoch": 0.574567855829349, "grad_norm": 1.3609628247049854, "learning_rate": 4.0413784738941755e-06, "loss": 0.6561, "step": 18747 }, { "epoch": 0.5745985043520903, "grad_norm": 1.3456518078433584, "learning_rate": 4.040891368059752e-06, "loss": 0.6672, "step": 18748 }, { "epoch": 0.5746291528748314, "grad_norm": 1.4218289181849004, "learning_rate": 4.040404271675669e-06, "loss": 0.6226, "step": 18749 }, { "epoch": 0.5746598013975727, "grad_norm": 1.4099341739238769, "learning_rate": 4.03991718474673e-06, "loss": 0.5656, "step": 18750 }, { "epoch": 0.5746904499203138, "grad_norm": 1.213473179875772, "learning_rate": 4.0394301072777335e-06, "loss": 0.5779, "step": 18751 }, { "epoch": 0.5747210984430551, "grad_norm": 1.2824307106184059, "learning_rate": 4.038943039273476e-06, "loss": 0.5938, "step": 18752 }, { "epoch": 0.5747517469657962, "grad_norm": 0.4404099821984737, "learning_rate": 4.038455980738759e-06, "loss": 0.4232, "step": 18753 }, { "epoch": 0.5747823954885375, "grad_norm": 1.419377425530004, "learning_rate": 4.037968931678383e-06, "loss": 0.6754, "step": 18754 }, { "epoch": 0.5748130440112786, "grad_norm": 1.3064179608477207, "learning_rate": 4.037481892097143e-06, "loss": 0.5949, "step": 18755 }, { "epoch": 0.5748436925340199, "grad_norm": 0.4493674564067299, "learning_rate": 4.036994861999842e-06, "loss": 0.3943, "step": 18756 }, { "epoch": 0.574874341056761, "grad_norm": 1.253567805861592, "learning_rate": 4.036507841391274e-06, "loss": 0.5974, "step": 18757 }, { "epoch": 0.5749049895795023, "grad_norm": 1.2286906381032079, "learning_rate": 4.036020830276245e-06, "loss": 0.6748, "step": 18758 }, { "epoch": 0.5749356381022435, "grad_norm": 1.161933577405172, "learning_rate": 4.0355338286595465e-06, "loss": 0.5995, "step": 18759 }, { "epoch": 0.5749662866249847, "grad_norm": 1.2837979281609686, "learning_rate": 4.035046836545981e-06, "loss": 0.6138, "step": 18760 }, { "epoch": 0.5749969351477259, "grad_norm": 0.4469653209278358, "learning_rate": 4.034559853940346e-06, "loss": 0.4141, "step": 18761 }, { "epoch": 0.5750275836704671, "grad_norm": 1.4906690114051584, "learning_rate": 4.0340728808474395e-06, "loss": 0.6682, "step": 18762 }, { "epoch": 0.5750582321932083, "grad_norm": 1.3687562706748233, "learning_rate": 4.03358591727206e-06, "loss": 0.6314, "step": 18763 }, { "epoch": 0.5750888807159495, "grad_norm": 1.1389728412216, "learning_rate": 4.033098963219006e-06, "loss": 0.6217, "step": 18764 }, { "epoch": 0.5751195292386907, "grad_norm": 1.3072626536548937, "learning_rate": 4.032612018693073e-06, "loss": 0.6652, "step": 18765 }, { "epoch": 0.5751501777614318, "grad_norm": 1.3879000598121347, "learning_rate": 4.032125083699064e-06, "loss": 0.6347, "step": 18766 }, { "epoch": 0.5751808262841731, "grad_norm": 1.2615380466277741, "learning_rate": 4.031638158241775e-06, "loss": 0.6529, "step": 18767 }, { "epoch": 0.5752114748069143, "grad_norm": 1.428035815684783, "learning_rate": 4.031151242326e-06, "loss": 0.6714, "step": 18768 }, { "epoch": 0.5752421233296555, "grad_norm": 1.2637129256696062, "learning_rate": 4.0306643359565426e-06, "loss": 0.6234, "step": 18769 }, { "epoch": 0.5752727718523967, "grad_norm": 1.3009961168870388, "learning_rate": 4.030177439138197e-06, "loss": 0.6468, "step": 18770 }, { "epoch": 0.5753034203751379, "grad_norm": 0.46676520244876546, "learning_rate": 4.029690551875759e-06, "loss": 0.4233, "step": 18771 }, { "epoch": 0.5753340688978791, "grad_norm": 1.3430531261716674, "learning_rate": 4.02920367417403e-06, "loss": 0.6072, "step": 18772 }, { "epoch": 0.5753647174206203, "grad_norm": 1.4475716488920618, "learning_rate": 4.028716806037804e-06, "loss": 0.6443, "step": 18773 }, { "epoch": 0.5753953659433615, "grad_norm": 1.2688694818829334, "learning_rate": 4.028229947471881e-06, "loss": 0.6044, "step": 18774 }, { "epoch": 0.5754260144661028, "grad_norm": 0.4631103313975537, "learning_rate": 4.027743098481058e-06, "loss": 0.3937, "step": 18775 }, { "epoch": 0.5754566629888439, "grad_norm": 1.3427875491451786, "learning_rate": 4.0272562590701295e-06, "loss": 0.5948, "step": 18776 }, { "epoch": 0.5754873115115852, "grad_norm": 1.2712499541589013, "learning_rate": 4.026769429243894e-06, "loss": 0.633, "step": 18777 }, { "epoch": 0.5755179600343263, "grad_norm": 1.1963733596090493, "learning_rate": 4.0262826090071505e-06, "loss": 0.5722, "step": 18778 }, { "epoch": 0.5755486085570676, "grad_norm": 1.2241598123493127, "learning_rate": 4.02579579836469e-06, "loss": 0.6731, "step": 18779 }, { "epoch": 0.5755792570798087, "grad_norm": 1.4520714996078048, "learning_rate": 4.025308997321316e-06, "loss": 0.6298, "step": 18780 }, { "epoch": 0.57560990560255, "grad_norm": 1.2054653315502395, "learning_rate": 4.0248222058818206e-06, "loss": 0.6204, "step": 18781 }, { "epoch": 0.5756405541252911, "grad_norm": 1.2405794474019258, "learning_rate": 4.024335424051001e-06, "loss": 0.571, "step": 18782 }, { "epoch": 0.5756712026480324, "grad_norm": 1.5289369933930625, "learning_rate": 4.023848651833655e-06, "loss": 0.6982, "step": 18783 }, { "epoch": 0.5757018511707735, "grad_norm": 1.5414633567719154, "learning_rate": 4.023361889234576e-06, "loss": 0.5525, "step": 18784 }, { "epoch": 0.5757324996935148, "grad_norm": 1.3430186543446807, "learning_rate": 4.022875136258564e-06, "loss": 0.576, "step": 18785 }, { "epoch": 0.575763148216256, "grad_norm": 1.3342796512885278, "learning_rate": 4.022388392910413e-06, "loss": 0.6641, "step": 18786 }, { "epoch": 0.5757937967389972, "grad_norm": 1.305295556244005, "learning_rate": 4.021901659194919e-06, "loss": 0.6327, "step": 18787 }, { "epoch": 0.5758244452617384, "grad_norm": 1.3568002623106814, "learning_rate": 4.02141493511688e-06, "loss": 0.6999, "step": 18788 }, { "epoch": 0.5758550937844796, "grad_norm": 1.4048580740791328, "learning_rate": 4.020928220681089e-06, "loss": 0.519, "step": 18789 }, { "epoch": 0.5758857423072208, "grad_norm": 1.3366111716859772, "learning_rate": 4.020441515892341e-06, "loss": 0.6041, "step": 18790 }, { "epoch": 0.575916390829962, "grad_norm": 1.5228518022269448, "learning_rate": 4.019954820755435e-06, "loss": 0.7483, "step": 18791 }, { "epoch": 0.5759470393527032, "grad_norm": 1.3163592102767256, "learning_rate": 4.019468135275164e-06, "loss": 0.5702, "step": 18792 }, { "epoch": 0.5759776878754445, "grad_norm": 1.428029256500898, "learning_rate": 4.018981459456325e-06, "loss": 0.6679, "step": 18793 }, { "epoch": 0.5760083363981856, "grad_norm": 1.2908457552539423, "learning_rate": 4.018494793303714e-06, "loss": 0.7157, "step": 18794 }, { "epoch": 0.5760389849209269, "grad_norm": 1.3290146507260685, "learning_rate": 4.018008136822122e-06, "loss": 0.6166, "step": 18795 }, { "epoch": 0.576069633443668, "grad_norm": 1.2516934970414915, "learning_rate": 4.0175214900163485e-06, "loss": 0.6211, "step": 18796 }, { "epoch": 0.5761002819664092, "grad_norm": 0.45587298939559906, "learning_rate": 4.017034852891189e-06, "loss": 0.391, "step": 18797 }, { "epoch": 0.5761309304891504, "grad_norm": 1.3841912875514462, "learning_rate": 4.0165482254514325e-06, "loss": 0.6864, "step": 18798 }, { "epoch": 0.5761615790118916, "grad_norm": 1.3737202212786095, "learning_rate": 4.0160616077018826e-06, "loss": 0.6838, "step": 18799 }, { "epoch": 0.5761922275346328, "grad_norm": 1.33453341277512, "learning_rate": 4.015574999647324e-06, "loss": 0.5649, "step": 18800 }, { "epoch": 0.576222876057374, "grad_norm": 1.1754169234621386, "learning_rate": 4.0150884012925614e-06, "loss": 0.5837, "step": 18801 }, { "epoch": 0.5762535245801153, "grad_norm": 1.1232535851695764, "learning_rate": 4.014601812642384e-06, "loss": 0.6447, "step": 18802 }, { "epoch": 0.5762841731028564, "grad_norm": 1.2606072839385836, "learning_rate": 4.0141152337015854e-06, "loss": 0.5973, "step": 18803 }, { "epoch": 0.5763148216255977, "grad_norm": 1.323087550840237, "learning_rate": 4.013628664474963e-06, "loss": 0.6692, "step": 18804 }, { "epoch": 0.5763454701483388, "grad_norm": 1.2161459989137078, "learning_rate": 4.01314210496731e-06, "loss": 0.6322, "step": 18805 }, { "epoch": 0.5763761186710801, "grad_norm": 1.3764794269514717, "learning_rate": 4.012655555183419e-06, "loss": 0.6276, "step": 18806 }, { "epoch": 0.5764067671938212, "grad_norm": 1.1963539537017411, "learning_rate": 4.012169015128086e-06, "loss": 0.6311, "step": 18807 }, { "epoch": 0.5764374157165625, "grad_norm": 1.21839235146211, "learning_rate": 4.0116824848061065e-06, "loss": 0.623, "step": 18808 }, { "epoch": 0.5764680642393036, "grad_norm": 1.1950228827606182, "learning_rate": 4.011195964222268e-06, "loss": 0.5042, "step": 18809 }, { "epoch": 0.5764987127620449, "grad_norm": 1.2284514096522363, "learning_rate": 4.010709453381373e-06, "loss": 0.5579, "step": 18810 }, { "epoch": 0.576529361284786, "grad_norm": 1.2399233351864254, "learning_rate": 4.010222952288207e-06, "loss": 0.5574, "step": 18811 }, { "epoch": 0.5765600098075273, "grad_norm": 1.4691284196395682, "learning_rate": 4.009736460947571e-06, "loss": 0.6407, "step": 18812 }, { "epoch": 0.5765906583302685, "grad_norm": 1.2991091953463636, "learning_rate": 4.009249979364254e-06, "loss": 0.5589, "step": 18813 }, { "epoch": 0.5766213068530097, "grad_norm": 1.2595083472954984, "learning_rate": 4.008763507543048e-06, "loss": 0.5927, "step": 18814 }, { "epoch": 0.5766519553757509, "grad_norm": 1.3240612265358673, "learning_rate": 4.0082770454887514e-06, "loss": 0.6227, "step": 18815 }, { "epoch": 0.5766826038984921, "grad_norm": 1.4600871518589131, "learning_rate": 4.007790593206154e-06, "loss": 0.6468, "step": 18816 }, { "epoch": 0.5767132524212333, "grad_norm": 3.564061196342189, "learning_rate": 4.00730415070005e-06, "loss": 0.6968, "step": 18817 }, { "epoch": 0.5767439009439745, "grad_norm": 1.1615825578457788, "learning_rate": 4.006817717975232e-06, "loss": 0.6722, "step": 18818 }, { "epoch": 0.5767745494667157, "grad_norm": 1.2985259963707374, "learning_rate": 4.0063312950364925e-06, "loss": 0.6792, "step": 18819 }, { "epoch": 0.576805197989457, "grad_norm": 1.1909988318635196, "learning_rate": 4.005844881888626e-06, "loss": 0.6368, "step": 18820 }, { "epoch": 0.5768358465121981, "grad_norm": 1.193488103554199, "learning_rate": 4.005358478536425e-06, "loss": 0.5971, "step": 18821 }, { "epoch": 0.5768664950349394, "grad_norm": 1.3968161109326378, "learning_rate": 4.004872084984679e-06, "loss": 0.6302, "step": 18822 }, { "epoch": 0.5768971435576805, "grad_norm": 1.2539357111870557, "learning_rate": 4.0043857012381855e-06, "loss": 0.6226, "step": 18823 }, { "epoch": 0.5769277920804218, "grad_norm": 1.2904963717355296, "learning_rate": 4.003899327301733e-06, "loss": 0.7072, "step": 18824 }, { "epoch": 0.5769584406031629, "grad_norm": 1.2492495828226826, "learning_rate": 4.003412963180115e-06, "loss": 0.6078, "step": 18825 }, { "epoch": 0.5769890891259042, "grad_norm": 1.2372829832681134, "learning_rate": 4.002926608878125e-06, "loss": 0.5228, "step": 18826 }, { "epoch": 0.5770197376486453, "grad_norm": 1.3688758874304272, "learning_rate": 4.002440264400553e-06, "loss": 0.6682, "step": 18827 }, { "epoch": 0.5770503861713865, "grad_norm": 1.3892444617363529, "learning_rate": 4.001953929752193e-06, "loss": 0.6866, "step": 18828 }, { "epoch": 0.5770810346941277, "grad_norm": 0.4776495222744547, "learning_rate": 4.001467604937837e-06, "loss": 0.4036, "step": 18829 }, { "epoch": 0.5771116832168689, "grad_norm": 1.2121331604409749, "learning_rate": 4.000981289962275e-06, "loss": 0.6002, "step": 18830 }, { "epoch": 0.5771423317396102, "grad_norm": 1.351524524225631, "learning_rate": 4.000494984830301e-06, "loss": 0.6303, "step": 18831 }, { "epoch": 0.5771729802623513, "grad_norm": 1.1336319212544004, "learning_rate": 4.000008689546707e-06, "loss": 0.6107, "step": 18832 }, { "epoch": 0.5772036287850926, "grad_norm": 1.2814334223628419, "learning_rate": 3.99952240411628e-06, "loss": 0.7062, "step": 18833 }, { "epoch": 0.5772342773078337, "grad_norm": 1.2641691372730688, "learning_rate": 3.999036128543817e-06, "loss": 0.6996, "step": 18834 }, { "epoch": 0.577264925830575, "grad_norm": 1.3313002171822237, "learning_rate": 3.998549862834106e-06, "loss": 0.679, "step": 18835 }, { "epoch": 0.5772955743533161, "grad_norm": 1.0743995037226641, "learning_rate": 3.998063606991939e-06, "loss": 0.5495, "step": 18836 }, { "epoch": 0.5773262228760574, "grad_norm": 1.230968005953193, "learning_rate": 3.997577361022109e-06, "loss": 0.6001, "step": 18837 }, { "epoch": 0.5773568713987985, "grad_norm": 1.266313327151456, "learning_rate": 3.997091124929404e-06, "loss": 0.624, "step": 18838 }, { "epoch": 0.5773875199215398, "grad_norm": 1.2396082538516742, "learning_rate": 3.996604898718618e-06, "loss": 0.6674, "step": 18839 }, { "epoch": 0.577418168444281, "grad_norm": 1.3232316614076947, "learning_rate": 3.996118682394542e-06, "loss": 0.5765, "step": 18840 }, { "epoch": 0.5774488169670222, "grad_norm": 1.2139268656147582, "learning_rate": 3.995632475961962e-06, "loss": 0.5957, "step": 18841 }, { "epoch": 0.5774794654897634, "grad_norm": 1.3495444347353367, "learning_rate": 3.995146279425676e-06, "loss": 0.6813, "step": 18842 }, { "epoch": 0.5775101140125046, "grad_norm": 1.3359662672699835, "learning_rate": 3.9946600927904695e-06, "loss": 0.7263, "step": 18843 }, { "epoch": 0.5775407625352458, "grad_norm": 1.2699353835201665, "learning_rate": 3.994173916061133e-06, "loss": 0.6967, "step": 18844 }, { "epoch": 0.577571411057987, "grad_norm": 1.234730607552108, "learning_rate": 3.993687749242459e-06, "loss": 0.6586, "step": 18845 }, { "epoch": 0.5776020595807282, "grad_norm": 1.3842754767817642, "learning_rate": 3.993201592339237e-06, "loss": 0.6443, "step": 18846 }, { "epoch": 0.5776327081034694, "grad_norm": 1.3694357752486959, "learning_rate": 3.992715445356258e-06, "loss": 0.654, "step": 18847 }, { "epoch": 0.5776633566262106, "grad_norm": 1.4036811848698305, "learning_rate": 3.992229308298311e-06, "loss": 0.5966, "step": 18848 }, { "epoch": 0.5776940051489519, "grad_norm": 1.2459494952655255, "learning_rate": 3.991743181170186e-06, "loss": 0.6483, "step": 18849 }, { "epoch": 0.577724653671693, "grad_norm": 1.5163891441380635, "learning_rate": 3.991257063976673e-06, "loss": 0.6762, "step": 18850 }, { "epoch": 0.5777553021944343, "grad_norm": 1.2502813025439656, "learning_rate": 3.990770956722565e-06, "loss": 0.5348, "step": 18851 }, { "epoch": 0.5777859507171754, "grad_norm": 1.3395669604124854, "learning_rate": 3.990284859412646e-06, "loss": 0.646, "step": 18852 }, { "epoch": 0.5778165992399167, "grad_norm": 1.3220468085556467, "learning_rate": 3.989798772051711e-06, "loss": 0.6262, "step": 18853 }, { "epoch": 0.5778472477626578, "grad_norm": 1.1262809812656918, "learning_rate": 3.9893126946445435e-06, "loss": 0.5856, "step": 18854 }, { "epoch": 0.5778778962853991, "grad_norm": 0.4869736436872766, "learning_rate": 3.98882662719594e-06, "loss": 0.4209, "step": 18855 }, { "epoch": 0.5779085448081402, "grad_norm": 1.5026395415244151, "learning_rate": 3.988340569710686e-06, "loss": 0.7485, "step": 18856 }, { "epoch": 0.5779391933308815, "grad_norm": 1.3175137263661298, "learning_rate": 3.98785452219357e-06, "loss": 0.6363, "step": 18857 }, { "epoch": 0.5779698418536227, "grad_norm": 1.412880192294585, "learning_rate": 3.9873684846493835e-06, "loss": 0.554, "step": 18858 }, { "epoch": 0.5780004903763638, "grad_norm": 1.4359403341397985, "learning_rate": 3.986882457082914e-06, "loss": 0.612, "step": 18859 }, { "epoch": 0.5780311388991051, "grad_norm": 1.0933987503737888, "learning_rate": 3.98639643949895e-06, "loss": 0.5947, "step": 18860 }, { "epoch": 0.5780617874218462, "grad_norm": 1.3968438057274115, "learning_rate": 3.985910431902282e-06, "loss": 0.5723, "step": 18861 }, { "epoch": 0.5780924359445875, "grad_norm": 0.43352392307206605, "learning_rate": 3.985424434297699e-06, "loss": 0.3784, "step": 18862 }, { "epoch": 0.5781230844673286, "grad_norm": 1.3801810595227748, "learning_rate": 3.984938446689987e-06, "loss": 0.5694, "step": 18863 }, { "epoch": 0.5781537329900699, "grad_norm": 1.383553519622045, "learning_rate": 3.9844524690839376e-06, "loss": 0.6702, "step": 18864 }, { "epoch": 0.578184381512811, "grad_norm": 1.2295301958118785, "learning_rate": 3.983966501484336e-06, "loss": 0.6552, "step": 18865 }, { "epoch": 0.5782150300355523, "grad_norm": 1.2584642680121507, "learning_rate": 3.983480543895974e-06, "loss": 0.6248, "step": 18866 }, { "epoch": 0.5782456785582935, "grad_norm": 1.2686804440747386, "learning_rate": 3.982994596323638e-06, "loss": 0.582, "step": 18867 }, { "epoch": 0.5782763270810347, "grad_norm": 1.427384005158949, "learning_rate": 3.982508658772116e-06, "loss": 0.6949, "step": 18868 }, { "epoch": 0.5783069756037759, "grad_norm": 1.2764877876299532, "learning_rate": 3.982022731246197e-06, "loss": 0.6159, "step": 18869 }, { "epoch": 0.5783376241265171, "grad_norm": 1.4105347851779746, "learning_rate": 3.981536813750668e-06, "loss": 0.7199, "step": 18870 }, { "epoch": 0.5783682726492583, "grad_norm": 0.4699698057065989, "learning_rate": 3.981050906290317e-06, "loss": 0.4326, "step": 18871 }, { "epoch": 0.5783989211719995, "grad_norm": 1.2003606715682458, "learning_rate": 3.980565008869933e-06, "loss": 0.6563, "step": 18872 }, { "epoch": 0.5784295696947407, "grad_norm": 1.2633795080401033, "learning_rate": 3.9800791214943015e-06, "loss": 0.5578, "step": 18873 }, { "epoch": 0.578460218217482, "grad_norm": 1.3610802701465132, "learning_rate": 3.979593244168214e-06, "loss": 0.6014, "step": 18874 }, { "epoch": 0.5784908667402231, "grad_norm": 1.2774406955843225, "learning_rate": 3.979107376896454e-06, "loss": 0.687, "step": 18875 }, { "epoch": 0.5785215152629644, "grad_norm": 1.364827097625007, "learning_rate": 3.978621519683808e-06, "loss": 0.5644, "step": 18876 }, { "epoch": 0.5785521637857055, "grad_norm": 1.2000399795904144, "learning_rate": 3.97813567253507e-06, "loss": 0.5975, "step": 18877 }, { "epoch": 0.5785828123084468, "grad_norm": 1.2358805684817407, "learning_rate": 3.97764983545502e-06, "loss": 0.5949, "step": 18878 }, { "epoch": 0.5786134608311879, "grad_norm": 1.2328413905970317, "learning_rate": 3.977164008448447e-06, "loss": 0.6099, "step": 18879 }, { "epoch": 0.5786441093539292, "grad_norm": 1.0880721568899123, "learning_rate": 3.976678191520141e-06, "loss": 0.6218, "step": 18880 }, { "epoch": 0.5786747578766703, "grad_norm": 1.4166175785661963, "learning_rate": 3.976192384674884e-06, "loss": 0.5916, "step": 18881 }, { "epoch": 0.5787054063994116, "grad_norm": 1.2678723370597922, "learning_rate": 3.9757065879174665e-06, "loss": 0.7111, "step": 18882 }, { "epoch": 0.5787360549221527, "grad_norm": 1.1742608895176472, "learning_rate": 3.975220801252674e-06, "loss": 0.5847, "step": 18883 }, { "epoch": 0.578766703444894, "grad_norm": 1.3665317357026525, "learning_rate": 3.974735024685293e-06, "loss": 0.7495, "step": 18884 }, { "epoch": 0.5787973519676352, "grad_norm": 1.373798245991134, "learning_rate": 3.974249258220112e-06, "loss": 0.6122, "step": 18885 }, { "epoch": 0.5788280004903764, "grad_norm": 1.2239644885865817, "learning_rate": 3.973763501861914e-06, "loss": 0.5541, "step": 18886 }, { "epoch": 0.5788586490131176, "grad_norm": 1.3882206470259115, "learning_rate": 3.973277755615486e-06, "loss": 0.5974, "step": 18887 }, { "epoch": 0.5788892975358588, "grad_norm": 1.3399447042587178, "learning_rate": 3.972792019485616e-06, "loss": 0.5759, "step": 18888 }, { "epoch": 0.5789199460586, "grad_norm": 1.303100552840462, "learning_rate": 3.9723062934770895e-06, "loss": 0.5404, "step": 18889 }, { "epoch": 0.5789505945813411, "grad_norm": 1.4726731276633855, "learning_rate": 3.97182057759469e-06, "loss": 0.6255, "step": 18890 }, { "epoch": 0.5789812431040824, "grad_norm": 1.212840478215131, "learning_rate": 3.971334871843207e-06, "loss": 0.7038, "step": 18891 }, { "epoch": 0.5790118916268235, "grad_norm": 1.323637087782533, "learning_rate": 3.970849176227424e-06, "loss": 0.6809, "step": 18892 }, { "epoch": 0.5790425401495648, "grad_norm": 1.3922380952734652, "learning_rate": 3.9703634907521285e-06, "loss": 0.6672, "step": 18893 }, { "epoch": 0.579073188672306, "grad_norm": 1.4041482555110727, "learning_rate": 3.969877815422106e-06, "loss": 0.6709, "step": 18894 }, { "epoch": 0.5791038371950472, "grad_norm": 1.4694616222882602, "learning_rate": 3.969392150242136e-06, "loss": 0.6088, "step": 18895 }, { "epoch": 0.5791344857177884, "grad_norm": 1.2575742460262036, "learning_rate": 3.968906495217014e-06, "loss": 0.6358, "step": 18896 }, { "epoch": 0.5791651342405296, "grad_norm": 1.185947878579291, "learning_rate": 3.968420850351519e-06, "loss": 0.6475, "step": 18897 }, { "epoch": 0.5791957827632708, "grad_norm": 0.47968350068091653, "learning_rate": 3.967935215650436e-06, "loss": 0.3923, "step": 18898 }, { "epoch": 0.579226431286012, "grad_norm": 0.45229476310742905, "learning_rate": 3.967449591118552e-06, "loss": 0.4073, "step": 18899 }, { "epoch": 0.5792570798087532, "grad_norm": 1.3331359022992693, "learning_rate": 3.966963976760651e-06, "loss": 0.6283, "step": 18900 }, { "epoch": 0.5792877283314944, "grad_norm": 1.2964226842496178, "learning_rate": 3.966478372581518e-06, "loss": 0.5908, "step": 18901 }, { "epoch": 0.5793183768542356, "grad_norm": 1.2652434191718194, "learning_rate": 3.965992778585939e-06, "loss": 0.5998, "step": 18902 }, { "epoch": 0.5793490253769769, "grad_norm": 1.3172546668127538, "learning_rate": 3.965507194778697e-06, "loss": 0.6751, "step": 18903 }, { "epoch": 0.579379673899718, "grad_norm": 1.1076475287466474, "learning_rate": 3.965021621164577e-06, "loss": 0.5752, "step": 18904 }, { "epoch": 0.5794103224224593, "grad_norm": 1.1961085317530191, "learning_rate": 3.964536057748366e-06, "loss": 0.5689, "step": 18905 }, { "epoch": 0.5794409709452004, "grad_norm": 1.1979672792545126, "learning_rate": 3.964050504534844e-06, "loss": 0.5367, "step": 18906 }, { "epoch": 0.5794716194679417, "grad_norm": 1.2285516329825652, "learning_rate": 3.963564961528798e-06, "loss": 0.5699, "step": 18907 }, { "epoch": 0.5795022679906828, "grad_norm": 1.2963721611726564, "learning_rate": 3.9630794287350126e-06, "loss": 0.6217, "step": 18908 }, { "epoch": 0.5795329165134241, "grad_norm": 1.1734716964102914, "learning_rate": 3.962593906158269e-06, "loss": 0.5851, "step": 18909 }, { "epoch": 0.5795635650361652, "grad_norm": 1.313874098693241, "learning_rate": 3.962108393803354e-06, "loss": 0.6571, "step": 18910 }, { "epoch": 0.5795942135589065, "grad_norm": 1.1926142634438772, "learning_rate": 3.96162289167505e-06, "loss": 0.5835, "step": 18911 }, { "epoch": 0.5796248620816477, "grad_norm": 1.357390006567915, "learning_rate": 3.961137399778142e-06, "loss": 0.5804, "step": 18912 }, { "epoch": 0.5796555106043889, "grad_norm": 1.209222399723508, "learning_rate": 3.960651918117413e-06, "loss": 0.7374, "step": 18913 }, { "epoch": 0.5796861591271301, "grad_norm": 1.2709010776193943, "learning_rate": 3.960166446697645e-06, "loss": 0.6266, "step": 18914 }, { "epoch": 0.5797168076498713, "grad_norm": 1.3231842972816963, "learning_rate": 3.959680985523625e-06, "loss": 0.5894, "step": 18915 }, { "epoch": 0.5797474561726125, "grad_norm": 1.3135236762047908, "learning_rate": 3.959195534600136e-06, "loss": 0.6672, "step": 18916 }, { "epoch": 0.5797781046953537, "grad_norm": 1.4557157182563467, "learning_rate": 3.958710093931956e-06, "loss": 0.7341, "step": 18917 }, { "epoch": 0.5798087532180949, "grad_norm": 1.346204031325274, "learning_rate": 3.9582246635238745e-06, "loss": 0.6696, "step": 18918 }, { "epoch": 0.5798394017408361, "grad_norm": 1.4540144654112188, "learning_rate": 3.957739243380669e-06, "loss": 0.7002, "step": 18919 }, { "epoch": 0.5798700502635773, "grad_norm": 1.3558216890473707, "learning_rate": 3.957253833507129e-06, "loss": 0.6266, "step": 18920 }, { "epoch": 0.5799006987863184, "grad_norm": 1.5997756037627353, "learning_rate": 3.956768433908031e-06, "loss": 0.7161, "step": 18921 }, { "epoch": 0.5799313473090597, "grad_norm": 1.1678923458185184, "learning_rate": 3.9562830445881615e-06, "loss": 0.6561, "step": 18922 }, { "epoch": 0.5799619958318009, "grad_norm": 1.2723120919671838, "learning_rate": 3.9557976655523025e-06, "loss": 0.6265, "step": 18923 }, { "epoch": 0.5799926443545421, "grad_norm": 1.2502274747351154, "learning_rate": 3.955312296805237e-06, "loss": 0.6215, "step": 18924 }, { "epoch": 0.5800232928772833, "grad_norm": 1.337582660288115, "learning_rate": 3.954826938351745e-06, "loss": 0.5744, "step": 18925 }, { "epoch": 0.5800539414000245, "grad_norm": 1.2135368640308637, "learning_rate": 3.9543415901966115e-06, "loss": 0.6336, "step": 18926 }, { "epoch": 0.5800845899227657, "grad_norm": 1.5946714670354167, "learning_rate": 3.953856252344617e-06, "loss": 0.6836, "step": 18927 }, { "epoch": 0.5801152384455069, "grad_norm": 1.3297637829227815, "learning_rate": 3.953370924800546e-06, "loss": 0.666, "step": 18928 }, { "epoch": 0.5801458869682481, "grad_norm": 1.2121126920865797, "learning_rate": 3.952885607569179e-06, "loss": 0.6843, "step": 18929 }, { "epoch": 0.5801765354909894, "grad_norm": 1.2846619047819283, "learning_rate": 3.952400300655297e-06, "loss": 0.6963, "step": 18930 }, { "epoch": 0.5802071840137305, "grad_norm": 1.1728138898480833, "learning_rate": 3.951915004063683e-06, "loss": 0.5473, "step": 18931 }, { "epoch": 0.5802378325364718, "grad_norm": 1.1941198744736325, "learning_rate": 3.95142971779912e-06, "loss": 0.5496, "step": 18932 }, { "epoch": 0.5802684810592129, "grad_norm": 1.2868773993231086, "learning_rate": 3.950944441866386e-06, "loss": 0.6433, "step": 18933 }, { "epoch": 0.5802991295819542, "grad_norm": 1.345958306883375, "learning_rate": 3.950459176270267e-06, "loss": 0.6078, "step": 18934 }, { "epoch": 0.5803297781046953, "grad_norm": 1.3049565884690995, "learning_rate": 3.9499739210155405e-06, "loss": 0.6019, "step": 18935 }, { "epoch": 0.5803604266274366, "grad_norm": 1.2580007408611336, "learning_rate": 3.94948867610699e-06, "loss": 0.6269, "step": 18936 }, { "epoch": 0.5803910751501777, "grad_norm": 1.2568392294315462, "learning_rate": 3.949003441549398e-06, "loss": 0.5294, "step": 18937 }, { "epoch": 0.580421723672919, "grad_norm": 1.2201840369925248, "learning_rate": 3.948518217347541e-06, "loss": 0.6333, "step": 18938 }, { "epoch": 0.5804523721956601, "grad_norm": 1.2865796742625726, "learning_rate": 3.948033003506206e-06, "loss": 0.6434, "step": 18939 }, { "epoch": 0.5804830207184014, "grad_norm": 1.3803513895077113, "learning_rate": 3.94754780003017e-06, "loss": 0.7222, "step": 18940 }, { "epoch": 0.5805136692411426, "grad_norm": 1.3042753354268917, "learning_rate": 3.9470626069242145e-06, "loss": 0.6933, "step": 18941 }, { "epoch": 0.5805443177638838, "grad_norm": 1.2924895223404804, "learning_rate": 3.946577424193121e-06, "loss": 0.7069, "step": 18942 }, { "epoch": 0.580574966286625, "grad_norm": 1.510730002376932, "learning_rate": 3.94609225184167e-06, "loss": 0.6707, "step": 18943 }, { "epoch": 0.5806056148093662, "grad_norm": 1.5335995672812321, "learning_rate": 3.945607089874639e-06, "loss": 0.7022, "step": 18944 }, { "epoch": 0.5806362633321074, "grad_norm": 1.19802995265299, "learning_rate": 3.945121938296814e-06, "loss": 0.5613, "step": 18945 }, { "epoch": 0.5806669118548486, "grad_norm": 1.3486326864483575, "learning_rate": 3.94463679711297e-06, "loss": 0.7315, "step": 18946 }, { "epoch": 0.5806975603775898, "grad_norm": 1.271699424638412, "learning_rate": 3.9441516663278925e-06, "loss": 0.7004, "step": 18947 }, { "epoch": 0.580728208900331, "grad_norm": 1.2315435694930108, "learning_rate": 3.943666545946359e-06, "loss": 0.5612, "step": 18948 }, { "epoch": 0.5807588574230722, "grad_norm": 1.3488743541777992, "learning_rate": 3.9431814359731455e-06, "loss": 0.5936, "step": 18949 }, { "epoch": 0.5807895059458135, "grad_norm": 1.32734653725566, "learning_rate": 3.942696336413039e-06, "loss": 0.6079, "step": 18950 }, { "epoch": 0.5808201544685546, "grad_norm": 1.2281756174137883, "learning_rate": 3.942211247270816e-06, "loss": 0.6474, "step": 18951 }, { "epoch": 0.5808508029912958, "grad_norm": 0.5730150963260867, "learning_rate": 3.941726168551254e-06, "loss": 0.4055, "step": 18952 }, { "epoch": 0.580881451514037, "grad_norm": 1.3289047092932338, "learning_rate": 3.941241100259136e-06, "loss": 0.6696, "step": 18953 }, { "epoch": 0.5809121000367782, "grad_norm": 1.263923119848003, "learning_rate": 3.9407560423992405e-06, "loss": 0.597, "step": 18954 }, { "epoch": 0.5809427485595194, "grad_norm": 1.5465859357934293, "learning_rate": 3.940270994976347e-06, "loss": 0.6687, "step": 18955 }, { "epoch": 0.5809733970822606, "grad_norm": 1.2612887255511855, "learning_rate": 3.939785957995234e-06, "loss": 0.6082, "step": 18956 }, { "epoch": 0.5810040456050019, "grad_norm": 1.2889043714445507, "learning_rate": 3.9393009314606815e-06, "loss": 0.711, "step": 18957 }, { "epoch": 0.581034694127743, "grad_norm": 1.3243344131184793, "learning_rate": 3.938815915377468e-06, "loss": 0.6739, "step": 18958 }, { "epoch": 0.5810653426504843, "grad_norm": 1.354730650239171, "learning_rate": 3.938330909750374e-06, "loss": 0.6117, "step": 18959 }, { "epoch": 0.5810959911732254, "grad_norm": 0.45419094421297884, "learning_rate": 3.937845914584175e-06, "loss": 0.4152, "step": 18960 }, { "epoch": 0.5811266396959667, "grad_norm": 0.45259033192006304, "learning_rate": 3.937360929883654e-06, "loss": 0.4179, "step": 18961 }, { "epoch": 0.5811572882187078, "grad_norm": 1.479675954929991, "learning_rate": 3.936875955653587e-06, "loss": 0.6036, "step": 18962 }, { "epoch": 0.5811879367414491, "grad_norm": 0.4543360027667252, "learning_rate": 3.936390991898752e-06, "loss": 0.4157, "step": 18963 }, { "epoch": 0.5812185852641902, "grad_norm": 1.4242948797401387, "learning_rate": 3.93590603862393e-06, "loss": 0.712, "step": 18964 }, { "epoch": 0.5812492337869315, "grad_norm": 1.2388138239412663, "learning_rate": 3.935421095833898e-06, "loss": 0.5876, "step": 18965 }, { "epoch": 0.5812798823096726, "grad_norm": 1.217866483908995, "learning_rate": 3.934936163533434e-06, "loss": 0.7013, "step": 18966 }, { "epoch": 0.5813105308324139, "grad_norm": 0.4628171067244902, "learning_rate": 3.9344512417273165e-06, "loss": 0.4397, "step": 18967 }, { "epoch": 0.5813411793551551, "grad_norm": 0.447697485637214, "learning_rate": 3.9339663304203236e-06, "loss": 0.3843, "step": 18968 }, { "epoch": 0.5813718278778963, "grad_norm": 1.40674470312463, "learning_rate": 3.933481429617233e-06, "loss": 0.6512, "step": 18969 }, { "epoch": 0.5814024764006375, "grad_norm": 1.3349756408978772, "learning_rate": 3.932996539322825e-06, "loss": 0.6682, "step": 18970 }, { "epoch": 0.5814331249233787, "grad_norm": 1.4317561492555975, "learning_rate": 3.932511659541871e-06, "loss": 0.567, "step": 18971 }, { "epoch": 0.5814637734461199, "grad_norm": 1.3956124450827045, "learning_rate": 3.9320267902791564e-06, "loss": 0.6076, "step": 18972 }, { "epoch": 0.5814944219688611, "grad_norm": 1.2231823122551266, "learning_rate": 3.9315419315394525e-06, "loss": 0.588, "step": 18973 }, { "epoch": 0.5815250704916023, "grad_norm": 1.3235187602624703, "learning_rate": 3.931057083327541e-06, "loss": 0.6822, "step": 18974 }, { "epoch": 0.5815557190143436, "grad_norm": 1.446740868857148, "learning_rate": 3.930572245648197e-06, "loss": 0.5888, "step": 18975 }, { "epoch": 0.5815863675370847, "grad_norm": 1.2055338171871428, "learning_rate": 3.930087418506198e-06, "loss": 0.5466, "step": 18976 }, { "epoch": 0.581617016059826, "grad_norm": 1.3502854501854702, "learning_rate": 3.929602601906322e-06, "loss": 0.6826, "step": 18977 }, { "epoch": 0.5816476645825671, "grad_norm": 1.237488248214712, "learning_rate": 3.929117795853345e-06, "loss": 0.6487, "step": 18978 }, { "epoch": 0.5816783131053084, "grad_norm": 1.3631986345007459, "learning_rate": 3.928633000352043e-06, "loss": 0.6341, "step": 18979 }, { "epoch": 0.5817089616280495, "grad_norm": 1.4005125912745653, "learning_rate": 3.928148215407197e-06, "loss": 0.6263, "step": 18980 }, { "epoch": 0.5817396101507908, "grad_norm": 1.4109846253688816, "learning_rate": 3.927663441023578e-06, "loss": 0.687, "step": 18981 }, { "epoch": 0.5817702586735319, "grad_norm": 1.393061258276448, "learning_rate": 3.927178677205969e-06, "loss": 0.5537, "step": 18982 }, { "epoch": 0.5818009071962731, "grad_norm": 1.4271406260510087, "learning_rate": 3.92669392395914e-06, "loss": 0.6341, "step": 18983 }, { "epoch": 0.5818315557190143, "grad_norm": 1.695771791998473, "learning_rate": 3.926209181287871e-06, "loss": 0.7119, "step": 18984 }, { "epoch": 0.5818622042417555, "grad_norm": 1.2355368588079059, "learning_rate": 3.925724449196938e-06, "loss": 0.7164, "step": 18985 }, { "epoch": 0.5818928527644968, "grad_norm": 1.0869286407129208, "learning_rate": 3.925239727691118e-06, "loss": 0.6242, "step": 18986 }, { "epoch": 0.5819235012872379, "grad_norm": 3.534906834853455, "learning_rate": 3.924755016775184e-06, "loss": 0.7389, "step": 18987 }, { "epoch": 0.5819541498099792, "grad_norm": 1.2383716590055367, "learning_rate": 3.924270316453915e-06, "loss": 0.6478, "step": 18988 }, { "epoch": 0.5819847983327203, "grad_norm": 1.3314458733886014, "learning_rate": 3.923785626732087e-06, "loss": 0.6731, "step": 18989 }, { "epoch": 0.5820154468554616, "grad_norm": 1.3595595381965169, "learning_rate": 3.923300947614471e-06, "loss": 0.6422, "step": 18990 }, { "epoch": 0.5820460953782027, "grad_norm": 1.4943307393265606, "learning_rate": 3.92281627910585e-06, "loss": 0.6431, "step": 18991 }, { "epoch": 0.582076743900944, "grad_norm": 0.4728036512198947, "learning_rate": 3.922331621210992e-06, "loss": 0.4071, "step": 18992 }, { "epoch": 0.5821073924236851, "grad_norm": 1.1425245281330576, "learning_rate": 3.92184697393468e-06, "loss": 0.5547, "step": 18993 }, { "epoch": 0.5821380409464264, "grad_norm": 1.2330212925798036, "learning_rate": 3.9213623372816845e-06, "loss": 0.6891, "step": 18994 }, { "epoch": 0.5821686894691676, "grad_norm": 1.3621323899388305, "learning_rate": 3.920877711256781e-06, "loss": 0.6678, "step": 18995 }, { "epoch": 0.5821993379919088, "grad_norm": 1.2288033001549439, "learning_rate": 3.920393095864746e-06, "loss": 0.639, "step": 18996 }, { "epoch": 0.58222998651465, "grad_norm": 1.3085325690937664, "learning_rate": 3.919908491110354e-06, "loss": 0.6941, "step": 18997 }, { "epoch": 0.5822606350373912, "grad_norm": 1.3797668350337644, "learning_rate": 3.9194238969983795e-06, "loss": 0.5731, "step": 18998 }, { "epoch": 0.5822912835601324, "grad_norm": 1.2977818384869544, "learning_rate": 3.918939313533598e-06, "loss": 0.6676, "step": 18999 }, { "epoch": 0.5823219320828736, "grad_norm": 1.352959015989466, "learning_rate": 3.918454740720784e-06, "loss": 0.7393, "step": 19000 }, { "epoch": 0.5823525806056148, "grad_norm": 1.3573997682460945, "learning_rate": 3.917970178564713e-06, "loss": 0.634, "step": 19001 }, { "epoch": 0.582383229128356, "grad_norm": 1.1410218057062094, "learning_rate": 3.91748562707016e-06, "loss": 0.6726, "step": 19002 }, { "epoch": 0.5824138776510972, "grad_norm": 1.3229259493133372, "learning_rate": 3.917001086241895e-06, "loss": 0.6441, "step": 19003 }, { "epoch": 0.5824445261738385, "grad_norm": 1.4446467525237605, "learning_rate": 3.916516556084697e-06, "loss": 0.6491, "step": 19004 }, { "epoch": 0.5824751746965796, "grad_norm": 1.3067741547961977, "learning_rate": 3.916032036603339e-06, "loss": 0.6163, "step": 19005 }, { "epoch": 0.5825058232193209, "grad_norm": 1.2858538955724617, "learning_rate": 3.9155475278025935e-06, "loss": 0.6582, "step": 19006 }, { "epoch": 0.582536471742062, "grad_norm": 0.4395978422911504, "learning_rate": 3.915063029687236e-06, "loss": 0.4073, "step": 19007 }, { "epoch": 0.5825671202648033, "grad_norm": 1.2395914170122377, "learning_rate": 3.91457854226204e-06, "loss": 0.6693, "step": 19008 }, { "epoch": 0.5825977687875444, "grad_norm": 0.4461463101594397, "learning_rate": 3.9140940655317795e-06, "loss": 0.4034, "step": 19009 }, { "epoch": 0.5826284173102857, "grad_norm": 1.3788261699813855, "learning_rate": 3.913609599501228e-06, "loss": 0.6682, "step": 19010 }, { "epoch": 0.5826590658330268, "grad_norm": 1.267119554659262, "learning_rate": 3.913125144175159e-06, "loss": 0.6921, "step": 19011 }, { "epoch": 0.5826897143557681, "grad_norm": 1.9134585412002543, "learning_rate": 3.912640699558346e-06, "loss": 0.6501, "step": 19012 }, { "epoch": 0.5827203628785093, "grad_norm": 1.3565883482612089, "learning_rate": 3.912156265655564e-06, "loss": 0.7407, "step": 19013 }, { "epoch": 0.5827510114012504, "grad_norm": 0.4859682003580811, "learning_rate": 3.9116718424715825e-06, "loss": 0.428, "step": 19014 }, { "epoch": 0.5827816599239917, "grad_norm": 1.315790214766693, "learning_rate": 3.9111874300111786e-06, "loss": 0.6193, "step": 19015 }, { "epoch": 0.5828123084467328, "grad_norm": 1.5676033962789193, "learning_rate": 3.910703028279123e-06, "loss": 0.7193, "step": 19016 }, { "epoch": 0.5828429569694741, "grad_norm": 1.2931274001683433, "learning_rate": 3.9102186372801875e-06, "loss": 0.6275, "step": 19017 }, { "epoch": 0.5828736054922152, "grad_norm": 1.288462921931081, "learning_rate": 3.909734257019148e-06, "loss": 0.5634, "step": 19018 }, { "epoch": 0.5829042540149565, "grad_norm": 1.2993963985628831, "learning_rate": 3.909249887500775e-06, "loss": 0.6338, "step": 19019 }, { "epoch": 0.5829349025376976, "grad_norm": 1.29479619083931, "learning_rate": 3.9087655287298435e-06, "loss": 0.6584, "step": 19020 }, { "epoch": 0.5829655510604389, "grad_norm": 1.2723206945270233, "learning_rate": 3.908281180711123e-06, "loss": 0.5569, "step": 19021 }, { "epoch": 0.58299619958318, "grad_norm": 1.3708928760928143, "learning_rate": 3.907796843449387e-06, "loss": 0.7013, "step": 19022 }, { "epoch": 0.5830268481059213, "grad_norm": 1.3425886057222665, "learning_rate": 3.9073125169494095e-06, "loss": 0.6646, "step": 19023 }, { "epoch": 0.5830574966286625, "grad_norm": 0.47332396255113796, "learning_rate": 3.906828201215963e-06, "loss": 0.4421, "step": 19024 }, { "epoch": 0.5830881451514037, "grad_norm": 1.1946036472922548, "learning_rate": 3.9063438962538145e-06, "loss": 0.5965, "step": 19025 }, { "epoch": 0.5831187936741449, "grad_norm": 0.44785265401512664, "learning_rate": 3.9058596020677406e-06, "loss": 0.4114, "step": 19026 }, { "epoch": 0.5831494421968861, "grad_norm": 1.2512510072931757, "learning_rate": 3.9053753186625114e-06, "loss": 0.5938, "step": 19027 }, { "epoch": 0.5831800907196273, "grad_norm": 1.43767219345544, "learning_rate": 3.9048910460429e-06, "loss": 0.676, "step": 19028 }, { "epoch": 0.5832107392423685, "grad_norm": 1.3039881326208427, "learning_rate": 3.904406784213678e-06, "loss": 0.5541, "step": 19029 }, { "epoch": 0.5832413877651097, "grad_norm": 0.45384246653201676, "learning_rate": 3.9039225331796145e-06, "loss": 0.4156, "step": 19030 }, { "epoch": 0.583272036287851, "grad_norm": 1.097853204390821, "learning_rate": 3.903438292945485e-06, "loss": 0.5801, "step": 19031 }, { "epoch": 0.5833026848105921, "grad_norm": 1.3273215802666174, "learning_rate": 3.902954063516058e-06, "loss": 0.6259, "step": 19032 }, { "epoch": 0.5833333333333334, "grad_norm": 1.0840747173509102, "learning_rate": 3.902469844896103e-06, "loss": 0.705, "step": 19033 }, { "epoch": 0.5833639818560745, "grad_norm": 1.3264540177592632, "learning_rate": 3.901985637090397e-06, "loss": 0.6713, "step": 19034 }, { "epoch": 0.5833946303788158, "grad_norm": 1.3084634881355823, "learning_rate": 3.901501440103706e-06, "loss": 0.641, "step": 19035 }, { "epoch": 0.5834252789015569, "grad_norm": 1.201161024732654, "learning_rate": 3.9010172539408006e-06, "loss": 0.594, "step": 19036 }, { "epoch": 0.5834559274242982, "grad_norm": 1.5068713000439287, "learning_rate": 3.9005330786064545e-06, "loss": 0.6189, "step": 19037 }, { "epoch": 0.5834865759470393, "grad_norm": 0.43363078190828175, "learning_rate": 3.900048914105436e-06, "loss": 0.4033, "step": 19038 }, { "epoch": 0.5835172244697806, "grad_norm": 1.255880941491022, "learning_rate": 3.89956476044252e-06, "loss": 0.5486, "step": 19039 }, { "epoch": 0.5835478729925218, "grad_norm": 1.1616564951714692, "learning_rate": 3.899080617622472e-06, "loss": 0.6173, "step": 19040 }, { "epoch": 0.583578521515263, "grad_norm": 0.4806025248140617, "learning_rate": 3.898596485650065e-06, "loss": 0.4118, "step": 19041 }, { "epoch": 0.5836091700380042, "grad_norm": 1.297652065374878, "learning_rate": 3.898112364530068e-06, "loss": 0.7162, "step": 19042 }, { "epoch": 0.5836398185607454, "grad_norm": 1.2432304224940192, "learning_rate": 3.897628254267254e-06, "loss": 0.6364, "step": 19043 }, { "epoch": 0.5836704670834866, "grad_norm": 1.4641079173122045, "learning_rate": 3.897144154866387e-06, "loss": 0.6992, "step": 19044 }, { "epoch": 0.5837011156062277, "grad_norm": 1.3042448325251519, "learning_rate": 3.896660066332244e-06, "loss": 0.6469, "step": 19045 }, { "epoch": 0.583731764128969, "grad_norm": 1.3676713519214958, "learning_rate": 3.896175988669589e-06, "loss": 0.7017, "step": 19046 }, { "epoch": 0.5837624126517101, "grad_norm": 0.46218329790370494, "learning_rate": 3.8956919218831975e-06, "loss": 0.4078, "step": 19047 }, { "epoch": 0.5837930611744514, "grad_norm": 1.630768065945499, "learning_rate": 3.895207865977835e-06, "loss": 0.4831, "step": 19048 }, { "epoch": 0.5838237096971925, "grad_norm": 1.2306271876393253, "learning_rate": 3.89472382095827e-06, "loss": 0.6904, "step": 19049 }, { "epoch": 0.5838543582199338, "grad_norm": 1.3747927820209702, "learning_rate": 3.894239786829277e-06, "loss": 0.67, "step": 19050 }, { "epoch": 0.583885006742675, "grad_norm": 1.4959192807877415, "learning_rate": 3.8937557635956205e-06, "loss": 0.5713, "step": 19051 }, { "epoch": 0.5839156552654162, "grad_norm": 1.1824384442991913, "learning_rate": 3.893271751262071e-06, "loss": 0.522, "step": 19052 }, { "epoch": 0.5839463037881574, "grad_norm": 1.2455611809059781, "learning_rate": 3.8927877498334e-06, "loss": 0.5995, "step": 19053 }, { "epoch": 0.5839769523108986, "grad_norm": 1.2604421559922605, "learning_rate": 3.892303759314372e-06, "loss": 0.631, "step": 19054 }, { "epoch": 0.5840076008336398, "grad_norm": 1.2495331507424259, "learning_rate": 3.891819779709761e-06, "loss": 0.6044, "step": 19055 }, { "epoch": 0.584038249356381, "grad_norm": 1.077454012460802, "learning_rate": 3.8913358110243335e-06, "loss": 0.6894, "step": 19056 }, { "epoch": 0.5840688978791222, "grad_norm": 1.1830541593537431, "learning_rate": 3.890851853262855e-06, "loss": 0.6283, "step": 19057 }, { "epoch": 0.5840995464018635, "grad_norm": 1.2955299636128532, "learning_rate": 3.8903679064301e-06, "loss": 0.7676, "step": 19058 }, { "epoch": 0.5841301949246046, "grad_norm": 0.48883011516380215, "learning_rate": 3.889883970530833e-06, "loss": 0.4123, "step": 19059 }, { "epoch": 0.5841608434473459, "grad_norm": 1.153479190722221, "learning_rate": 3.889400045569822e-06, "loss": 0.6365, "step": 19060 }, { "epoch": 0.584191491970087, "grad_norm": 1.359590174429003, "learning_rate": 3.888916131551837e-06, "loss": 0.7677, "step": 19061 }, { "epoch": 0.5842221404928283, "grad_norm": 1.2668552217738756, "learning_rate": 3.888432228481647e-06, "loss": 0.6129, "step": 19062 }, { "epoch": 0.5842527890155694, "grad_norm": 1.2903513668305795, "learning_rate": 3.887948336364017e-06, "loss": 0.6579, "step": 19063 }, { "epoch": 0.5842834375383107, "grad_norm": 1.2909507939335592, "learning_rate": 3.887464455203717e-06, "loss": 0.6974, "step": 19064 }, { "epoch": 0.5843140860610518, "grad_norm": 1.2781103631674313, "learning_rate": 3.886980585005515e-06, "loss": 0.6453, "step": 19065 }, { "epoch": 0.5843447345837931, "grad_norm": 1.2863166066465213, "learning_rate": 3.886496725774178e-06, "loss": 0.5872, "step": 19066 }, { "epoch": 0.5843753831065343, "grad_norm": 1.2900428372695119, "learning_rate": 3.886012877514475e-06, "loss": 0.6552, "step": 19067 }, { "epoch": 0.5844060316292755, "grad_norm": 1.392898525541713, "learning_rate": 3.885529040231168e-06, "loss": 0.6899, "step": 19068 }, { "epoch": 0.5844366801520167, "grad_norm": 1.2960939662923112, "learning_rate": 3.885045213929032e-06, "loss": 0.6212, "step": 19069 }, { "epoch": 0.5844673286747579, "grad_norm": 1.2430235451284735, "learning_rate": 3.884561398612831e-06, "loss": 0.6534, "step": 19070 }, { "epoch": 0.5844979771974991, "grad_norm": 0.48163935563025784, "learning_rate": 3.88407759428733e-06, "loss": 0.4216, "step": 19071 }, { "epoch": 0.5845286257202403, "grad_norm": 1.3607527565032629, "learning_rate": 3.883593800957299e-06, "loss": 0.6905, "step": 19072 }, { "epoch": 0.5845592742429815, "grad_norm": 1.344489308368291, "learning_rate": 3.883110018627503e-06, "loss": 0.6272, "step": 19073 }, { "epoch": 0.5845899227657227, "grad_norm": 1.2615022114351488, "learning_rate": 3.88262624730271e-06, "loss": 0.621, "step": 19074 }, { "epoch": 0.5846205712884639, "grad_norm": 1.259410454529415, "learning_rate": 3.882142486987688e-06, "loss": 0.6645, "step": 19075 }, { "epoch": 0.584651219811205, "grad_norm": 1.2016237151198983, "learning_rate": 3.8816587376872e-06, "loss": 0.6056, "step": 19076 }, { "epoch": 0.5846818683339463, "grad_norm": 0.4522130557619985, "learning_rate": 3.881174999406017e-06, "loss": 0.4144, "step": 19077 }, { "epoch": 0.5847125168566875, "grad_norm": 1.3863161697221804, "learning_rate": 3.880691272148902e-06, "loss": 0.6297, "step": 19078 }, { "epoch": 0.5847431653794287, "grad_norm": 1.222885326803335, "learning_rate": 3.880207555920621e-06, "loss": 0.6057, "step": 19079 }, { "epoch": 0.5847738139021699, "grad_norm": 1.229570197111086, "learning_rate": 3.879723850725943e-06, "loss": 0.6457, "step": 19080 }, { "epoch": 0.5848044624249111, "grad_norm": 1.2299867160541695, "learning_rate": 3.879240156569631e-06, "loss": 0.5755, "step": 19081 }, { "epoch": 0.5848351109476523, "grad_norm": 1.2358837978247863, "learning_rate": 3.878756473456453e-06, "loss": 0.665, "step": 19082 }, { "epoch": 0.5848657594703935, "grad_norm": 1.4995858167820084, "learning_rate": 3.878272801391176e-06, "loss": 0.5964, "step": 19083 }, { "epoch": 0.5848964079931347, "grad_norm": 1.2899030846411468, "learning_rate": 3.877789140378561e-06, "loss": 0.658, "step": 19084 }, { "epoch": 0.584927056515876, "grad_norm": 1.285774998189506, "learning_rate": 3.87730549042338e-06, "loss": 0.6582, "step": 19085 }, { "epoch": 0.5849577050386171, "grad_norm": 2.0213458319168405, "learning_rate": 3.876821851530395e-06, "loss": 0.6165, "step": 19086 }, { "epoch": 0.5849883535613584, "grad_norm": 1.2772606011370848, "learning_rate": 3.876338223704368e-06, "loss": 0.6285, "step": 19087 }, { "epoch": 0.5850190020840995, "grad_norm": 1.1555793874333973, "learning_rate": 3.875854606950072e-06, "loss": 0.65, "step": 19088 }, { "epoch": 0.5850496506068408, "grad_norm": 1.270159971822993, "learning_rate": 3.875371001272266e-06, "loss": 0.6321, "step": 19089 }, { "epoch": 0.5850802991295819, "grad_norm": 0.48846305841589044, "learning_rate": 3.874887406675718e-06, "loss": 0.4256, "step": 19090 }, { "epoch": 0.5851109476523232, "grad_norm": 1.3276660714751416, "learning_rate": 3.874403823165192e-06, "loss": 0.6411, "step": 19091 }, { "epoch": 0.5851415961750643, "grad_norm": 1.0908630444984007, "learning_rate": 3.873920250745453e-06, "loss": 0.6249, "step": 19092 }, { "epoch": 0.5851722446978056, "grad_norm": 1.3102796890806854, "learning_rate": 3.873436689421266e-06, "loss": 0.6259, "step": 19093 }, { "epoch": 0.5852028932205467, "grad_norm": 1.2641817514406242, "learning_rate": 3.872953139197397e-06, "loss": 0.6495, "step": 19094 }, { "epoch": 0.585233541743288, "grad_norm": 0.43035899426820107, "learning_rate": 3.872469600078607e-06, "loss": 0.4108, "step": 19095 }, { "epoch": 0.5852641902660292, "grad_norm": 1.2970466268264227, "learning_rate": 3.871986072069663e-06, "loss": 0.6185, "step": 19096 }, { "epoch": 0.5852948387887704, "grad_norm": 1.2645055840954031, "learning_rate": 3.871502555175331e-06, "loss": 0.7094, "step": 19097 }, { "epoch": 0.5853254873115116, "grad_norm": 1.2359294401896672, "learning_rate": 3.8710190494003694e-06, "loss": 0.5815, "step": 19098 }, { "epoch": 0.5853561358342528, "grad_norm": 1.0809610266068836, "learning_rate": 3.870535554749549e-06, "loss": 0.54, "step": 19099 }, { "epoch": 0.585386784356994, "grad_norm": 1.406986193418573, "learning_rate": 3.870052071227628e-06, "loss": 0.6204, "step": 19100 }, { "epoch": 0.5854174328797352, "grad_norm": 1.3757725246824852, "learning_rate": 3.869568598839376e-06, "loss": 0.6855, "step": 19101 }, { "epoch": 0.5854480814024764, "grad_norm": 0.48355785519800437, "learning_rate": 3.869085137589552e-06, "loss": 0.4151, "step": 19102 }, { "epoch": 0.5854787299252177, "grad_norm": 1.1582620089815818, "learning_rate": 3.868601687482922e-06, "loss": 0.6077, "step": 19103 }, { "epoch": 0.5855093784479588, "grad_norm": 1.3260243449802527, "learning_rate": 3.8681182485242494e-06, "loss": 0.558, "step": 19104 }, { "epoch": 0.5855400269707001, "grad_norm": 1.3069557090314625, "learning_rate": 3.867634820718297e-06, "loss": 0.5988, "step": 19105 }, { "epoch": 0.5855706754934412, "grad_norm": 1.2942340155248966, "learning_rate": 3.867151404069828e-06, "loss": 0.6154, "step": 19106 }, { "epoch": 0.5856013240161824, "grad_norm": 0.42774338895016345, "learning_rate": 3.8666679985836065e-06, "loss": 0.4035, "step": 19107 }, { "epoch": 0.5856319725389236, "grad_norm": 1.3419687431670608, "learning_rate": 3.8661846042643945e-06, "loss": 0.7355, "step": 19108 }, { "epoch": 0.5856626210616648, "grad_norm": 1.2876023511686927, "learning_rate": 3.865701221116957e-06, "loss": 0.6684, "step": 19109 }, { "epoch": 0.585693269584406, "grad_norm": 1.2686560909071762, "learning_rate": 3.865217849146055e-06, "loss": 0.7079, "step": 19110 }, { "epoch": 0.5857239181071472, "grad_norm": 1.241668226264654, "learning_rate": 3.864734488356451e-06, "loss": 0.7522, "step": 19111 }, { "epoch": 0.5857545666298885, "grad_norm": 1.2571795579066296, "learning_rate": 3.864251138752911e-06, "loss": 0.6762, "step": 19112 }, { "epoch": 0.5857852151526296, "grad_norm": 1.319794194044041, "learning_rate": 3.863767800340193e-06, "loss": 0.5503, "step": 19113 }, { "epoch": 0.5858158636753709, "grad_norm": 1.195650926134014, "learning_rate": 3.863284473123061e-06, "loss": 0.6388, "step": 19114 }, { "epoch": 0.585846512198112, "grad_norm": 1.3843340165729061, "learning_rate": 3.862801157106279e-06, "loss": 0.5801, "step": 19115 }, { "epoch": 0.5858771607208533, "grad_norm": 1.1461409417668156, "learning_rate": 3.862317852294609e-06, "loss": 0.5711, "step": 19116 }, { "epoch": 0.5859078092435944, "grad_norm": 1.118683663762017, "learning_rate": 3.8618345586928105e-06, "loss": 0.5801, "step": 19117 }, { "epoch": 0.5859384577663357, "grad_norm": 1.2592317750780027, "learning_rate": 3.861351276305649e-06, "loss": 0.6643, "step": 19118 }, { "epoch": 0.5859691062890768, "grad_norm": 1.240097206487919, "learning_rate": 3.860868005137883e-06, "loss": 0.6862, "step": 19119 }, { "epoch": 0.5859997548118181, "grad_norm": 1.298830929920728, "learning_rate": 3.8603847451942776e-06, "loss": 0.5752, "step": 19120 }, { "epoch": 0.5860304033345592, "grad_norm": 1.3057595745051895, "learning_rate": 3.859901496479593e-06, "loss": 0.6445, "step": 19121 }, { "epoch": 0.5860610518573005, "grad_norm": 1.3414308812446, "learning_rate": 3.85941825899859e-06, "loss": 0.5939, "step": 19122 }, { "epoch": 0.5860917003800417, "grad_norm": 1.1470056313324302, "learning_rate": 3.858935032756031e-06, "loss": 0.6056, "step": 19123 }, { "epoch": 0.5861223489027829, "grad_norm": 1.3813028797993518, "learning_rate": 3.858451817756676e-06, "loss": 0.652, "step": 19124 }, { "epoch": 0.5861529974255241, "grad_norm": 1.3424086914510498, "learning_rate": 3.857968614005287e-06, "loss": 0.6037, "step": 19125 }, { "epoch": 0.5861836459482653, "grad_norm": 1.2798114077814429, "learning_rate": 3.857485421506627e-06, "loss": 0.5732, "step": 19126 }, { "epoch": 0.5862142944710065, "grad_norm": 1.1143119195863243, "learning_rate": 3.857002240265454e-06, "loss": 0.5745, "step": 19127 }, { "epoch": 0.5862449429937477, "grad_norm": 1.2255490712030346, "learning_rate": 3.856519070286532e-06, "loss": 0.5857, "step": 19128 }, { "epoch": 0.5862755915164889, "grad_norm": 1.1285419859472845, "learning_rate": 3.856035911574621e-06, "loss": 0.5479, "step": 19129 }, { "epoch": 0.5863062400392302, "grad_norm": 1.272525674464215, "learning_rate": 3.855552764134478e-06, "loss": 0.5738, "step": 19130 }, { "epoch": 0.5863368885619713, "grad_norm": 1.2162143537840544, "learning_rate": 3.855069627970869e-06, "loss": 0.6192, "step": 19131 }, { "epoch": 0.5863675370847126, "grad_norm": 1.495124694042809, "learning_rate": 3.854586503088551e-06, "loss": 0.6359, "step": 19132 }, { "epoch": 0.5863981856074537, "grad_norm": 1.3595649720295788, "learning_rate": 3.854103389492283e-06, "loss": 0.5768, "step": 19133 }, { "epoch": 0.586428834130195, "grad_norm": 1.137602205353844, "learning_rate": 3.85362028718683e-06, "loss": 0.6432, "step": 19134 }, { "epoch": 0.5864594826529361, "grad_norm": 0.44280360423100074, "learning_rate": 3.853137196176949e-06, "loss": 0.3961, "step": 19135 }, { "epoch": 0.5864901311756774, "grad_norm": 1.3604353448879365, "learning_rate": 3.852654116467401e-06, "loss": 0.5496, "step": 19136 }, { "epoch": 0.5865207796984185, "grad_norm": 1.489654560120424, "learning_rate": 3.852171048062945e-06, "loss": 0.6657, "step": 19137 }, { "epoch": 0.5865514282211597, "grad_norm": 1.278862848278417, "learning_rate": 3.851687990968341e-06, "loss": 0.6183, "step": 19138 }, { "epoch": 0.586582076743901, "grad_norm": 1.2847995346083776, "learning_rate": 3.85120494518835e-06, "loss": 0.6293, "step": 19139 }, { "epoch": 0.5866127252666421, "grad_norm": 1.415773970529735, "learning_rate": 3.850721910727731e-06, "loss": 0.6331, "step": 19140 }, { "epoch": 0.5866433737893834, "grad_norm": 1.236196011560034, "learning_rate": 3.850238887591241e-06, "loss": 0.6626, "step": 19141 }, { "epoch": 0.5866740223121245, "grad_norm": 1.2196826910018257, "learning_rate": 3.849755875783644e-06, "loss": 0.4866, "step": 19142 }, { "epoch": 0.5867046708348658, "grad_norm": 1.3289866527797547, "learning_rate": 3.849272875309696e-06, "loss": 0.649, "step": 19143 }, { "epoch": 0.5867353193576069, "grad_norm": 1.2204176664149031, "learning_rate": 3.848789886174155e-06, "loss": 0.6438, "step": 19144 }, { "epoch": 0.5867659678803482, "grad_norm": 0.4563188200782988, "learning_rate": 3.848306908381783e-06, "loss": 0.444, "step": 19145 }, { "epoch": 0.5867966164030893, "grad_norm": 1.2946921021981088, "learning_rate": 3.847823941937338e-06, "loss": 0.5989, "step": 19146 }, { "epoch": 0.5868272649258306, "grad_norm": 1.328018315981653, "learning_rate": 3.847340986845578e-06, "loss": 0.6287, "step": 19147 }, { "epoch": 0.5868579134485717, "grad_norm": 1.332978623520846, "learning_rate": 3.846858043111262e-06, "loss": 0.6075, "step": 19148 }, { "epoch": 0.586888561971313, "grad_norm": 1.24628531676896, "learning_rate": 3.846375110739149e-06, "loss": 0.6514, "step": 19149 }, { "epoch": 0.5869192104940542, "grad_norm": 1.371112705896306, "learning_rate": 3.8458921897339975e-06, "loss": 0.6351, "step": 19150 }, { "epoch": 0.5869498590167954, "grad_norm": 1.354832087793194, "learning_rate": 3.845409280100567e-06, "loss": 0.5396, "step": 19151 }, { "epoch": 0.5869805075395366, "grad_norm": 1.2237860489010959, "learning_rate": 3.84492638184361e-06, "loss": 0.6213, "step": 19152 }, { "epoch": 0.5870111560622778, "grad_norm": 1.1806694254126264, "learning_rate": 3.844443494967893e-06, "loss": 0.5654, "step": 19153 }, { "epoch": 0.587041804585019, "grad_norm": 1.4036852612409625, "learning_rate": 3.8439606194781665e-06, "loss": 0.681, "step": 19154 }, { "epoch": 0.5870724531077602, "grad_norm": 1.3054805766539461, "learning_rate": 3.843477755379195e-06, "loss": 0.7051, "step": 19155 }, { "epoch": 0.5871031016305014, "grad_norm": 1.2450522044018721, "learning_rate": 3.842994902675732e-06, "loss": 0.6845, "step": 19156 }, { "epoch": 0.5871337501532427, "grad_norm": 1.3747720644900043, "learning_rate": 3.842512061372535e-06, "loss": 0.66, "step": 19157 }, { "epoch": 0.5871643986759838, "grad_norm": 1.3730563934303324, "learning_rate": 3.842029231474364e-06, "loss": 0.6432, "step": 19158 }, { "epoch": 0.5871950471987251, "grad_norm": 1.1612592110721838, "learning_rate": 3.841546412985977e-06, "loss": 0.5268, "step": 19159 }, { "epoch": 0.5872256957214662, "grad_norm": 3.6315850243209367, "learning_rate": 3.841063605912126e-06, "loss": 0.6634, "step": 19160 }, { "epoch": 0.5872563442442075, "grad_norm": 1.221434906365374, "learning_rate": 3.840580810257574e-06, "loss": 0.626, "step": 19161 }, { "epoch": 0.5872869927669486, "grad_norm": 1.2334221741197793, "learning_rate": 3.840098026027075e-06, "loss": 0.6329, "step": 19162 }, { "epoch": 0.5873176412896899, "grad_norm": 1.260630943366307, "learning_rate": 3.839615253225387e-06, "loss": 0.6995, "step": 19163 }, { "epoch": 0.587348289812431, "grad_norm": 1.1926964844918475, "learning_rate": 3.839132491857269e-06, "loss": 0.6361, "step": 19164 }, { "epoch": 0.5873789383351723, "grad_norm": 1.354667783060894, "learning_rate": 3.838649741927472e-06, "loss": 0.6812, "step": 19165 }, { "epoch": 0.5874095868579134, "grad_norm": 1.1400285143466629, "learning_rate": 3.838167003440759e-06, "loss": 0.5772, "step": 19166 }, { "epoch": 0.5874402353806547, "grad_norm": 1.4618103096156663, "learning_rate": 3.837684276401883e-06, "loss": 0.7122, "step": 19167 }, { "epoch": 0.5874708839033959, "grad_norm": 1.2643064341318828, "learning_rate": 3.837201560815601e-06, "loss": 0.6169, "step": 19168 }, { "epoch": 0.587501532426137, "grad_norm": 1.2685369784872518, "learning_rate": 3.83671885668667e-06, "loss": 0.6409, "step": 19169 }, { "epoch": 0.5875321809488783, "grad_norm": 1.2260312749622233, "learning_rate": 3.836236164019845e-06, "loss": 0.6842, "step": 19170 }, { "epoch": 0.5875628294716194, "grad_norm": 1.216811096548373, "learning_rate": 3.835753482819883e-06, "loss": 0.53, "step": 19171 }, { "epoch": 0.5875934779943607, "grad_norm": 1.152483566855208, "learning_rate": 3.835270813091539e-06, "loss": 0.6231, "step": 19172 }, { "epoch": 0.5876241265171018, "grad_norm": 1.3040956270629618, "learning_rate": 3.834788154839571e-06, "loss": 0.6788, "step": 19173 }, { "epoch": 0.5876547750398431, "grad_norm": 1.2901564816783824, "learning_rate": 3.834305508068734e-06, "loss": 0.6036, "step": 19174 }, { "epoch": 0.5876854235625842, "grad_norm": 0.4510045996841939, "learning_rate": 3.833822872783782e-06, "loss": 0.4025, "step": 19175 }, { "epoch": 0.5877160720853255, "grad_norm": 1.2674235029794434, "learning_rate": 3.833340248989471e-06, "loss": 0.5576, "step": 19176 }, { "epoch": 0.5877467206080667, "grad_norm": 1.2001035817987515, "learning_rate": 3.832857636690559e-06, "loss": 0.571, "step": 19177 }, { "epoch": 0.5877773691308079, "grad_norm": 1.3271562942226225, "learning_rate": 3.832375035891798e-06, "loss": 0.5573, "step": 19178 }, { "epoch": 0.5878080176535491, "grad_norm": 1.2618395066991235, "learning_rate": 3.831892446597944e-06, "loss": 0.6002, "step": 19179 }, { "epoch": 0.5878386661762903, "grad_norm": 1.2463053418743906, "learning_rate": 3.831409868813754e-06, "loss": 0.6876, "step": 19180 }, { "epoch": 0.5878693146990315, "grad_norm": 1.174597544684441, "learning_rate": 3.83092730254398e-06, "loss": 0.5508, "step": 19181 }, { "epoch": 0.5878999632217727, "grad_norm": 1.3003868106409704, "learning_rate": 3.830444747793379e-06, "loss": 0.584, "step": 19182 }, { "epoch": 0.5879306117445139, "grad_norm": 1.2138304796282895, "learning_rate": 3.829962204566707e-06, "loss": 0.5967, "step": 19183 }, { "epoch": 0.5879612602672551, "grad_norm": 1.2085454902888861, "learning_rate": 3.829479672868713e-06, "loss": 0.6747, "step": 19184 }, { "epoch": 0.5879919087899963, "grad_norm": 0.43318924813561743, "learning_rate": 3.828997152704159e-06, "loss": 0.3943, "step": 19185 }, { "epoch": 0.5880225573127376, "grad_norm": 1.167748070345314, "learning_rate": 3.828514644077794e-06, "loss": 0.5477, "step": 19186 }, { "epoch": 0.5880532058354787, "grad_norm": 1.2585798382920035, "learning_rate": 3.8280321469943734e-06, "loss": 0.5662, "step": 19187 }, { "epoch": 0.58808385435822, "grad_norm": 1.2427887262866395, "learning_rate": 3.827549661458653e-06, "loss": 0.6459, "step": 19188 }, { "epoch": 0.5881145028809611, "grad_norm": 1.4537349421120316, "learning_rate": 3.827067187475384e-06, "loss": 0.6654, "step": 19189 }, { "epoch": 0.5881451514037024, "grad_norm": 1.2250423619239623, "learning_rate": 3.826584725049325e-06, "loss": 0.6973, "step": 19190 }, { "epoch": 0.5881757999264435, "grad_norm": 1.2917132806728437, "learning_rate": 3.826102274185225e-06, "loss": 0.6279, "step": 19191 }, { "epoch": 0.5882064484491848, "grad_norm": 1.0512305919196006, "learning_rate": 3.82561983488784e-06, "loss": 0.6252, "step": 19192 }, { "epoch": 0.5882370969719259, "grad_norm": 1.2487064655671194, "learning_rate": 3.825137407161923e-06, "loss": 0.6595, "step": 19193 }, { "epoch": 0.5882677454946672, "grad_norm": 1.311550436398756, "learning_rate": 3.8246549910122285e-06, "loss": 0.6614, "step": 19194 }, { "epoch": 0.5882983940174084, "grad_norm": 1.5744605193615342, "learning_rate": 3.824172586443507e-06, "loss": 0.6292, "step": 19195 }, { "epoch": 0.5883290425401496, "grad_norm": 1.2216597846422799, "learning_rate": 3.823690193460517e-06, "loss": 0.6893, "step": 19196 }, { "epoch": 0.5883596910628908, "grad_norm": 1.356195681892952, "learning_rate": 3.8232078120680075e-06, "loss": 0.6903, "step": 19197 }, { "epoch": 0.588390339585632, "grad_norm": 1.3199109767562431, "learning_rate": 3.822725442270731e-06, "loss": 0.6828, "step": 19198 }, { "epoch": 0.5884209881083732, "grad_norm": 1.1320835278912447, "learning_rate": 3.822243084073443e-06, "loss": 0.5915, "step": 19199 }, { "epoch": 0.5884516366311143, "grad_norm": 1.2395362625259627, "learning_rate": 3.821760737480894e-06, "loss": 0.5634, "step": 19200 }, { "epoch": 0.5884822851538556, "grad_norm": 1.1990573721441367, "learning_rate": 3.82127840249784e-06, "loss": 0.6305, "step": 19201 }, { "epoch": 0.5885129336765967, "grad_norm": 1.327280366450411, "learning_rate": 3.820796079129031e-06, "loss": 0.6446, "step": 19202 }, { "epoch": 0.588543582199338, "grad_norm": 0.46191164501966075, "learning_rate": 3.8203137673792185e-06, "loss": 0.3821, "step": 19203 }, { "epoch": 0.5885742307220792, "grad_norm": 0.4457919801320983, "learning_rate": 3.819831467253158e-06, "loss": 0.3988, "step": 19204 }, { "epoch": 0.5886048792448204, "grad_norm": 6.523103609779343, "learning_rate": 3.8193491787556e-06, "loss": 0.6263, "step": 19205 }, { "epoch": 0.5886355277675616, "grad_norm": 0.4847527704977365, "learning_rate": 3.818866901891295e-06, "loss": 0.4047, "step": 19206 }, { "epoch": 0.5886661762903028, "grad_norm": 1.304626909952971, "learning_rate": 3.818384636664998e-06, "loss": 0.6809, "step": 19207 }, { "epoch": 0.588696824813044, "grad_norm": 1.2172575112094732, "learning_rate": 3.817902383081458e-06, "loss": 0.6161, "step": 19208 }, { "epoch": 0.5887274733357852, "grad_norm": 1.3333115522264911, "learning_rate": 3.817420141145431e-06, "loss": 0.6496, "step": 19209 }, { "epoch": 0.5887581218585264, "grad_norm": 1.308360213199189, "learning_rate": 3.816937910861663e-06, "loss": 0.6261, "step": 19210 }, { "epoch": 0.5887887703812676, "grad_norm": 1.281515377098617, "learning_rate": 3.81645569223491e-06, "loss": 0.5369, "step": 19211 }, { "epoch": 0.5888194189040088, "grad_norm": 1.993962087820929, "learning_rate": 3.815973485269921e-06, "loss": 0.57, "step": 19212 }, { "epoch": 0.5888500674267501, "grad_norm": 1.3292919348148706, "learning_rate": 3.815491289971449e-06, "loss": 0.7208, "step": 19213 }, { "epoch": 0.5888807159494912, "grad_norm": 1.2577440774216884, "learning_rate": 3.815009106344244e-06, "loss": 0.6131, "step": 19214 }, { "epoch": 0.5889113644722325, "grad_norm": 1.2073834150691747, "learning_rate": 3.814526934393058e-06, "loss": 0.6246, "step": 19215 }, { "epoch": 0.5889420129949736, "grad_norm": 1.4618548974001846, "learning_rate": 3.814044774122642e-06, "loss": 0.5782, "step": 19216 }, { "epoch": 0.5889726615177149, "grad_norm": 0.4657674945784344, "learning_rate": 3.813562625537743e-06, "loss": 0.3942, "step": 19217 }, { "epoch": 0.589003310040456, "grad_norm": 1.2982760976942416, "learning_rate": 3.8130804886431194e-06, "loss": 0.5433, "step": 19218 }, { "epoch": 0.5890339585631973, "grad_norm": 0.45990671364080615, "learning_rate": 3.8125983634435147e-06, "loss": 0.4241, "step": 19219 }, { "epoch": 0.5890646070859384, "grad_norm": 1.1464338484320171, "learning_rate": 3.812116249943683e-06, "loss": 0.6877, "step": 19220 }, { "epoch": 0.5890952556086797, "grad_norm": 1.2401189798024308, "learning_rate": 3.8116341481483738e-06, "loss": 0.5808, "step": 19221 }, { "epoch": 0.5891259041314209, "grad_norm": 1.3138122633414222, "learning_rate": 3.811152058062337e-06, "loss": 0.692, "step": 19222 }, { "epoch": 0.5891565526541621, "grad_norm": 1.3025079375184896, "learning_rate": 3.8106699796903236e-06, "loss": 0.6476, "step": 19223 }, { "epoch": 0.5891872011769033, "grad_norm": 1.4110615135557152, "learning_rate": 3.8101879130370827e-06, "loss": 0.5433, "step": 19224 }, { "epoch": 0.5892178496996445, "grad_norm": 1.155606214923329, "learning_rate": 3.8097058581073644e-06, "loss": 0.5983, "step": 19225 }, { "epoch": 0.5892484982223857, "grad_norm": 1.2078946870105802, "learning_rate": 3.809223814905921e-06, "loss": 0.5366, "step": 19226 }, { "epoch": 0.5892791467451269, "grad_norm": 1.2015471431024154, "learning_rate": 3.8087417834374964e-06, "loss": 0.5404, "step": 19227 }, { "epoch": 0.5893097952678681, "grad_norm": 1.341985171906061, "learning_rate": 3.8082597637068476e-06, "loss": 0.612, "step": 19228 }, { "epoch": 0.5893404437906093, "grad_norm": 1.397455208221806, "learning_rate": 3.8077777557187185e-06, "loss": 0.5666, "step": 19229 }, { "epoch": 0.5893710923133505, "grad_norm": 1.2371691811454333, "learning_rate": 3.807295759477859e-06, "loss": 0.7145, "step": 19230 }, { "epoch": 0.5894017408360916, "grad_norm": 1.175317467770519, "learning_rate": 3.8068137749890214e-06, "loss": 0.6556, "step": 19231 }, { "epoch": 0.5894323893588329, "grad_norm": 0.4675439090128253, "learning_rate": 3.8063318022569528e-06, "loss": 0.405, "step": 19232 }, { "epoch": 0.5894630378815741, "grad_norm": 1.2827069895991738, "learning_rate": 3.8058498412864016e-06, "loss": 0.6206, "step": 19233 }, { "epoch": 0.5894936864043153, "grad_norm": 1.1715211022136434, "learning_rate": 3.805367892082118e-06, "loss": 0.606, "step": 19234 }, { "epoch": 0.5895243349270565, "grad_norm": 1.356717472337106, "learning_rate": 3.804885954648849e-06, "loss": 0.6389, "step": 19235 }, { "epoch": 0.5895549834497977, "grad_norm": 1.2549490818795555, "learning_rate": 3.804404028991346e-06, "loss": 0.5732, "step": 19236 }, { "epoch": 0.5895856319725389, "grad_norm": 1.1928505033031276, "learning_rate": 3.8039221151143566e-06, "loss": 0.6259, "step": 19237 }, { "epoch": 0.5896162804952801, "grad_norm": 0.44095402234798425, "learning_rate": 3.8034402130226255e-06, "loss": 0.4009, "step": 19238 }, { "epoch": 0.5896469290180213, "grad_norm": 1.168326252345616, "learning_rate": 3.8029583227209077e-06, "loss": 0.6466, "step": 19239 }, { "epoch": 0.5896775775407626, "grad_norm": 1.3424591266718298, "learning_rate": 3.8024764442139467e-06, "loss": 0.6972, "step": 19240 }, { "epoch": 0.5897082260635037, "grad_norm": 1.0360638022550104, "learning_rate": 3.8019945775064904e-06, "loss": 0.4475, "step": 19241 }, { "epoch": 0.589738874586245, "grad_norm": 1.1550588032375242, "learning_rate": 3.8015127226032888e-06, "loss": 0.5084, "step": 19242 }, { "epoch": 0.5897695231089861, "grad_norm": 1.3363117121334167, "learning_rate": 3.80103087950909e-06, "loss": 0.6, "step": 19243 }, { "epoch": 0.5898001716317274, "grad_norm": 0.4530410355551694, "learning_rate": 3.800549048228639e-06, "loss": 0.4152, "step": 19244 }, { "epoch": 0.5898308201544685, "grad_norm": 1.2792701141436937, "learning_rate": 3.8000672287666863e-06, "loss": 0.6444, "step": 19245 }, { "epoch": 0.5898614686772098, "grad_norm": 1.290952362374825, "learning_rate": 3.799585421127977e-06, "loss": 0.6295, "step": 19246 }, { "epoch": 0.5898921171999509, "grad_norm": 1.3160531419143344, "learning_rate": 3.799103625317261e-06, "loss": 0.5353, "step": 19247 }, { "epoch": 0.5899227657226922, "grad_norm": 1.1538519549234643, "learning_rate": 3.7986218413392844e-06, "loss": 0.5999, "step": 19248 }, { "epoch": 0.5899534142454333, "grad_norm": 1.1908183737054066, "learning_rate": 3.798140069198792e-06, "loss": 0.6107, "step": 19249 }, { "epoch": 0.5899840627681746, "grad_norm": 1.378240165470381, "learning_rate": 3.797658308900536e-06, "loss": 0.5535, "step": 19250 }, { "epoch": 0.5900147112909158, "grad_norm": 1.28595739711884, "learning_rate": 3.797176560449259e-06, "loss": 0.6232, "step": 19251 }, { "epoch": 0.590045359813657, "grad_norm": 1.3436325185635647, "learning_rate": 3.7966948238497083e-06, "loss": 0.6323, "step": 19252 }, { "epoch": 0.5900760083363982, "grad_norm": 1.2028154546593839, "learning_rate": 3.7962130991066325e-06, "loss": 0.6687, "step": 19253 }, { "epoch": 0.5901066568591394, "grad_norm": 1.251894456605036, "learning_rate": 3.795731386224776e-06, "loss": 0.6859, "step": 19254 }, { "epoch": 0.5901373053818806, "grad_norm": 1.1139790330361914, "learning_rate": 3.795249685208887e-06, "loss": 0.5732, "step": 19255 }, { "epoch": 0.5901679539046218, "grad_norm": 1.311635858173343, "learning_rate": 3.7947679960637113e-06, "loss": 0.6465, "step": 19256 }, { "epoch": 0.590198602427363, "grad_norm": 1.3247498062209746, "learning_rate": 3.794286318793994e-06, "loss": 0.6423, "step": 19257 }, { "epoch": 0.5902292509501043, "grad_norm": 1.266748292870361, "learning_rate": 3.7938046534044826e-06, "loss": 0.6395, "step": 19258 }, { "epoch": 0.5902598994728454, "grad_norm": 1.4172772891848433, "learning_rate": 3.7933229998999237e-06, "loss": 0.6437, "step": 19259 }, { "epoch": 0.5902905479955867, "grad_norm": 1.1475617657504196, "learning_rate": 3.7928413582850594e-06, "loss": 0.5168, "step": 19260 }, { "epoch": 0.5903211965183278, "grad_norm": 1.209875034758945, "learning_rate": 3.7923597285646406e-06, "loss": 0.6277, "step": 19261 }, { "epoch": 0.590351845041069, "grad_norm": 1.2364507634516746, "learning_rate": 3.7918781107434087e-06, "loss": 0.5613, "step": 19262 }, { "epoch": 0.5903824935638102, "grad_norm": 1.376075470089036, "learning_rate": 3.7913965048261123e-06, "loss": 0.5973, "step": 19263 }, { "epoch": 0.5904131420865514, "grad_norm": 0.4665014031229304, "learning_rate": 3.790914910817495e-06, "loss": 0.4195, "step": 19264 }, { "epoch": 0.5904437906092926, "grad_norm": 1.2546606411866779, "learning_rate": 3.790433328722301e-06, "loss": 0.5452, "step": 19265 }, { "epoch": 0.5904744391320338, "grad_norm": 1.2839940533006289, "learning_rate": 3.789951758545278e-06, "loss": 0.6929, "step": 19266 }, { "epoch": 0.590505087654775, "grad_norm": 1.2758440349493516, "learning_rate": 3.789470200291171e-06, "loss": 0.6006, "step": 19267 }, { "epoch": 0.5905357361775162, "grad_norm": 0.45883418585843966, "learning_rate": 3.788988653964722e-06, "loss": 0.4273, "step": 19268 }, { "epoch": 0.5905663847002575, "grad_norm": 1.202863809505618, "learning_rate": 3.7885071195706786e-06, "loss": 0.4876, "step": 19269 }, { "epoch": 0.5905970332229986, "grad_norm": 1.1846148045255274, "learning_rate": 3.7880255971137857e-06, "loss": 0.6129, "step": 19270 }, { "epoch": 0.5906276817457399, "grad_norm": 1.2580348990731285, "learning_rate": 3.7875440865987843e-06, "loss": 0.6019, "step": 19271 }, { "epoch": 0.590658330268481, "grad_norm": 1.3236495279928682, "learning_rate": 3.787062588030423e-06, "loss": 0.603, "step": 19272 }, { "epoch": 0.5906889787912223, "grad_norm": 1.2348951674676987, "learning_rate": 3.7865811014134425e-06, "loss": 0.6263, "step": 19273 }, { "epoch": 0.5907196273139634, "grad_norm": 1.3121514334181823, "learning_rate": 3.7860996267525906e-06, "loss": 0.6561, "step": 19274 }, { "epoch": 0.5907502758367047, "grad_norm": 1.1869586911263847, "learning_rate": 3.7856181640526093e-06, "loss": 0.5599, "step": 19275 }, { "epoch": 0.5907809243594458, "grad_norm": 1.0665102801077608, "learning_rate": 3.7851367133182414e-06, "loss": 0.5466, "step": 19276 }, { "epoch": 0.5908115728821871, "grad_norm": 1.2626018218842052, "learning_rate": 3.784655274554234e-06, "loss": 0.6682, "step": 19277 }, { "epoch": 0.5908422214049283, "grad_norm": 1.303733624297984, "learning_rate": 3.7841738477653305e-06, "loss": 0.5891, "step": 19278 }, { "epoch": 0.5908728699276695, "grad_norm": 1.3557537868244787, "learning_rate": 3.7836924329562697e-06, "loss": 0.6685, "step": 19279 }, { "epoch": 0.5909035184504107, "grad_norm": 1.2327025873873712, "learning_rate": 3.7832110301318013e-06, "loss": 0.526, "step": 19280 }, { "epoch": 0.5909341669731519, "grad_norm": 0.503812767807858, "learning_rate": 3.7827296392966634e-06, "loss": 0.395, "step": 19281 }, { "epoch": 0.5909648154958931, "grad_norm": 1.3215786205976936, "learning_rate": 3.7822482604556043e-06, "loss": 0.6086, "step": 19282 }, { "epoch": 0.5909954640186343, "grad_norm": 1.3586507309303228, "learning_rate": 3.7817668936133645e-06, "loss": 0.5816, "step": 19283 }, { "epoch": 0.5910261125413755, "grad_norm": 1.2785027046443407, "learning_rate": 3.7812855387746857e-06, "loss": 0.5796, "step": 19284 }, { "epoch": 0.5910567610641168, "grad_norm": 1.276500752040821, "learning_rate": 3.780804195944313e-06, "loss": 0.6154, "step": 19285 }, { "epoch": 0.5910874095868579, "grad_norm": 0.4488074678548738, "learning_rate": 3.7803228651269887e-06, "loss": 0.4292, "step": 19286 }, { "epoch": 0.5911180581095992, "grad_norm": 1.2606225897073235, "learning_rate": 3.7798415463274544e-06, "loss": 0.5967, "step": 19287 }, { "epoch": 0.5911487066323403, "grad_norm": 1.2345323099778556, "learning_rate": 3.7793602395504546e-06, "loss": 0.5523, "step": 19288 }, { "epoch": 0.5911793551550816, "grad_norm": 1.3238554312000155, "learning_rate": 3.7788789448007297e-06, "loss": 0.74, "step": 19289 }, { "epoch": 0.5912100036778227, "grad_norm": 0.4264503214484174, "learning_rate": 3.7783976620830235e-06, "loss": 0.423, "step": 19290 }, { "epoch": 0.591240652200564, "grad_norm": 1.4431861111109625, "learning_rate": 3.7779163914020795e-06, "loss": 0.522, "step": 19291 }, { "epoch": 0.5912713007233051, "grad_norm": 0.43245481477214737, "learning_rate": 3.777435132762634e-06, "loss": 0.4109, "step": 19292 }, { "epoch": 0.5913019492460463, "grad_norm": 1.2430091216127868, "learning_rate": 3.7769538861694365e-06, "loss": 0.6792, "step": 19293 }, { "epoch": 0.5913325977687875, "grad_norm": 1.235341115727503, "learning_rate": 3.7764726516272243e-06, "loss": 0.6163, "step": 19294 }, { "epoch": 0.5913632462915287, "grad_norm": 1.2406756997618928, "learning_rate": 3.7759914291407397e-06, "loss": 0.5561, "step": 19295 }, { "epoch": 0.59139389481427, "grad_norm": 1.4281845431019768, "learning_rate": 3.775510218714725e-06, "loss": 0.7286, "step": 19296 }, { "epoch": 0.5914245433370111, "grad_norm": 1.298277175779832, "learning_rate": 3.7750290203539214e-06, "loss": 0.6637, "step": 19297 }, { "epoch": 0.5914551918597524, "grad_norm": 1.422773117452066, "learning_rate": 3.7745478340630693e-06, "loss": 0.724, "step": 19298 }, { "epoch": 0.5914858403824935, "grad_norm": 1.4056692325921254, "learning_rate": 3.774066659846912e-06, "loss": 0.6181, "step": 19299 }, { "epoch": 0.5915164889052348, "grad_norm": 1.3414905169604716, "learning_rate": 3.773585497710189e-06, "loss": 0.6416, "step": 19300 }, { "epoch": 0.5915471374279759, "grad_norm": 1.2248966136221267, "learning_rate": 3.7731043476576424e-06, "loss": 0.5838, "step": 19301 }, { "epoch": 0.5915777859507172, "grad_norm": 1.5325102189714503, "learning_rate": 3.7726232096940134e-06, "loss": 0.6217, "step": 19302 }, { "epoch": 0.5916084344734583, "grad_norm": 1.246749723364679, "learning_rate": 3.772142083824039e-06, "loss": 0.6704, "step": 19303 }, { "epoch": 0.5916390829961996, "grad_norm": 1.4537310388061848, "learning_rate": 3.7716609700524664e-06, "loss": 0.6143, "step": 19304 }, { "epoch": 0.5916697315189408, "grad_norm": 1.2681128540432123, "learning_rate": 3.771179868384031e-06, "loss": 0.5938, "step": 19305 }, { "epoch": 0.591700380041682, "grad_norm": 1.2969666396462503, "learning_rate": 3.7706987788234738e-06, "loss": 0.5836, "step": 19306 }, { "epoch": 0.5917310285644232, "grad_norm": 0.43749551792089153, "learning_rate": 3.7702177013755376e-06, "loss": 0.3887, "step": 19307 }, { "epoch": 0.5917616770871644, "grad_norm": 0.47356725284842055, "learning_rate": 3.7697366360449592e-06, "loss": 0.4093, "step": 19308 }, { "epoch": 0.5917923256099056, "grad_norm": 1.2218166454733508, "learning_rate": 3.7692555828364824e-06, "loss": 0.6496, "step": 19309 }, { "epoch": 0.5918229741326468, "grad_norm": 1.1856314781564798, "learning_rate": 3.768774541754845e-06, "loss": 0.6695, "step": 19310 }, { "epoch": 0.591853622655388, "grad_norm": 1.112248193828835, "learning_rate": 3.768293512804786e-06, "loss": 0.588, "step": 19311 }, { "epoch": 0.5918842711781293, "grad_norm": 1.2479724219932697, "learning_rate": 3.7678124959910466e-06, "loss": 0.6212, "step": 19312 }, { "epoch": 0.5919149197008704, "grad_norm": 1.3187892148962874, "learning_rate": 3.767331491318368e-06, "loss": 0.6021, "step": 19313 }, { "epoch": 0.5919455682236117, "grad_norm": 1.4064721097606214, "learning_rate": 3.7668504987914846e-06, "loss": 0.6524, "step": 19314 }, { "epoch": 0.5919762167463528, "grad_norm": 1.243148914840082, "learning_rate": 3.76636951841514e-06, "loss": 0.6001, "step": 19315 }, { "epoch": 0.5920068652690941, "grad_norm": 1.2151555850032218, "learning_rate": 3.7658885501940713e-06, "loss": 0.5652, "step": 19316 }, { "epoch": 0.5920375137918352, "grad_norm": 1.3896707956238628, "learning_rate": 3.765407594133019e-06, "loss": 0.5649, "step": 19317 }, { "epoch": 0.5920681623145765, "grad_norm": 1.3547590747959364, "learning_rate": 3.7649266502367225e-06, "loss": 0.6949, "step": 19318 }, { "epoch": 0.5920988108373176, "grad_norm": 1.2814612738016282, "learning_rate": 3.764445718509918e-06, "loss": 0.6745, "step": 19319 }, { "epoch": 0.5921294593600589, "grad_norm": 1.315077426076696, "learning_rate": 3.7639647989573474e-06, "loss": 0.6204, "step": 19320 }, { "epoch": 0.5921601078828, "grad_norm": 1.4087997149896963, "learning_rate": 3.7634838915837477e-06, "loss": 0.6478, "step": 19321 }, { "epoch": 0.5921907564055413, "grad_norm": 1.0071275831897106, "learning_rate": 3.763002996393857e-06, "loss": 0.5548, "step": 19322 }, { "epoch": 0.5922214049282825, "grad_norm": 1.3276750046134147, "learning_rate": 3.7625221133924156e-06, "loss": 0.7081, "step": 19323 }, { "epoch": 0.5922520534510236, "grad_norm": 0.48062470853120376, "learning_rate": 3.76204124258416e-06, "loss": 0.3834, "step": 19324 }, { "epoch": 0.5922827019737649, "grad_norm": 0.716838352555457, "learning_rate": 3.7615603839738275e-06, "loss": 0.3919, "step": 19325 }, { "epoch": 0.592313350496506, "grad_norm": 1.6455438605140016, "learning_rate": 3.761079537566158e-06, "loss": 0.7402, "step": 19326 }, { "epoch": 0.5923439990192473, "grad_norm": 1.3237178846091697, "learning_rate": 3.7605987033658887e-06, "loss": 0.5975, "step": 19327 }, { "epoch": 0.5923746475419884, "grad_norm": 1.328732888568379, "learning_rate": 3.760117881377758e-06, "loss": 0.6084, "step": 19328 }, { "epoch": 0.5924052960647297, "grad_norm": 1.198680640523415, "learning_rate": 3.759637071606503e-06, "loss": 0.5786, "step": 19329 }, { "epoch": 0.5924359445874708, "grad_norm": 1.234100848684226, "learning_rate": 3.75915627405686e-06, "loss": 0.6651, "step": 19330 }, { "epoch": 0.5924665931102121, "grad_norm": 1.4157645070227458, "learning_rate": 3.758675488733569e-06, "loss": 0.6481, "step": 19331 }, { "epoch": 0.5924972416329533, "grad_norm": 1.3259589858540766, "learning_rate": 3.7581947156413673e-06, "loss": 0.6621, "step": 19332 }, { "epoch": 0.5925278901556945, "grad_norm": 1.2395752020080948, "learning_rate": 3.757713954784988e-06, "loss": 0.6689, "step": 19333 }, { "epoch": 0.5925585386784357, "grad_norm": 1.2792699041546003, "learning_rate": 3.757233206169173e-06, "loss": 0.666, "step": 19334 }, { "epoch": 0.5925891872011769, "grad_norm": 1.2453787671449204, "learning_rate": 3.7567524697986547e-06, "loss": 0.7087, "step": 19335 }, { "epoch": 0.5926198357239181, "grad_norm": 0.4525608443379306, "learning_rate": 3.7562717456781755e-06, "loss": 0.3824, "step": 19336 }, { "epoch": 0.5926504842466593, "grad_norm": 1.3509652052266463, "learning_rate": 3.755791033812468e-06, "loss": 0.5628, "step": 19337 }, { "epoch": 0.5926811327694005, "grad_norm": 1.1726123354938023, "learning_rate": 3.755310334206269e-06, "loss": 0.6476, "step": 19338 }, { "epoch": 0.5927117812921417, "grad_norm": 1.3857984601344764, "learning_rate": 3.7548296468643164e-06, "loss": 0.5806, "step": 19339 }, { "epoch": 0.5927424298148829, "grad_norm": 1.123280537165614, "learning_rate": 3.754348971791346e-06, "loss": 0.6165, "step": 19340 }, { "epoch": 0.5927730783376242, "grad_norm": 1.4243132392333002, "learning_rate": 3.753868308992093e-06, "loss": 0.5328, "step": 19341 }, { "epoch": 0.5928037268603653, "grad_norm": 1.2600504482565018, "learning_rate": 3.7533876584712953e-06, "loss": 0.5964, "step": 19342 }, { "epoch": 0.5928343753831066, "grad_norm": 1.411751154814706, "learning_rate": 3.7529070202336864e-06, "loss": 0.6102, "step": 19343 }, { "epoch": 0.5928650239058477, "grad_norm": 1.362874972352384, "learning_rate": 3.7524263942840056e-06, "loss": 0.7149, "step": 19344 }, { "epoch": 0.592895672428589, "grad_norm": 1.279892733383836, "learning_rate": 3.751945780626988e-06, "loss": 0.5699, "step": 19345 }, { "epoch": 0.5929263209513301, "grad_norm": 1.4021814556826966, "learning_rate": 3.7514651792673634e-06, "loss": 0.6253, "step": 19346 }, { "epoch": 0.5929569694740714, "grad_norm": 1.3112666630440368, "learning_rate": 3.750984590209876e-06, "loss": 0.6743, "step": 19347 }, { "epoch": 0.5929876179968125, "grad_norm": 1.2484088335353751, "learning_rate": 3.7505040134592557e-06, "loss": 0.5477, "step": 19348 }, { "epoch": 0.5930182665195538, "grad_norm": 1.3189190790236043, "learning_rate": 3.750023449020238e-06, "loss": 0.6099, "step": 19349 }, { "epoch": 0.593048915042295, "grad_norm": 0.45224500284098096, "learning_rate": 3.7495428968975606e-06, "loss": 0.3786, "step": 19350 }, { "epoch": 0.5930795635650362, "grad_norm": 1.413248995404663, "learning_rate": 3.749062357095956e-06, "loss": 0.735, "step": 19351 }, { "epoch": 0.5931102120877774, "grad_norm": 1.2528873492430044, "learning_rate": 3.7485818296201603e-06, "loss": 0.6224, "step": 19352 }, { "epoch": 0.5931408606105186, "grad_norm": 1.3253735137525258, "learning_rate": 3.7481013144749077e-06, "loss": 0.5632, "step": 19353 }, { "epoch": 0.5931715091332598, "grad_norm": 1.3637313538589642, "learning_rate": 3.7476208116649333e-06, "loss": 0.6463, "step": 19354 }, { "epoch": 0.5932021576560009, "grad_norm": 1.3172043206652897, "learning_rate": 3.747140321194972e-06, "loss": 0.649, "step": 19355 }, { "epoch": 0.5932328061787422, "grad_norm": 1.2609624691048231, "learning_rate": 3.746659843069759e-06, "loss": 0.6883, "step": 19356 }, { "epoch": 0.5932634547014833, "grad_norm": 1.30405418795218, "learning_rate": 3.7461793772940236e-06, "loss": 0.665, "step": 19357 }, { "epoch": 0.5932941032242246, "grad_norm": 0.45971139234371733, "learning_rate": 3.745698923872507e-06, "loss": 0.3858, "step": 19358 }, { "epoch": 0.5933247517469658, "grad_norm": 1.3130156501614534, "learning_rate": 3.7452184828099385e-06, "loss": 0.6192, "step": 19359 }, { "epoch": 0.593355400269707, "grad_norm": 1.2922707819881973, "learning_rate": 3.744738054111053e-06, "loss": 0.6489, "step": 19360 }, { "epoch": 0.5933860487924482, "grad_norm": 1.2807244195311112, "learning_rate": 3.744257637780585e-06, "loss": 0.5501, "step": 19361 }, { "epoch": 0.5934166973151894, "grad_norm": 1.305845234682663, "learning_rate": 3.743777233823267e-06, "loss": 0.6581, "step": 19362 }, { "epoch": 0.5934473458379306, "grad_norm": 0.4311783321865131, "learning_rate": 3.743296842243834e-06, "loss": 0.4002, "step": 19363 }, { "epoch": 0.5934779943606718, "grad_norm": 1.2701300760273067, "learning_rate": 3.7428164630470193e-06, "loss": 0.5959, "step": 19364 }, { "epoch": 0.593508642883413, "grad_norm": 1.2504708510917524, "learning_rate": 3.7423360962375544e-06, "loss": 0.5984, "step": 19365 }, { "epoch": 0.5935392914061542, "grad_norm": 1.4770190522248805, "learning_rate": 3.741855741820176e-06, "loss": 0.6919, "step": 19366 }, { "epoch": 0.5935699399288954, "grad_norm": 1.3623959587284977, "learning_rate": 3.741375399799614e-06, "loss": 0.6411, "step": 19367 }, { "epoch": 0.5936005884516367, "grad_norm": 1.1984959198694647, "learning_rate": 3.7408950701806003e-06, "loss": 0.5945, "step": 19368 }, { "epoch": 0.5936312369743778, "grad_norm": 1.309232958747993, "learning_rate": 3.7404147529678715e-06, "loss": 0.5801, "step": 19369 }, { "epoch": 0.5936618854971191, "grad_norm": 1.1887422918292079, "learning_rate": 3.7399344481661582e-06, "loss": 0.5558, "step": 19370 }, { "epoch": 0.5936925340198602, "grad_norm": 0.46748496882679363, "learning_rate": 3.739454155780192e-06, "loss": 0.4166, "step": 19371 }, { "epoch": 0.5937231825426015, "grad_norm": 0.4754526671291052, "learning_rate": 3.7389738758147075e-06, "loss": 0.4046, "step": 19372 }, { "epoch": 0.5937538310653426, "grad_norm": 1.3655987840714316, "learning_rate": 3.738493608274435e-06, "loss": 0.6308, "step": 19373 }, { "epoch": 0.5937844795880839, "grad_norm": 1.1381639693286913, "learning_rate": 3.7380133531641093e-06, "loss": 0.6312, "step": 19374 }, { "epoch": 0.593815128110825, "grad_norm": 1.1313791667902386, "learning_rate": 3.7375331104884617e-06, "loss": 0.5448, "step": 19375 }, { "epoch": 0.5938457766335663, "grad_norm": 1.2846730776815032, "learning_rate": 3.73705288025222e-06, "loss": 0.583, "step": 19376 }, { "epoch": 0.5938764251563075, "grad_norm": 0.4587610492402443, "learning_rate": 3.7365726624601228e-06, "loss": 0.3937, "step": 19377 }, { "epoch": 0.5939070736790487, "grad_norm": 1.4235604489591247, "learning_rate": 3.736092457116897e-06, "loss": 0.6881, "step": 19378 }, { "epoch": 0.5939377222017899, "grad_norm": 1.3146386829004415, "learning_rate": 3.7356122642272753e-06, "loss": 0.7519, "step": 19379 }, { "epoch": 0.5939683707245311, "grad_norm": 1.5430594807209999, "learning_rate": 3.73513208379599e-06, "loss": 0.6198, "step": 19380 }, { "epoch": 0.5939990192472723, "grad_norm": 0.44754779472090456, "learning_rate": 3.7346519158277707e-06, "loss": 0.4275, "step": 19381 }, { "epoch": 0.5940296677700135, "grad_norm": 1.4876782887948365, "learning_rate": 3.734171760327351e-06, "loss": 0.6311, "step": 19382 }, { "epoch": 0.5940603162927547, "grad_norm": 1.3045240887017748, "learning_rate": 3.7336916172994608e-06, "loss": 0.5574, "step": 19383 }, { "epoch": 0.594090964815496, "grad_norm": 1.4486495380130984, "learning_rate": 3.73321148674883e-06, "loss": 0.7344, "step": 19384 }, { "epoch": 0.5941216133382371, "grad_norm": 1.247387605310574, "learning_rate": 3.7327313686801926e-06, "loss": 0.6274, "step": 19385 }, { "epoch": 0.5941522618609782, "grad_norm": 1.3887023203601732, "learning_rate": 3.732251263098277e-06, "loss": 0.649, "step": 19386 }, { "epoch": 0.5941829103837195, "grad_norm": 0.47105725744246274, "learning_rate": 3.731771170007811e-06, "loss": 0.3937, "step": 19387 }, { "epoch": 0.5942135589064607, "grad_norm": 0.45007563212736007, "learning_rate": 3.7312910894135324e-06, "loss": 0.4084, "step": 19388 }, { "epoch": 0.5942442074292019, "grad_norm": 1.3658169256464432, "learning_rate": 3.730811021320163e-06, "loss": 0.5864, "step": 19389 }, { "epoch": 0.5942748559519431, "grad_norm": 1.130820342470837, "learning_rate": 3.730330965732441e-06, "loss": 0.6672, "step": 19390 }, { "epoch": 0.5943055044746843, "grad_norm": 1.4215563072765576, "learning_rate": 3.7298509226550916e-06, "loss": 0.6714, "step": 19391 }, { "epoch": 0.5943361529974255, "grad_norm": 1.1439720183757065, "learning_rate": 3.729370892092845e-06, "loss": 0.5986, "step": 19392 }, { "epoch": 0.5943668015201667, "grad_norm": 0.41605667611568775, "learning_rate": 3.7288908740504337e-06, "loss": 0.4004, "step": 19393 }, { "epoch": 0.5943974500429079, "grad_norm": 0.4277687810558684, "learning_rate": 3.7284108685325853e-06, "loss": 0.4047, "step": 19394 }, { "epoch": 0.5944280985656492, "grad_norm": 1.2792372905324303, "learning_rate": 3.727930875544029e-06, "loss": 0.6798, "step": 19395 }, { "epoch": 0.5944587470883903, "grad_norm": 1.317583693916366, "learning_rate": 3.727450895089497e-06, "loss": 0.5221, "step": 19396 }, { "epoch": 0.5944893956111316, "grad_norm": 0.44203817634993076, "learning_rate": 3.726970927173717e-06, "loss": 0.3991, "step": 19397 }, { "epoch": 0.5945200441338727, "grad_norm": 1.3158090150766264, "learning_rate": 3.7264909718014153e-06, "loss": 0.5847, "step": 19398 }, { "epoch": 0.594550692656614, "grad_norm": 1.3551944574009247, "learning_rate": 3.726011028977327e-06, "loss": 0.6859, "step": 19399 }, { "epoch": 0.5945813411793551, "grad_norm": 1.3200376735235666, "learning_rate": 3.725531098706175e-06, "loss": 0.6298, "step": 19400 }, { "epoch": 0.5946119897020964, "grad_norm": 1.3543715214637475, "learning_rate": 3.7250511809926943e-06, "loss": 0.5973, "step": 19401 }, { "epoch": 0.5946426382248375, "grad_norm": 1.2313400322937946, "learning_rate": 3.72457127584161e-06, "loss": 0.6443, "step": 19402 }, { "epoch": 0.5946732867475788, "grad_norm": 1.4083916841952477, "learning_rate": 3.724091383257649e-06, "loss": 0.652, "step": 19403 }, { "epoch": 0.59470393527032, "grad_norm": 1.563805907596109, "learning_rate": 3.723611503245544e-06, "loss": 0.6207, "step": 19404 }, { "epoch": 0.5947345837930612, "grad_norm": 1.2948774314187776, "learning_rate": 3.723131635810021e-06, "loss": 0.5378, "step": 19405 }, { "epoch": 0.5947652323158024, "grad_norm": 1.162568394714968, "learning_rate": 3.7226517809558084e-06, "loss": 0.5501, "step": 19406 }, { "epoch": 0.5947958808385436, "grad_norm": 1.1142025384769008, "learning_rate": 3.7221719386876342e-06, "loss": 0.6121, "step": 19407 }, { "epoch": 0.5948265293612848, "grad_norm": 1.1904917799056913, "learning_rate": 3.721692109010227e-06, "loss": 0.599, "step": 19408 }, { "epoch": 0.594857177884026, "grad_norm": 1.1298010640860336, "learning_rate": 3.7212122919283158e-06, "loss": 0.4685, "step": 19409 }, { "epoch": 0.5948878264067672, "grad_norm": 1.2918456868469834, "learning_rate": 3.7207324874466274e-06, "loss": 0.6266, "step": 19410 }, { "epoch": 0.5949184749295084, "grad_norm": 1.2664892602476294, "learning_rate": 3.720252695569887e-06, "loss": 0.5465, "step": 19411 }, { "epoch": 0.5949491234522496, "grad_norm": 1.2252218618176147, "learning_rate": 3.7197729163028252e-06, "loss": 0.635, "step": 19412 }, { "epoch": 0.5949797719749909, "grad_norm": 1.4016572726509884, "learning_rate": 3.7192931496501687e-06, "loss": 0.56, "step": 19413 }, { "epoch": 0.595010420497732, "grad_norm": 0.4821276724505922, "learning_rate": 3.718813395616644e-06, "loss": 0.4052, "step": 19414 }, { "epoch": 0.5950410690204733, "grad_norm": 1.3190794598227, "learning_rate": 3.7183336542069792e-06, "loss": 0.595, "step": 19415 }, { "epoch": 0.5950717175432144, "grad_norm": 1.2806346173414396, "learning_rate": 3.7178539254258992e-06, "loss": 0.62, "step": 19416 }, { "epoch": 0.5951023660659556, "grad_norm": 1.3664377517196318, "learning_rate": 3.7173742092781344e-06, "loss": 0.6024, "step": 19417 }, { "epoch": 0.5951330145886968, "grad_norm": 0.4753017440475851, "learning_rate": 3.7168945057684103e-06, "loss": 0.4316, "step": 19418 }, { "epoch": 0.595163663111438, "grad_norm": 1.4219193401904042, "learning_rate": 3.71641481490145e-06, "loss": 0.6733, "step": 19419 }, { "epoch": 0.5951943116341792, "grad_norm": 1.1477097631650153, "learning_rate": 3.7159351366819863e-06, "loss": 0.5359, "step": 19420 }, { "epoch": 0.5952249601569204, "grad_norm": 1.2305027036344645, "learning_rate": 3.7154554711147405e-06, "loss": 0.7239, "step": 19421 }, { "epoch": 0.5952556086796617, "grad_norm": 1.3066209670581987, "learning_rate": 3.7149758182044405e-06, "loss": 0.6652, "step": 19422 }, { "epoch": 0.5952862572024028, "grad_norm": 1.2198588828991068, "learning_rate": 3.714496177955813e-06, "loss": 0.6203, "step": 19423 }, { "epoch": 0.5953169057251441, "grad_norm": 1.2312522577261575, "learning_rate": 3.7140165503735835e-06, "loss": 0.6485, "step": 19424 }, { "epoch": 0.5953475542478852, "grad_norm": 1.13824537257071, "learning_rate": 3.7135369354624774e-06, "loss": 0.6428, "step": 19425 }, { "epoch": 0.5953782027706265, "grad_norm": 1.373875397279213, "learning_rate": 3.713057333227222e-06, "loss": 0.7077, "step": 19426 }, { "epoch": 0.5954088512933676, "grad_norm": 1.266187727648396, "learning_rate": 3.712577743672541e-06, "loss": 0.5955, "step": 19427 }, { "epoch": 0.5954394998161089, "grad_norm": 1.082475061278525, "learning_rate": 3.7120981668031608e-06, "loss": 0.6033, "step": 19428 }, { "epoch": 0.59547014833885, "grad_norm": 1.37035887123559, "learning_rate": 3.7116186026238094e-06, "loss": 0.7426, "step": 19429 }, { "epoch": 0.5955007968615913, "grad_norm": 1.6649891671445718, "learning_rate": 3.7111390511392054e-06, "loss": 0.7096, "step": 19430 }, { "epoch": 0.5955314453843324, "grad_norm": 0.49042092651379515, "learning_rate": 3.7106595123540818e-06, "loss": 0.4088, "step": 19431 }, { "epoch": 0.5955620939070737, "grad_norm": 1.3050220771774441, "learning_rate": 3.710179986273159e-06, "loss": 0.6396, "step": 19432 }, { "epoch": 0.5955927424298149, "grad_norm": 1.2558585642031828, "learning_rate": 3.709700472901161e-06, "loss": 0.6863, "step": 19433 }, { "epoch": 0.5956233909525561, "grad_norm": 1.4886578193871625, "learning_rate": 3.7092209722428162e-06, "loss": 0.5585, "step": 19434 }, { "epoch": 0.5956540394752973, "grad_norm": 1.1266075653031247, "learning_rate": 3.708741484302846e-06, "loss": 0.5715, "step": 19435 }, { "epoch": 0.5956846879980385, "grad_norm": 1.1381490087206758, "learning_rate": 3.708262009085978e-06, "loss": 0.6841, "step": 19436 }, { "epoch": 0.5957153365207797, "grad_norm": 1.2546732307917199, "learning_rate": 3.707782546596934e-06, "loss": 0.6077, "step": 19437 }, { "epoch": 0.5957459850435209, "grad_norm": 1.3404615567740243, "learning_rate": 3.7073030968404382e-06, "loss": 0.6312, "step": 19438 }, { "epoch": 0.5957766335662621, "grad_norm": 1.3598566078657988, "learning_rate": 3.706823659821217e-06, "loss": 0.5855, "step": 19439 }, { "epoch": 0.5958072820890034, "grad_norm": 1.3356671477122406, "learning_rate": 3.706344235543995e-06, "loss": 0.6125, "step": 19440 }, { "epoch": 0.5958379306117445, "grad_norm": 1.3359438092646096, "learning_rate": 3.7058648240134897e-06, "loss": 0.7032, "step": 19441 }, { "epoch": 0.5958685791344858, "grad_norm": 1.1605055071249366, "learning_rate": 3.7053854252344334e-06, "loss": 0.602, "step": 19442 }, { "epoch": 0.5958992276572269, "grad_norm": 0.4718290125202969, "learning_rate": 3.7049060392115425e-06, "loss": 0.3966, "step": 19443 }, { "epoch": 0.5959298761799682, "grad_norm": 1.2573199179316217, "learning_rate": 3.704426665949547e-06, "loss": 0.6115, "step": 19444 }, { "epoch": 0.5959605247027093, "grad_norm": 1.2913124980825357, "learning_rate": 3.7039473054531662e-06, "loss": 0.6509, "step": 19445 }, { "epoch": 0.5959911732254506, "grad_norm": 1.297983706811635, "learning_rate": 3.7034679577271226e-06, "loss": 0.6599, "step": 19446 }, { "epoch": 0.5960218217481917, "grad_norm": 1.434478783817704, "learning_rate": 3.7029886227761426e-06, "loss": 0.6845, "step": 19447 }, { "epoch": 0.5960524702709329, "grad_norm": 1.2593004480385235, "learning_rate": 3.7025093006049467e-06, "loss": 0.6262, "step": 19448 }, { "epoch": 0.5960831187936741, "grad_norm": 0.4398744469844607, "learning_rate": 3.702029991218258e-06, "loss": 0.4065, "step": 19449 }, { "epoch": 0.5961137673164153, "grad_norm": 1.2455526373329142, "learning_rate": 3.7015506946208014e-06, "loss": 0.6584, "step": 19450 }, { "epoch": 0.5961444158391566, "grad_norm": 1.1819830295623819, "learning_rate": 3.7010714108172986e-06, "loss": 0.5965, "step": 19451 }, { "epoch": 0.5961750643618977, "grad_norm": 1.273168825794981, "learning_rate": 3.7005921398124682e-06, "loss": 0.5774, "step": 19452 }, { "epoch": 0.596205712884639, "grad_norm": 1.3934489428240835, "learning_rate": 3.70011288161104e-06, "loss": 0.6418, "step": 19453 }, { "epoch": 0.5962363614073801, "grad_norm": 1.3955459764428393, "learning_rate": 3.6996336362177274e-06, "loss": 0.6122, "step": 19454 }, { "epoch": 0.5962670099301214, "grad_norm": 1.2388673761416578, "learning_rate": 3.699154403637262e-06, "loss": 0.6099, "step": 19455 }, { "epoch": 0.5962976584528625, "grad_norm": 0.44098443190772846, "learning_rate": 3.6986751838743596e-06, "loss": 0.4038, "step": 19456 }, { "epoch": 0.5963283069756038, "grad_norm": 1.2241923158883057, "learning_rate": 3.6981959769337423e-06, "loss": 0.6036, "step": 19457 }, { "epoch": 0.596358955498345, "grad_norm": 1.1774227371701746, "learning_rate": 3.6977167828201344e-06, "loss": 0.6027, "step": 19458 }, { "epoch": 0.5963896040210862, "grad_norm": 1.2194183921628299, "learning_rate": 3.6972376015382563e-06, "loss": 0.5924, "step": 19459 }, { "epoch": 0.5964202525438274, "grad_norm": 1.2339771213699575, "learning_rate": 3.6967584330928274e-06, "loss": 0.6294, "step": 19460 }, { "epoch": 0.5964509010665686, "grad_norm": 1.271374328192658, "learning_rate": 3.6962792774885735e-06, "loss": 0.677, "step": 19461 }, { "epoch": 0.5964815495893098, "grad_norm": 1.4801963559364075, "learning_rate": 3.695800134730212e-06, "loss": 0.7359, "step": 19462 }, { "epoch": 0.596512198112051, "grad_norm": 1.364964179929845, "learning_rate": 3.695321004822467e-06, "loss": 0.5467, "step": 19463 }, { "epoch": 0.5965428466347922, "grad_norm": 0.4663511218373436, "learning_rate": 3.6948418877700577e-06, "loss": 0.4158, "step": 19464 }, { "epoch": 0.5965734951575334, "grad_norm": 1.3211875954243624, "learning_rate": 3.6943627835777035e-06, "loss": 0.5787, "step": 19465 }, { "epoch": 0.5966041436802746, "grad_norm": 1.295674384121839, "learning_rate": 3.693883692250128e-06, "loss": 0.6478, "step": 19466 }, { "epoch": 0.5966347922030159, "grad_norm": 1.190465512006941, "learning_rate": 3.693404613792051e-06, "loss": 0.6091, "step": 19467 }, { "epoch": 0.596665440725757, "grad_norm": 1.1552925975369892, "learning_rate": 3.6929255482081916e-06, "loss": 0.5969, "step": 19468 }, { "epoch": 0.5966960892484983, "grad_norm": 1.20635361724399, "learning_rate": 3.692446495503272e-06, "loss": 0.6288, "step": 19469 }, { "epoch": 0.5967267377712394, "grad_norm": 1.2688218373656361, "learning_rate": 3.6919674556820108e-06, "loss": 0.6775, "step": 19470 }, { "epoch": 0.5967573862939807, "grad_norm": 0.4570501873059729, "learning_rate": 3.691488428749129e-06, "loss": 0.4156, "step": 19471 }, { "epoch": 0.5967880348167218, "grad_norm": 1.3836048774869258, "learning_rate": 3.691009414709349e-06, "loss": 0.5733, "step": 19472 }, { "epoch": 0.5968186833394631, "grad_norm": 1.519623064672259, "learning_rate": 3.6905304135673848e-06, "loss": 0.5771, "step": 19473 }, { "epoch": 0.5968493318622042, "grad_norm": 1.2874187030689792, "learning_rate": 3.6900514253279618e-06, "loss": 0.5744, "step": 19474 }, { "epoch": 0.5968799803849455, "grad_norm": 0.448463232709947, "learning_rate": 3.689572449995797e-06, "loss": 0.4092, "step": 19475 }, { "epoch": 0.5969106289076866, "grad_norm": 1.2795832323170588, "learning_rate": 3.6890934875756086e-06, "loss": 0.7035, "step": 19476 }, { "epoch": 0.5969412774304279, "grad_norm": 1.1954767065803609, "learning_rate": 3.6886145380721182e-06, "loss": 0.641, "step": 19477 }, { "epoch": 0.5969719259531691, "grad_norm": 1.348465805915103, "learning_rate": 3.6881356014900447e-06, "loss": 0.6102, "step": 19478 }, { "epoch": 0.5970025744759102, "grad_norm": 1.3120232231688174, "learning_rate": 3.687656677834106e-06, "loss": 0.5907, "step": 19479 }, { "epoch": 0.5970332229986515, "grad_norm": 1.2464208463690707, "learning_rate": 3.6871777671090233e-06, "loss": 0.6121, "step": 19480 }, { "epoch": 0.5970638715213926, "grad_norm": 1.2577001311063096, "learning_rate": 3.686698869319512e-06, "loss": 0.648, "step": 19481 }, { "epoch": 0.5970945200441339, "grad_norm": 1.322422334045282, "learning_rate": 3.686219984470294e-06, "loss": 0.6106, "step": 19482 }, { "epoch": 0.597125168566875, "grad_norm": 1.2823911002620194, "learning_rate": 3.685741112566088e-06, "loss": 0.6354, "step": 19483 }, { "epoch": 0.5971558170896163, "grad_norm": 1.2459573421603967, "learning_rate": 3.6852622536116076e-06, "loss": 0.6891, "step": 19484 }, { "epoch": 0.5971864656123574, "grad_norm": 1.1934909655340258, "learning_rate": 3.684783407611578e-06, "loss": 0.5992, "step": 19485 }, { "epoch": 0.5972171141350987, "grad_norm": 0.4740234406990959, "learning_rate": 3.684304574570713e-06, "loss": 0.4229, "step": 19486 }, { "epoch": 0.5972477626578399, "grad_norm": 1.491388792550158, "learning_rate": 3.6838257544937307e-06, "loss": 0.6624, "step": 19487 }, { "epoch": 0.5972784111805811, "grad_norm": 1.3270005047704942, "learning_rate": 3.68334694738535e-06, "loss": 0.5586, "step": 19488 }, { "epoch": 0.5973090597033223, "grad_norm": 1.3161762967837396, "learning_rate": 3.6828681532502884e-06, "loss": 0.6486, "step": 19489 }, { "epoch": 0.5973397082260635, "grad_norm": 1.434649812233458, "learning_rate": 3.6823893720932656e-06, "loss": 0.6555, "step": 19490 }, { "epoch": 0.5973703567488047, "grad_norm": 1.1622291762603258, "learning_rate": 3.6819106039189967e-06, "loss": 0.7086, "step": 19491 }, { "epoch": 0.5974010052715459, "grad_norm": 1.212196355706885, "learning_rate": 3.681431848732199e-06, "loss": 0.7075, "step": 19492 }, { "epoch": 0.5974316537942871, "grad_norm": 1.294318879620444, "learning_rate": 3.6809531065375914e-06, "loss": 0.662, "step": 19493 }, { "epoch": 0.5974623023170283, "grad_norm": 1.3377975089530731, "learning_rate": 3.680474377339892e-06, "loss": 0.6119, "step": 19494 }, { "epoch": 0.5974929508397695, "grad_norm": 1.388596673983971, "learning_rate": 3.6799956611438124e-06, "loss": 0.7301, "step": 19495 }, { "epoch": 0.5975235993625108, "grad_norm": 1.7642950932131658, "learning_rate": 3.679516957954077e-06, "loss": 0.635, "step": 19496 }, { "epoch": 0.5975542478852519, "grad_norm": 1.2520852285533672, "learning_rate": 3.6790382677753954e-06, "loss": 0.6484, "step": 19497 }, { "epoch": 0.5975848964079932, "grad_norm": 1.3913203887191112, "learning_rate": 3.6785595906124903e-06, "loss": 0.6671, "step": 19498 }, { "epoch": 0.5976155449307343, "grad_norm": 1.2181420531079106, "learning_rate": 3.678080926470076e-06, "loss": 0.5583, "step": 19499 }, { "epoch": 0.5976461934534756, "grad_norm": 1.5117197900697321, "learning_rate": 3.6776022753528664e-06, "loss": 0.5996, "step": 19500 }, { "epoch": 0.5976768419762167, "grad_norm": 1.5001936697683182, "learning_rate": 3.6771236372655817e-06, "loss": 0.6358, "step": 19501 }, { "epoch": 0.597707490498958, "grad_norm": 1.1855965754401767, "learning_rate": 3.6766450122129355e-06, "loss": 0.5374, "step": 19502 }, { "epoch": 0.5977381390216991, "grad_norm": 1.237386410611732, "learning_rate": 3.6761664001996437e-06, "loss": 0.5445, "step": 19503 }, { "epoch": 0.5977687875444404, "grad_norm": 1.3363112370914612, "learning_rate": 3.6756878012304242e-06, "loss": 0.6005, "step": 19504 }, { "epoch": 0.5977994360671816, "grad_norm": 0.44136602560680094, "learning_rate": 3.6752092153099934e-06, "loss": 0.3975, "step": 19505 }, { "epoch": 0.5978300845899228, "grad_norm": 0.44636507327354985, "learning_rate": 3.674730642443061e-06, "loss": 0.3965, "step": 19506 }, { "epoch": 0.597860733112664, "grad_norm": 1.299660992927694, "learning_rate": 3.6742520826343508e-06, "loss": 0.6788, "step": 19507 }, { "epoch": 0.5978913816354052, "grad_norm": 1.3178231337896162, "learning_rate": 3.673773535888571e-06, "loss": 0.6454, "step": 19508 }, { "epoch": 0.5979220301581464, "grad_norm": 1.271257501062863, "learning_rate": 3.673295002210442e-06, "loss": 0.6144, "step": 19509 }, { "epoch": 0.5979526786808875, "grad_norm": 1.2216285846278696, "learning_rate": 3.672816481604676e-06, "loss": 0.6252, "step": 19510 }, { "epoch": 0.5979833272036288, "grad_norm": 1.2261047741408666, "learning_rate": 3.672337974075988e-06, "loss": 0.6829, "step": 19511 }, { "epoch": 0.5980139757263699, "grad_norm": 1.3821302790256877, "learning_rate": 3.671859479629094e-06, "loss": 0.7285, "step": 19512 }, { "epoch": 0.5980446242491112, "grad_norm": 2.948375992476536, "learning_rate": 3.671380998268709e-06, "loss": 0.554, "step": 19513 }, { "epoch": 0.5980752727718524, "grad_norm": 1.2005194960224284, "learning_rate": 3.670902529999546e-06, "loss": 0.6027, "step": 19514 }, { "epoch": 0.5981059212945936, "grad_norm": 1.2146952976589254, "learning_rate": 3.670424074826322e-06, "loss": 0.5954, "step": 19515 }, { "epoch": 0.5981365698173348, "grad_norm": 1.2515825246771546, "learning_rate": 3.6699456327537477e-06, "loss": 0.6301, "step": 19516 }, { "epoch": 0.598167218340076, "grad_norm": 1.3358317267246786, "learning_rate": 3.6694672037865416e-06, "loss": 0.5758, "step": 19517 }, { "epoch": 0.5981978668628172, "grad_norm": 1.2504904550323448, "learning_rate": 3.6689887879294146e-06, "loss": 0.5501, "step": 19518 }, { "epoch": 0.5982285153855584, "grad_norm": 1.0787733100526657, "learning_rate": 3.6685103851870808e-06, "loss": 0.5509, "step": 19519 }, { "epoch": 0.5982591639082996, "grad_norm": 0.4682051771115555, "learning_rate": 3.6680319955642556e-06, "loss": 0.4039, "step": 19520 }, { "epoch": 0.5982898124310408, "grad_norm": 1.2720173219276492, "learning_rate": 3.6675536190656525e-06, "loss": 0.6577, "step": 19521 }, { "epoch": 0.598320460953782, "grad_norm": 1.1422583822273993, "learning_rate": 3.6670752556959834e-06, "loss": 0.5805, "step": 19522 }, { "epoch": 0.5983511094765233, "grad_norm": 0.44734802196797835, "learning_rate": 3.6665969054599633e-06, "loss": 0.4295, "step": 19523 }, { "epoch": 0.5983817579992644, "grad_norm": 1.2037350887419598, "learning_rate": 3.6661185683623047e-06, "loss": 0.5453, "step": 19524 }, { "epoch": 0.5984124065220057, "grad_norm": 1.5274698734603362, "learning_rate": 3.665640244407721e-06, "loss": 0.594, "step": 19525 }, { "epoch": 0.5984430550447468, "grad_norm": 1.4833369143427344, "learning_rate": 3.6651619336009275e-06, "loss": 0.6748, "step": 19526 }, { "epoch": 0.5984737035674881, "grad_norm": 1.3602850543486626, "learning_rate": 3.664683635946632e-06, "loss": 0.6312, "step": 19527 }, { "epoch": 0.5985043520902292, "grad_norm": 1.2244259329684373, "learning_rate": 3.664205351449553e-06, "loss": 0.6051, "step": 19528 }, { "epoch": 0.5985350006129705, "grad_norm": 0.4467927643140506, "learning_rate": 3.663727080114399e-06, "loss": 0.4105, "step": 19529 }, { "epoch": 0.5985656491357116, "grad_norm": 0.4399993670563102, "learning_rate": 3.663248821945884e-06, "loss": 0.394, "step": 19530 }, { "epoch": 0.5985962976584529, "grad_norm": 0.4570797340434601, "learning_rate": 3.6627705769487204e-06, "loss": 0.429, "step": 19531 }, { "epoch": 0.598626946181194, "grad_norm": 1.237967615935823, "learning_rate": 3.662292345127621e-06, "loss": 0.5893, "step": 19532 }, { "epoch": 0.5986575947039353, "grad_norm": 1.3612834634077462, "learning_rate": 3.6618141264872964e-06, "loss": 0.6658, "step": 19533 }, { "epoch": 0.5986882432266765, "grad_norm": 1.2094848127471796, "learning_rate": 3.6613359210324606e-06, "loss": 0.6666, "step": 19534 }, { "epoch": 0.5987188917494177, "grad_norm": 1.3367352180747145, "learning_rate": 3.6608577287678226e-06, "loss": 0.7625, "step": 19535 }, { "epoch": 0.5987495402721589, "grad_norm": 1.2196229549498805, "learning_rate": 3.6603795496980983e-06, "loss": 0.6105, "step": 19536 }, { "epoch": 0.5987801887949001, "grad_norm": 1.3833791851378778, "learning_rate": 3.6599013838279975e-06, "loss": 0.6186, "step": 19537 }, { "epoch": 0.5988108373176413, "grad_norm": 1.2727318140716204, "learning_rate": 3.659423231162228e-06, "loss": 0.5943, "step": 19538 }, { "epoch": 0.5988414858403825, "grad_norm": 1.4478857634230629, "learning_rate": 3.658945091705508e-06, "loss": 0.6534, "step": 19539 }, { "epoch": 0.5988721343631237, "grad_norm": 1.4130651629082465, "learning_rate": 3.6584669654625436e-06, "loss": 0.5652, "step": 19540 }, { "epoch": 0.5989027828858648, "grad_norm": 1.1818154495110622, "learning_rate": 3.657988852438047e-06, "loss": 0.6657, "step": 19541 }, { "epoch": 0.5989334314086061, "grad_norm": 1.3749316819417594, "learning_rate": 3.6575107526367297e-06, "loss": 0.5939, "step": 19542 }, { "epoch": 0.5989640799313473, "grad_norm": 0.4531699776313835, "learning_rate": 3.657032666063302e-06, "loss": 0.4089, "step": 19543 }, { "epoch": 0.5989947284540885, "grad_norm": 1.1813771280835936, "learning_rate": 3.6565545927224762e-06, "loss": 0.6055, "step": 19544 }, { "epoch": 0.5990253769768297, "grad_norm": 1.2043139289232905, "learning_rate": 3.6560765326189617e-06, "loss": 0.5302, "step": 19545 }, { "epoch": 0.5990560254995709, "grad_norm": 1.4031773690543246, "learning_rate": 3.655598485757468e-06, "loss": 0.7068, "step": 19546 }, { "epoch": 0.5990866740223121, "grad_norm": 1.7510309886680662, "learning_rate": 3.655120452142707e-06, "loss": 0.7086, "step": 19547 }, { "epoch": 0.5991173225450533, "grad_norm": 1.1863617588653166, "learning_rate": 3.6546424317793893e-06, "loss": 0.5981, "step": 19548 }, { "epoch": 0.5991479710677945, "grad_norm": 1.3330885709399252, "learning_rate": 3.6541644246722212e-06, "loss": 0.5848, "step": 19549 }, { "epoch": 0.5991786195905358, "grad_norm": 0.4281664249799192, "learning_rate": 3.653686430825919e-06, "loss": 0.399, "step": 19550 }, { "epoch": 0.5992092681132769, "grad_norm": 1.3410024915397114, "learning_rate": 3.653208450245187e-06, "loss": 0.5978, "step": 19551 }, { "epoch": 0.5992399166360182, "grad_norm": 1.2540529587449007, "learning_rate": 3.6527304829347356e-06, "loss": 0.5836, "step": 19552 }, { "epoch": 0.5992705651587593, "grad_norm": 1.2501979961050027, "learning_rate": 3.652252528899277e-06, "loss": 0.6284, "step": 19553 }, { "epoch": 0.5993012136815006, "grad_norm": 0.4409281780218271, "learning_rate": 3.651774588143518e-06, "loss": 0.3987, "step": 19554 }, { "epoch": 0.5993318622042417, "grad_norm": 1.6106589439680745, "learning_rate": 3.65129666067217e-06, "loss": 0.6106, "step": 19555 }, { "epoch": 0.599362510726983, "grad_norm": 0.4545391492460685, "learning_rate": 3.6508187464899402e-06, "loss": 0.4026, "step": 19556 }, { "epoch": 0.5993931592497241, "grad_norm": 0.4474537960492454, "learning_rate": 3.650340845601539e-06, "loss": 0.3958, "step": 19557 }, { "epoch": 0.5994238077724654, "grad_norm": 1.385608988125085, "learning_rate": 3.6498629580116747e-06, "loss": 0.5373, "step": 19558 }, { "epoch": 0.5994544562952066, "grad_norm": 1.56094948094864, "learning_rate": 3.6493850837250576e-06, "loss": 0.7067, "step": 19559 }, { "epoch": 0.5994851048179478, "grad_norm": 1.2251460360248358, "learning_rate": 3.6489072227463924e-06, "loss": 0.6845, "step": 19560 }, { "epoch": 0.599515753340689, "grad_norm": 1.2712669289267553, "learning_rate": 3.648429375080391e-06, "loss": 0.6216, "step": 19561 }, { "epoch": 0.5995464018634302, "grad_norm": 1.2612050149780762, "learning_rate": 3.6479515407317603e-06, "loss": 0.6186, "step": 19562 }, { "epoch": 0.5995770503861714, "grad_norm": 1.30945608133111, "learning_rate": 3.6474737197052094e-06, "loss": 0.5511, "step": 19563 }, { "epoch": 0.5996076989089126, "grad_norm": 1.3231312119022935, "learning_rate": 3.6469959120054464e-06, "loss": 0.6444, "step": 19564 }, { "epoch": 0.5996383474316538, "grad_norm": 1.365486403828818, "learning_rate": 3.6465181176371777e-06, "loss": 0.5816, "step": 19565 }, { "epoch": 0.599668995954395, "grad_norm": 1.3791725227416278, "learning_rate": 3.6460403366051132e-06, "loss": 0.6314, "step": 19566 }, { "epoch": 0.5996996444771362, "grad_norm": 0.4372532046945679, "learning_rate": 3.6455625689139617e-06, "loss": 0.3761, "step": 19567 }, { "epoch": 0.5997302929998775, "grad_norm": 1.3725850990621666, "learning_rate": 3.645084814568425e-06, "loss": 0.5699, "step": 19568 }, { "epoch": 0.5997609415226186, "grad_norm": 1.2271628771876135, "learning_rate": 3.6446070735732168e-06, "loss": 0.6645, "step": 19569 }, { "epoch": 0.5997915900453599, "grad_norm": 0.4390552255432691, "learning_rate": 3.6441293459330394e-06, "loss": 0.4203, "step": 19570 }, { "epoch": 0.599822238568101, "grad_norm": 1.3772645664850731, "learning_rate": 3.6436516316526054e-06, "loss": 0.6453, "step": 19571 }, { "epoch": 0.5998528870908422, "grad_norm": 1.5695979916387497, "learning_rate": 3.643173930736618e-06, "loss": 0.6099, "step": 19572 }, { "epoch": 0.5998835356135834, "grad_norm": 1.2908962759734273, "learning_rate": 3.642696243189784e-06, "loss": 0.7419, "step": 19573 }, { "epoch": 0.5999141841363246, "grad_norm": 0.4751198787838312, "learning_rate": 3.6422185690168123e-06, "loss": 0.4342, "step": 19574 }, { "epoch": 0.5999448326590658, "grad_norm": 0.4403092409509089, "learning_rate": 3.641740908222408e-06, "loss": 0.4073, "step": 19575 }, { "epoch": 0.599975481181807, "grad_norm": 1.412346727984967, "learning_rate": 3.6412632608112775e-06, "loss": 0.6931, "step": 19576 }, { "epoch": 0.6000061297045483, "grad_norm": 1.536977060632832, "learning_rate": 3.6407856267881283e-06, "loss": 0.7338, "step": 19577 }, { "epoch": 0.6000367782272894, "grad_norm": 1.1337086264357008, "learning_rate": 3.6403080061576677e-06, "loss": 0.5766, "step": 19578 }, { "epoch": 0.6000674267500307, "grad_norm": 1.2922955791667354, "learning_rate": 3.6398303989245964e-06, "loss": 0.5871, "step": 19579 }, { "epoch": 0.6000980752727718, "grad_norm": 1.2776356413074366, "learning_rate": 3.6393528050936277e-06, "loss": 0.6391, "step": 19580 }, { "epoch": 0.6001287237955131, "grad_norm": 1.3205232159225886, "learning_rate": 3.6388752246694613e-06, "loss": 0.6648, "step": 19581 }, { "epoch": 0.6001593723182542, "grad_norm": 1.279217659222372, "learning_rate": 3.638397657656808e-06, "loss": 0.6535, "step": 19582 }, { "epoch": 0.6001900208409955, "grad_norm": 1.3350127420892635, "learning_rate": 3.63792010406037e-06, "loss": 0.6843, "step": 19583 }, { "epoch": 0.6002206693637366, "grad_norm": 1.1508339779493106, "learning_rate": 3.637442563884853e-06, "loss": 0.6154, "step": 19584 }, { "epoch": 0.6002513178864779, "grad_norm": 1.25920856422439, "learning_rate": 3.636965037134964e-06, "loss": 0.6459, "step": 19585 }, { "epoch": 0.600281966409219, "grad_norm": 1.2584844655987735, "learning_rate": 3.6364875238154073e-06, "loss": 0.5796, "step": 19586 }, { "epoch": 0.6003126149319603, "grad_norm": 1.4323848417407452, "learning_rate": 3.6360100239308867e-06, "loss": 0.637, "step": 19587 }, { "epoch": 0.6003432634547015, "grad_norm": 0.46584816631692777, "learning_rate": 3.6355325374861096e-06, "loss": 0.4054, "step": 19588 }, { "epoch": 0.6003739119774427, "grad_norm": 1.0877181147531754, "learning_rate": 3.635055064485778e-06, "loss": 0.5736, "step": 19589 }, { "epoch": 0.6004045605001839, "grad_norm": 1.3167762359325206, "learning_rate": 3.634577604934599e-06, "loss": 0.6307, "step": 19590 }, { "epoch": 0.6004352090229251, "grad_norm": 1.2642408984478042, "learning_rate": 3.634100158837278e-06, "loss": 0.6974, "step": 19591 }, { "epoch": 0.6004658575456663, "grad_norm": 1.336658268364546, "learning_rate": 3.633622726198514e-06, "loss": 0.5814, "step": 19592 }, { "epoch": 0.6004965060684075, "grad_norm": 1.2569840219011121, "learning_rate": 3.6331453070230182e-06, "loss": 0.6392, "step": 19593 }, { "epoch": 0.6005271545911487, "grad_norm": 1.2775098488897074, "learning_rate": 3.6326679013154904e-06, "loss": 0.5774, "step": 19594 }, { "epoch": 0.60055780311389, "grad_norm": 1.2241165361255189, "learning_rate": 3.632190509080634e-06, "loss": 0.6415, "step": 19595 }, { "epoch": 0.6005884516366311, "grad_norm": 1.5645647966767215, "learning_rate": 3.631713130323157e-06, "loss": 0.6345, "step": 19596 }, { "epoch": 0.6006191001593724, "grad_norm": 1.3815166142834443, "learning_rate": 3.631235765047758e-06, "loss": 0.5785, "step": 19597 }, { "epoch": 0.6006497486821135, "grad_norm": 1.2919391894527417, "learning_rate": 3.6307584132591445e-06, "loss": 0.6727, "step": 19598 }, { "epoch": 0.6006803972048548, "grad_norm": 1.2630692854870307, "learning_rate": 3.6302810749620193e-06, "loss": 0.5477, "step": 19599 }, { "epoch": 0.6007110457275959, "grad_norm": 1.2756950614000122, "learning_rate": 3.629803750161084e-06, "loss": 0.6312, "step": 19600 }, { "epoch": 0.6007416942503372, "grad_norm": 1.351590377866401, "learning_rate": 3.629326438861044e-06, "loss": 0.6698, "step": 19601 }, { "epoch": 0.6007723427730783, "grad_norm": 1.3552844169981142, "learning_rate": 3.6288491410666015e-06, "loss": 0.5611, "step": 19602 }, { "epoch": 0.6008029912958195, "grad_norm": 1.1836286256257298, "learning_rate": 3.6283718567824575e-06, "loss": 0.56, "step": 19603 }, { "epoch": 0.6008336398185607, "grad_norm": 1.2761810749240354, "learning_rate": 3.6278945860133184e-06, "loss": 0.6135, "step": 19604 }, { "epoch": 0.6008642883413019, "grad_norm": 1.3040947887254557, "learning_rate": 3.6274173287638848e-06, "loss": 0.6013, "step": 19605 }, { "epoch": 0.6008949368640432, "grad_norm": 0.4555253333207497, "learning_rate": 3.626940085038858e-06, "loss": 0.4183, "step": 19606 }, { "epoch": 0.6009255853867843, "grad_norm": 1.4645721603201631, "learning_rate": 3.6264628548429427e-06, "loss": 0.6798, "step": 19607 }, { "epoch": 0.6009562339095256, "grad_norm": 1.2283060877364398, "learning_rate": 3.62598563818084e-06, "loss": 0.616, "step": 19608 }, { "epoch": 0.6009868824322667, "grad_norm": 0.45934028151087675, "learning_rate": 3.6255084350572523e-06, "loss": 0.3975, "step": 19609 }, { "epoch": 0.601017530955008, "grad_norm": 1.3293795513447806, "learning_rate": 3.6250312454768827e-06, "loss": 0.5777, "step": 19610 }, { "epoch": 0.6010481794777491, "grad_norm": 1.3328244259990105, "learning_rate": 3.6245540694444303e-06, "loss": 0.6027, "step": 19611 }, { "epoch": 0.6010788280004904, "grad_norm": 0.459890280884749, "learning_rate": 3.6240769069646016e-06, "loss": 0.4285, "step": 19612 }, { "epoch": 0.6011094765232315, "grad_norm": 1.4558753744600037, "learning_rate": 3.6235997580420934e-06, "loss": 0.6613, "step": 19613 }, { "epoch": 0.6011401250459728, "grad_norm": 1.6087745976002854, "learning_rate": 3.623122622681608e-06, "loss": 0.6322, "step": 19614 }, { "epoch": 0.601170773568714, "grad_norm": 1.2150312799905005, "learning_rate": 3.6226455008878486e-06, "loss": 0.5827, "step": 19615 }, { "epoch": 0.6012014220914552, "grad_norm": 1.4819034482268427, "learning_rate": 3.622168392665515e-06, "loss": 0.5873, "step": 19616 }, { "epoch": 0.6012320706141964, "grad_norm": 1.272016107442706, "learning_rate": 3.6216912980193094e-06, "loss": 0.6822, "step": 19617 }, { "epoch": 0.6012627191369376, "grad_norm": 1.2851004620474389, "learning_rate": 3.621214216953932e-06, "loss": 0.7112, "step": 19618 }, { "epoch": 0.6012933676596788, "grad_norm": 1.033131558086962, "learning_rate": 3.620737149474083e-06, "loss": 0.6312, "step": 19619 }, { "epoch": 0.60132401618242, "grad_norm": 1.3683472320780792, "learning_rate": 3.6202600955844642e-06, "loss": 0.6556, "step": 19620 }, { "epoch": 0.6013546647051612, "grad_norm": 1.3118196257091426, "learning_rate": 3.6197830552897773e-06, "loss": 0.5763, "step": 19621 }, { "epoch": 0.6013853132279025, "grad_norm": 1.4825952934334006, "learning_rate": 3.619306028594718e-06, "loss": 0.662, "step": 19622 }, { "epoch": 0.6014159617506436, "grad_norm": 1.2942648168003326, "learning_rate": 3.6188290155039925e-06, "loss": 0.638, "step": 19623 }, { "epoch": 0.6014466102733849, "grad_norm": 1.259028500593874, "learning_rate": 3.618352016022295e-06, "loss": 0.6459, "step": 19624 }, { "epoch": 0.601477258796126, "grad_norm": 1.2228670679524187, "learning_rate": 3.617875030154332e-06, "loss": 0.5719, "step": 19625 }, { "epoch": 0.6015079073188673, "grad_norm": 1.2788727529860833, "learning_rate": 3.6173980579047984e-06, "loss": 0.6744, "step": 19626 }, { "epoch": 0.6015385558416084, "grad_norm": 1.16796051956745, "learning_rate": 3.6169210992783948e-06, "loss": 0.5951, "step": 19627 }, { "epoch": 0.6015692043643497, "grad_norm": 1.486149077502474, "learning_rate": 3.6164441542798225e-06, "loss": 0.636, "step": 19628 }, { "epoch": 0.6015998528870908, "grad_norm": 1.3920779469329667, "learning_rate": 3.61596722291378e-06, "loss": 0.5586, "step": 19629 }, { "epoch": 0.6016305014098321, "grad_norm": 1.3398589576693305, "learning_rate": 3.6154903051849656e-06, "loss": 0.6121, "step": 19630 }, { "epoch": 0.6016611499325732, "grad_norm": 1.3234218374801576, "learning_rate": 3.6150134010980796e-06, "loss": 0.6861, "step": 19631 }, { "epoch": 0.6016917984553145, "grad_norm": 1.4489347101002528, "learning_rate": 3.6145365106578235e-06, "loss": 0.612, "step": 19632 }, { "epoch": 0.6017224469780557, "grad_norm": 1.1998259600338854, "learning_rate": 3.614059633868889e-06, "loss": 0.6337, "step": 19633 }, { "epoch": 0.6017530955007968, "grad_norm": 1.3068983624950252, "learning_rate": 3.613582770735984e-06, "loss": 0.7684, "step": 19634 }, { "epoch": 0.6017837440235381, "grad_norm": 0.4501110057985646, "learning_rate": 3.6131059212637986e-06, "loss": 0.3762, "step": 19635 }, { "epoch": 0.6018143925462792, "grad_norm": 1.4636441466902146, "learning_rate": 3.612629085457039e-06, "loss": 0.7211, "step": 19636 }, { "epoch": 0.6018450410690205, "grad_norm": 1.229890816660319, "learning_rate": 3.6121522633203987e-06, "loss": 0.5724, "step": 19637 }, { "epoch": 0.6018756895917616, "grad_norm": 1.3072840518456657, "learning_rate": 3.6116754548585765e-06, "loss": 0.6553, "step": 19638 }, { "epoch": 0.6019063381145029, "grad_norm": 1.295831081643105, "learning_rate": 3.611198660076273e-06, "loss": 0.6407, "step": 19639 }, { "epoch": 0.601936986637244, "grad_norm": 1.2083276923861992, "learning_rate": 3.610721878978183e-06, "loss": 0.6096, "step": 19640 }, { "epoch": 0.6019676351599853, "grad_norm": 1.4642336926874582, "learning_rate": 3.610245111569005e-06, "loss": 0.6437, "step": 19641 }, { "epoch": 0.6019982836827265, "grad_norm": 1.2834190240020567, "learning_rate": 3.609768357853439e-06, "loss": 0.6907, "step": 19642 }, { "epoch": 0.6020289322054677, "grad_norm": 1.247709280130671, "learning_rate": 3.6092916178361802e-06, "loss": 0.6392, "step": 19643 }, { "epoch": 0.6020595807282089, "grad_norm": 1.2133628481513812, "learning_rate": 3.6088148915219277e-06, "loss": 0.6799, "step": 19644 }, { "epoch": 0.6020902292509501, "grad_norm": 1.1859572670977225, "learning_rate": 3.6083381789153792e-06, "loss": 0.5174, "step": 19645 }, { "epoch": 0.6021208777736913, "grad_norm": 0.47914930871625355, "learning_rate": 3.6078614800212273e-06, "loss": 0.4268, "step": 19646 }, { "epoch": 0.6021515262964325, "grad_norm": 1.405222032604702, "learning_rate": 3.6073847948441756e-06, "loss": 0.6294, "step": 19647 }, { "epoch": 0.6021821748191737, "grad_norm": 0.43729993812118195, "learning_rate": 3.606908123388917e-06, "loss": 0.4032, "step": 19648 }, { "epoch": 0.602212823341915, "grad_norm": 1.3130311782288056, "learning_rate": 3.606431465660148e-06, "loss": 0.5215, "step": 19649 }, { "epoch": 0.6022434718646561, "grad_norm": 0.451335973486104, "learning_rate": 3.605954821662567e-06, "loss": 0.4223, "step": 19650 }, { "epoch": 0.6022741203873974, "grad_norm": 1.306498167527082, "learning_rate": 3.6054781914008696e-06, "loss": 0.6315, "step": 19651 }, { "epoch": 0.6023047689101385, "grad_norm": 1.4485266160517123, "learning_rate": 3.6050015748797528e-06, "loss": 0.6506, "step": 19652 }, { "epoch": 0.6023354174328798, "grad_norm": 1.4002005348247577, "learning_rate": 3.6045249721039122e-06, "loss": 0.779, "step": 19653 }, { "epoch": 0.6023660659556209, "grad_norm": 1.275622282570278, "learning_rate": 3.604048383078044e-06, "loss": 0.6397, "step": 19654 }, { "epoch": 0.6023967144783622, "grad_norm": 1.3108087867986464, "learning_rate": 3.6035718078068455e-06, "loss": 0.6833, "step": 19655 }, { "epoch": 0.6024273630011033, "grad_norm": 1.3546772909132092, "learning_rate": 3.6030952462950103e-06, "loss": 0.6155, "step": 19656 }, { "epoch": 0.6024580115238446, "grad_norm": 1.279942746237792, "learning_rate": 3.6026186985472344e-06, "loss": 0.707, "step": 19657 }, { "epoch": 0.6024886600465857, "grad_norm": 1.2140059018480993, "learning_rate": 3.602142164568214e-06, "loss": 0.5721, "step": 19658 }, { "epoch": 0.602519308569327, "grad_norm": 1.2957605899483615, "learning_rate": 3.6016656443626458e-06, "loss": 0.6691, "step": 19659 }, { "epoch": 0.6025499570920682, "grad_norm": 1.317210792296508, "learning_rate": 3.6011891379352224e-06, "loss": 0.6643, "step": 19660 }, { "epoch": 0.6025806056148094, "grad_norm": 1.3610916463069724, "learning_rate": 3.600712645290641e-06, "loss": 0.6677, "step": 19661 }, { "epoch": 0.6026112541375506, "grad_norm": 1.4384329843451393, "learning_rate": 3.600236166433595e-06, "loss": 0.6997, "step": 19662 }, { "epoch": 0.6026419026602918, "grad_norm": 1.2760192215764221, "learning_rate": 3.5997597013687813e-06, "loss": 0.638, "step": 19663 }, { "epoch": 0.602672551183033, "grad_norm": 1.3085107088206034, "learning_rate": 3.5992832501008943e-06, "loss": 0.7186, "step": 19664 }, { "epoch": 0.6027031997057741, "grad_norm": 1.379310448461374, "learning_rate": 3.5988068126346254e-06, "loss": 0.6091, "step": 19665 }, { "epoch": 0.6027338482285154, "grad_norm": 1.3775292319700054, "learning_rate": 3.5983303889746745e-06, "loss": 0.5987, "step": 19666 }, { "epoch": 0.6027644967512565, "grad_norm": 0.465315504921405, "learning_rate": 3.597853979125732e-06, "loss": 0.3975, "step": 19667 }, { "epoch": 0.6027951452739978, "grad_norm": 1.2547706561445215, "learning_rate": 3.5973775830924907e-06, "loss": 0.5904, "step": 19668 }, { "epoch": 0.602825793796739, "grad_norm": 1.2693309404595179, "learning_rate": 3.5969012008796487e-06, "loss": 0.6094, "step": 19669 }, { "epoch": 0.6028564423194802, "grad_norm": 1.312067784990304, "learning_rate": 3.5964248324918977e-06, "loss": 0.6084, "step": 19670 }, { "epoch": 0.6028870908422214, "grad_norm": 1.337489398837451, "learning_rate": 3.5959484779339327e-06, "loss": 0.6407, "step": 19671 }, { "epoch": 0.6029177393649626, "grad_norm": 1.2076178451701767, "learning_rate": 3.5954721372104464e-06, "loss": 0.629, "step": 19672 }, { "epoch": 0.6029483878877038, "grad_norm": 1.2605373571646554, "learning_rate": 3.594995810326132e-06, "loss": 0.5995, "step": 19673 }, { "epoch": 0.602979036410445, "grad_norm": 1.2774186005517694, "learning_rate": 3.5945194972856834e-06, "loss": 0.5529, "step": 19674 }, { "epoch": 0.6030096849331862, "grad_norm": 1.369688677740224, "learning_rate": 3.594043198093795e-06, "loss": 0.6081, "step": 19675 }, { "epoch": 0.6030403334559274, "grad_norm": 1.3569163661362533, "learning_rate": 3.5935669127551566e-06, "loss": 0.6373, "step": 19676 }, { "epoch": 0.6030709819786686, "grad_norm": 1.2501936299438197, "learning_rate": 3.5930906412744656e-06, "loss": 0.6459, "step": 19677 }, { "epoch": 0.6031016305014099, "grad_norm": 1.2285152943145479, "learning_rate": 3.5926143836564093e-06, "loss": 0.7281, "step": 19678 }, { "epoch": 0.603132279024151, "grad_norm": 1.364417620780569, "learning_rate": 3.5921381399056864e-06, "loss": 0.6107, "step": 19679 }, { "epoch": 0.6031629275468923, "grad_norm": 0.45504705522699584, "learning_rate": 3.591661910026987e-06, "loss": 0.3975, "step": 19680 }, { "epoch": 0.6031935760696334, "grad_norm": 1.3501370017271586, "learning_rate": 3.5911856940250006e-06, "loss": 0.5918, "step": 19681 }, { "epoch": 0.6032242245923747, "grad_norm": 1.1086309586435628, "learning_rate": 3.5907094919044237e-06, "loss": 0.5409, "step": 19682 }, { "epoch": 0.6032548731151158, "grad_norm": 0.4589596294282509, "learning_rate": 3.5902333036699465e-06, "loss": 0.4257, "step": 19683 }, { "epoch": 0.6032855216378571, "grad_norm": 1.493961796846049, "learning_rate": 3.58975712932626e-06, "loss": 0.738, "step": 19684 }, { "epoch": 0.6033161701605982, "grad_norm": 1.200898029736037, "learning_rate": 3.5892809688780594e-06, "loss": 0.5859, "step": 19685 }, { "epoch": 0.6033468186833395, "grad_norm": 1.3085661652303056, "learning_rate": 3.5888048223300343e-06, "loss": 0.6601, "step": 19686 }, { "epoch": 0.6033774672060807, "grad_norm": 1.1974060316535564, "learning_rate": 3.588328689686874e-06, "loss": 0.5864, "step": 19687 }, { "epoch": 0.6034081157288219, "grad_norm": 1.3914091253292902, "learning_rate": 3.587852570953275e-06, "loss": 0.6449, "step": 19688 }, { "epoch": 0.6034387642515631, "grad_norm": 1.8999687956302174, "learning_rate": 3.587376466133923e-06, "loss": 0.6525, "step": 19689 }, { "epoch": 0.6034694127743043, "grad_norm": 0.44110695981015546, "learning_rate": 3.5869003752335152e-06, "loss": 0.4025, "step": 19690 }, { "epoch": 0.6035000612970455, "grad_norm": 1.174732716817674, "learning_rate": 3.5864242982567386e-06, "loss": 0.6632, "step": 19691 }, { "epoch": 0.6035307098197867, "grad_norm": 0.4399002062967314, "learning_rate": 3.5859482352082837e-06, "loss": 0.3695, "step": 19692 }, { "epoch": 0.6035613583425279, "grad_norm": 0.4471209136802136, "learning_rate": 3.5854721860928436e-06, "loss": 0.4026, "step": 19693 }, { "epoch": 0.6035920068652691, "grad_norm": 1.2672872367083698, "learning_rate": 3.5849961509151088e-06, "loss": 0.6552, "step": 19694 }, { "epoch": 0.6036226553880103, "grad_norm": 1.505263334760349, "learning_rate": 3.584520129679767e-06, "loss": 0.6329, "step": 19695 }, { "epoch": 0.6036533039107514, "grad_norm": 1.5321258910504005, "learning_rate": 3.5840441223915123e-06, "loss": 0.5532, "step": 19696 }, { "epoch": 0.6036839524334927, "grad_norm": 1.2480457344494251, "learning_rate": 3.5835681290550315e-06, "loss": 0.6399, "step": 19697 }, { "epoch": 0.6037146009562339, "grad_norm": 1.3443352549918295, "learning_rate": 3.5830921496750178e-06, "loss": 0.683, "step": 19698 }, { "epoch": 0.6037452494789751, "grad_norm": 1.3096999092519044, "learning_rate": 3.58261618425616e-06, "loss": 0.7244, "step": 19699 }, { "epoch": 0.6037758980017163, "grad_norm": 1.3515046710333498, "learning_rate": 3.5821402328031463e-06, "loss": 0.622, "step": 19700 }, { "epoch": 0.6038065465244575, "grad_norm": 1.2328649836908963, "learning_rate": 3.5816642953206686e-06, "loss": 0.631, "step": 19701 }, { "epoch": 0.6038371950471987, "grad_norm": 1.436612534192641, "learning_rate": 3.5811883718134154e-06, "loss": 0.6032, "step": 19702 }, { "epoch": 0.6038678435699399, "grad_norm": 1.2908396077475621, "learning_rate": 3.5807124622860756e-06, "loss": 0.5947, "step": 19703 }, { "epoch": 0.6038984920926811, "grad_norm": 1.1902274728405922, "learning_rate": 3.580236566743339e-06, "loss": 0.6436, "step": 19704 }, { "epoch": 0.6039291406154224, "grad_norm": 1.2237497833951727, "learning_rate": 3.5797606851898946e-06, "loss": 0.5308, "step": 19705 }, { "epoch": 0.6039597891381635, "grad_norm": 0.4965220606352878, "learning_rate": 3.5792848176304323e-06, "loss": 0.395, "step": 19706 }, { "epoch": 0.6039904376609048, "grad_norm": 0.458881681427192, "learning_rate": 3.578808964069641e-06, "loss": 0.4109, "step": 19707 }, { "epoch": 0.6040210861836459, "grad_norm": 1.4835480016597877, "learning_rate": 3.578333124512206e-06, "loss": 0.6169, "step": 19708 }, { "epoch": 0.6040517347063872, "grad_norm": 1.3048099724342739, "learning_rate": 3.5778572989628215e-06, "loss": 0.6813, "step": 19709 }, { "epoch": 0.6040823832291283, "grad_norm": 1.108242025333283, "learning_rate": 3.5773814874261716e-06, "loss": 0.6997, "step": 19710 }, { "epoch": 0.6041130317518696, "grad_norm": 1.3935361075082644, "learning_rate": 3.5769056899069455e-06, "loss": 0.593, "step": 19711 }, { "epoch": 0.6041436802746107, "grad_norm": 1.4940950564121616, "learning_rate": 3.576429906409832e-06, "loss": 0.5348, "step": 19712 }, { "epoch": 0.604174328797352, "grad_norm": 1.1664170031846133, "learning_rate": 3.575954136939519e-06, "loss": 0.5649, "step": 19713 }, { "epoch": 0.6042049773200932, "grad_norm": 0.4748526122340445, "learning_rate": 3.575478381500693e-06, "loss": 0.4135, "step": 19714 }, { "epoch": 0.6042356258428344, "grad_norm": 1.3119192637193022, "learning_rate": 3.575002640098045e-06, "loss": 0.5824, "step": 19715 }, { "epoch": 0.6042662743655756, "grad_norm": 1.370005924147112, "learning_rate": 3.5745269127362584e-06, "loss": 0.6085, "step": 19716 }, { "epoch": 0.6042969228883168, "grad_norm": 1.361794159894663, "learning_rate": 3.5740511994200245e-06, "loss": 0.5926, "step": 19717 }, { "epoch": 0.604327571411058, "grad_norm": 0.44731240777494874, "learning_rate": 3.57357550015403e-06, "loss": 0.3839, "step": 19718 }, { "epoch": 0.6043582199337992, "grad_norm": 1.2682537131020128, "learning_rate": 3.573099814942958e-06, "loss": 0.6206, "step": 19719 }, { "epoch": 0.6043888684565404, "grad_norm": 1.311478567452931, "learning_rate": 3.5726241437915014e-06, "loss": 0.6186, "step": 19720 }, { "epoch": 0.6044195169792816, "grad_norm": 1.3402161642544772, "learning_rate": 3.572148486704344e-06, "loss": 0.6869, "step": 19721 }, { "epoch": 0.6044501655020228, "grad_norm": 1.1278709421132227, "learning_rate": 3.5716728436861715e-06, "loss": 0.5621, "step": 19722 }, { "epoch": 0.6044808140247641, "grad_norm": 1.2750651891312168, "learning_rate": 3.5711972147416723e-06, "loss": 0.5676, "step": 19723 }, { "epoch": 0.6045114625475052, "grad_norm": 1.3330719165904, "learning_rate": 3.570721599875532e-06, "loss": 0.6709, "step": 19724 }, { "epoch": 0.6045421110702465, "grad_norm": 1.2105793346704339, "learning_rate": 3.5702459990924386e-06, "loss": 0.7044, "step": 19725 }, { "epoch": 0.6045727595929876, "grad_norm": 1.4163217749703723, "learning_rate": 3.5697704123970767e-06, "loss": 0.707, "step": 19726 }, { "epoch": 0.6046034081157288, "grad_norm": 1.1264776424574827, "learning_rate": 3.5692948397941322e-06, "loss": 0.6476, "step": 19727 }, { "epoch": 0.60463405663847, "grad_norm": 1.3707416949697828, "learning_rate": 3.5688192812882927e-06, "loss": 0.6595, "step": 19728 }, { "epoch": 0.6046647051612112, "grad_norm": 1.2660997174389745, "learning_rate": 3.5683437368842444e-06, "loss": 0.7059, "step": 19729 }, { "epoch": 0.6046953536839524, "grad_norm": 1.3291822119811207, "learning_rate": 3.5678682065866684e-06, "loss": 0.6065, "step": 19730 }, { "epoch": 0.6047260022066936, "grad_norm": 1.2350477409315865, "learning_rate": 3.567392690400256e-06, "loss": 0.6137, "step": 19731 }, { "epoch": 0.6047566507294349, "grad_norm": 0.44937811985763343, "learning_rate": 3.5669171883296896e-06, "loss": 0.4056, "step": 19732 }, { "epoch": 0.604787299252176, "grad_norm": 1.3760691421861992, "learning_rate": 3.5664417003796524e-06, "loss": 0.6254, "step": 19733 }, { "epoch": 0.6048179477749173, "grad_norm": 1.2984191516918708, "learning_rate": 3.5659662265548344e-06, "loss": 0.6522, "step": 19734 }, { "epoch": 0.6048485962976584, "grad_norm": 1.300120200000106, "learning_rate": 3.5654907668599165e-06, "loss": 0.5535, "step": 19735 }, { "epoch": 0.6048792448203997, "grad_norm": 1.3018042155247262, "learning_rate": 3.5650153212995864e-06, "loss": 0.5828, "step": 19736 }, { "epoch": 0.6049098933431408, "grad_norm": 1.2585606519669414, "learning_rate": 3.564539889878527e-06, "loss": 0.7062, "step": 19737 }, { "epoch": 0.6049405418658821, "grad_norm": 1.3189038546629341, "learning_rate": 3.564064472601423e-06, "loss": 0.6226, "step": 19738 }, { "epoch": 0.6049711903886232, "grad_norm": 1.2707394050195011, "learning_rate": 3.5635890694729596e-06, "loss": 0.5959, "step": 19739 }, { "epoch": 0.6050018389113645, "grad_norm": 1.4953467869087016, "learning_rate": 3.5631136804978215e-06, "loss": 0.5972, "step": 19740 }, { "epoch": 0.6050324874341056, "grad_norm": 1.1836052203977576, "learning_rate": 3.5626383056806896e-06, "loss": 0.656, "step": 19741 }, { "epoch": 0.6050631359568469, "grad_norm": 1.4628134513917204, "learning_rate": 3.562162945026253e-06, "loss": 0.573, "step": 19742 }, { "epoch": 0.6050937844795881, "grad_norm": 1.1879289923691854, "learning_rate": 3.5616875985391897e-06, "loss": 0.6049, "step": 19743 }, { "epoch": 0.6051244330023293, "grad_norm": 1.3669999629533927, "learning_rate": 3.5612122662241894e-06, "loss": 0.6529, "step": 19744 }, { "epoch": 0.6051550815250705, "grad_norm": 1.2797684362619421, "learning_rate": 3.560736948085932e-06, "loss": 0.5609, "step": 19745 }, { "epoch": 0.6051857300478117, "grad_norm": 1.1068270671689997, "learning_rate": 3.5602616441291003e-06, "loss": 0.5539, "step": 19746 }, { "epoch": 0.6052163785705529, "grad_norm": 1.331572736036529, "learning_rate": 3.55978635435838e-06, "loss": 0.6714, "step": 19747 }, { "epoch": 0.6052470270932941, "grad_norm": 1.2748083433597956, "learning_rate": 3.5593110787784535e-06, "loss": 0.6752, "step": 19748 }, { "epoch": 0.6052776756160353, "grad_norm": 0.4669450373446505, "learning_rate": 3.558835817394003e-06, "loss": 0.4161, "step": 19749 }, { "epoch": 0.6053083241387766, "grad_norm": 1.5117392151265827, "learning_rate": 3.5583605702097122e-06, "loss": 0.5892, "step": 19750 }, { "epoch": 0.6053389726615177, "grad_norm": 1.291948863275341, "learning_rate": 3.557885337230263e-06, "loss": 0.6494, "step": 19751 }, { "epoch": 0.605369621184259, "grad_norm": 1.283559986124282, "learning_rate": 3.5574101184603405e-06, "loss": 0.6601, "step": 19752 }, { "epoch": 0.6054002697070001, "grad_norm": 1.1645895935052526, "learning_rate": 3.5569349139046237e-06, "loss": 0.5539, "step": 19753 }, { "epoch": 0.6054309182297414, "grad_norm": 1.2783625659734823, "learning_rate": 3.556459723567796e-06, "loss": 0.6468, "step": 19754 }, { "epoch": 0.6054615667524825, "grad_norm": 1.5256979968447462, "learning_rate": 3.5559845474545406e-06, "loss": 0.6211, "step": 19755 }, { "epoch": 0.6054922152752238, "grad_norm": 1.3040091405589667, "learning_rate": 3.5555093855695396e-06, "loss": 0.6004, "step": 19756 }, { "epoch": 0.6055228637979649, "grad_norm": 1.2619700444706292, "learning_rate": 3.5550342379174725e-06, "loss": 0.6508, "step": 19757 }, { "epoch": 0.6055535123207061, "grad_norm": 1.2486365526041363, "learning_rate": 3.5545591045030238e-06, "loss": 0.6247, "step": 19758 }, { "epoch": 0.6055841608434473, "grad_norm": 1.2519693940696093, "learning_rate": 3.5540839853308754e-06, "loss": 0.5714, "step": 19759 }, { "epoch": 0.6056148093661885, "grad_norm": 1.4047661216096925, "learning_rate": 3.5536088804057044e-06, "loss": 0.6723, "step": 19760 }, { "epoch": 0.6056454578889298, "grad_norm": 1.410005031371944, "learning_rate": 3.553133789732198e-06, "loss": 0.661, "step": 19761 }, { "epoch": 0.6056761064116709, "grad_norm": 1.3060746536891277, "learning_rate": 3.5526587133150314e-06, "loss": 0.65, "step": 19762 }, { "epoch": 0.6057067549344122, "grad_norm": 1.3516004995262356, "learning_rate": 3.5521836511588925e-06, "loss": 0.7342, "step": 19763 }, { "epoch": 0.6057374034571533, "grad_norm": 1.2616068332464065, "learning_rate": 3.5517086032684567e-06, "loss": 0.5941, "step": 19764 }, { "epoch": 0.6057680519798946, "grad_norm": 1.3431475478771473, "learning_rate": 3.5512335696484064e-06, "loss": 0.6182, "step": 19765 }, { "epoch": 0.6057987005026357, "grad_norm": 1.4050856868078487, "learning_rate": 3.550758550303423e-06, "loss": 0.7164, "step": 19766 }, { "epoch": 0.605829349025377, "grad_norm": 1.1963782471191216, "learning_rate": 3.5502835452381866e-06, "loss": 0.6477, "step": 19767 }, { "epoch": 0.6058599975481181, "grad_norm": 1.168510053243682, "learning_rate": 3.5498085544573755e-06, "loss": 0.5375, "step": 19768 }, { "epoch": 0.6058906460708594, "grad_norm": 1.4740273505703012, "learning_rate": 3.549333577965674e-06, "loss": 0.7027, "step": 19769 }, { "epoch": 0.6059212945936006, "grad_norm": 0.4806388633679882, "learning_rate": 3.5488586157677586e-06, "loss": 0.4197, "step": 19770 }, { "epoch": 0.6059519431163418, "grad_norm": 1.242054760967492, "learning_rate": 3.5483836678683108e-06, "loss": 0.5608, "step": 19771 }, { "epoch": 0.605982591639083, "grad_norm": 0.460387394155606, "learning_rate": 3.547908734272012e-06, "loss": 0.4143, "step": 19772 }, { "epoch": 0.6060132401618242, "grad_norm": 0.4442913162064676, "learning_rate": 3.5474338149835363e-06, "loss": 0.3809, "step": 19773 }, { "epoch": 0.6060438886845654, "grad_norm": 1.2828319333961775, "learning_rate": 3.5469589100075707e-06, "loss": 0.6652, "step": 19774 }, { "epoch": 0.6060745372073066, "grad_norm": 1.194987647819432, "learning_rate": 3.546484019348789e-06, "loss": 0.6036, "step": 19775 }, { "epoch": 0.6061051857300478, "grad_norm": 1.3341391102984077, "learning_rate": 3.5460091430118714e-06, "loss": 0.6466, "step": 19776 }, { "epoch": 0.606135834252789, "grad_norm": 1.2336263423792944, "learning_rate": 3.5455342810014987e-06, "loss": 0.6007, "step": 19777 }, { "epoch": 0.6061664827755302, "grad_norm": 1.2276521641801015, "learning_rate": 3.5450594333223476e-06, "loss": 0.5911, "step": 19778 }, { "epoch": 0.6061971312982715, "grad_norm": 1.3563306741965169, "learning_rate": 3.5445845999790994e-06, "loss": 0.6287, "step": 19779 }, { "epoch": 0.6062277798210126, "grad_norm": 1.182097646660836, "learning_rate": 3.544109780976432e-06, "loss": 0.6739, "step": 19780 }, { "epoch": 0.6062584283437539, "grad_norm": 1.339447038384568, "learning_rate": 3.543634976319022e-06, "loss": 0.5503, "step": 19781 }, { "epoch": 0.606289076866495, "grad_norm": 1.4306977662908036, "learning_rate": 3.54316018601155e-06, "loss": 0.5938, "step": 19782 }, { "epoch": 0.6063197253892363, "grad_norm": 0.5075624457464155, "learning_rate": 3.542685410058695e-06, "loss": 0.3836, "step": 19783 }, { "epoch": 0.6063503739119774, "grad_norm": 1.2203294271205054, "learning_rate": 3.5422106484651297e-06, "loss": 0.5092, "step": 19784 }, { "epoch": 0.6063810224347187, "grad_norm": 1.4135708575696282, "learning_rate": 3.5417359012355395e-06, "loss": 0.6515, "step": 19785 }, { "epoch": 0.6064116709574598, "grad_norm": 1.3434726056988333, "learning_rate": 3.541261168374598e-06, "loss": 0.6915, "step": 19786 }, { "epoch": 0.6064423194802011, "grad_norm": 1.2812556880987311, "learning_rate": 3.5407864498869815e-06, "loss": 0.6987, "step": 19787 }, { "epoch": 0.6064729680029423, "grad_norm": 1.5131679779404301, "learning_rate": 3.5403117457773708e-06, "loss": 0.6764, "step": 19788 }, { "epoch": 0.6065036165256834, "grad_norm": 1.4397979078076948, "learning_rate": 3.539837056050441e-06, "loss": 0.6691, "step": 19789 }, { "epoch": 0.6065342650484247, "grad_norm": 1.1948611061386318, "learning_rate": 3.5393623807108714e-06, "loss": 0.4714, "step": 19790 }, { "epoch": 0.6065649135711658, "grad_norm": 0.4679175048095649, "learning_rate": 3.5388877197633378e-06, "loss": 0.4081, "step": 19791 }, { "epoch": 0.6065955620939071, "grad_norm": 0.441880306895453, "learning_rate": 3.5384130732125165e-06, "loss": 0.3728, "step": 19792 }, { "epoch": 0.6066262106166482, "grad_norm": 1.2115975650763555, "learning_rate": 3.5379384410630858e-06, "loss": 0.608, "step": 19793 }, { "epoch": 0.6066568591393895, "grad_norm": 0.4300866236172552, "learning_rate": 3.5374638233197233e-06, "loss": 0.393, "step": 19794 }, { "epoch": 0.6066875076621306, "grad_norm": 1.4449681303027526, "learning_rate": 3.5369892199871e-06, "loss": 0.6902, "step": 19795 }, { "epoch": 0.6067181561848719, "grad_norm": 1.212239017871153, "learning_rate": 3.5365146310699007e-06, "loss": 0.5819, "step": 19796 }, { "epoch": 0.606748804707613, "grad_norm": 1.1389624029474104, "learning_rate": 3.536040056572794e-06, "loss": 0.4927, "step": 19797 }, { "epoch": 0.6067794532303543, "grad_norm": 0.4430224729775387, "learning_rate": 3.5355654965004604e-06, "loss": 0.3967, "step": 19798 }, { "epoch": 0.6068101017530955, "grad_norm": 1.3790276599514752, "learning_rate": 3.535090950857575e-06, "loss": 0.6189, "step": 19799 }, { "epoch": 0.6068407502758367, "grad_norm": 1.2934100423666284, "learning_rate": 3.534616419648812e-06, "loss": 0.6635, "step": 19800 }, { "epoch": 0.6068713987985779, "grad_norm": 1.2481787229112855, "learning_rate": 3.534141902878849e-06, "loss": 0.6785, "step": 19801 }, { "epoch": 0.6069020473213191, "grad_norm": 1.2262196473158948, "learning_rate": 3.533667400552362e-06, "loss": 0.6891, "step": 19802 }, { "epoch": 0.6069326958440603, "grad_norm": 1.365509253057487, "learning_rate": 3.533192912674023e-06, "loss": 0.6811, "step": 19803 }, { "epoch": 0.6069633443668015, "grad_norm": 1.3234240881796953, "learning_rate": 3.5327184392485124e-06, "loss": 0.6383, "step": 19804 }, { "epoch": 0.6069939928895427, "grad_norm": 1.4302645088980765, "learning_rate": 3.5322439802804993e-06, "loss": 0.5993, "step": 19805 }, { "epoch": 0.607024641412284, "grad_norm": 1.299649231959423, "learning_rate": 3.5317695357746645e-06, "loss": 0.6636, "step": 19806 }, { "epoch": 0.6070552899350251, "grad_norm": 1.2845154216266186, "learning_rate": 3.5312951057356793e-06, "loss": 0.6686, "step": 19807 }, { "epoch": 0.6070859384577664, "grad_norm": 1.1336260087310368, "learning_rate": 3.5308206901682186e-06, "loss": 0.6479, "step": 19808 }, { "epoch": 0.6071165869805075, "grad_norm": 1.3763916395011568, "learning_rate": 3.530346289076958e-06, "loss": 0.6123, "step": 19809 }, { "epoch": 0.6071472355032488, "grad_norm": 1.2457378698942825, "learning_rate": 3.529871902466572e-06, "loss": 0.5396, "step": 19810 }, { "epoch": 0.6071778840259899, "grad_norm": 1.4197353900522016, "learning_rate": 3.5293975303417322e-06, "loss": 0.5813, "step": 19811 }, { "epoch": 0.6072085325487312, "grad_norm": 0.4749549659523186, "learning_rate": 3.5289231727071166e-06, "loss": 0.4037, "step": 19812 }, { "epoch": 0.6072391810714723, "grad_norm": 1.5321132325733098, "learning_rate": 3.528448829567398e-06, "loss": 0.6662, "step": 19813 }, { "epoch": 0.6072698295942136, "grad_norm": 1.3135183397970238, "learning_rate": 3.527974500927247e-06, "loss": 0.6227, "step": 19814 }, { "epoch": 0.6073004781169548, "grad_norm": 1.353575729373304, "learning_rate": 3.527500186791343e-06, "loss": 0.7228, "step": 19815 }, { "epoch": 0.607331126639696, "grad_norm": 1.3556739764040078, "learning_rate": 3.5270258871643526e-06, "loss": 0.7545, "step": 19816 }, { "epoch": 0.6073617751624372, "grad_norm": 1.3719754232921582, "learning_rate": 3.5265516020509573e-06, "loss": 0.622, "step": 19817 }, { "epoch": 0.6073924236851784, "grad_norm": 1.1201799254691502, "learning_rate": 3.526077331455824e-06, "loss": 0.6131, "step": 19818 }, { "epoch": 0.6074230722079196, "grad_norm": 1.1860593947341425, "learning_rate": 3.5256030753836267e-06, "loss": 0.6273, "step": 19819 }, { "epoch": 0.6074537207306607, "grad_norm": 1.193486522889473, "learning_rate": 3.525128833839041e-06, "loss": 0.5594, "step": 19820 }, { "epoch": 0.607484369253402, "grad_norm": 1.4548597912050076, "learning_rate": 3.5246546068267382e-06, "loss": 0.5419, "step": 19821 }, { "epoch": 0.6075150177761431, "grad_norm": 0.4592461859276225, "learning_rate": 3.5241803943513907e-06, "loss": 0.415, "step": 19822 }, { "epoch": 0.6075456662988844, "grad_norm": 1.380828305937527, "learning_rate": 3.523706196417672e-06, "loss": 0.6276, "step": 19823 }, { "epoch": 0.6075763148216256, "grad_norm": 1.2500643732273806, "learning_rate": 3.523232013030252e-06, "loss": 0.5779, "step": 19824 }, { "epoch": 0.6076069633443668, "grad_norm": 1.225700409353182, "learning_rate": 3.522757844193807e-06, "loss": 0.5655, "step": 19825 }, { "epoch": 0.607637611867108, "grad_norm": 1.2925561700450592, "learning_rate": 3.5222836899130077e-06, "loss": 0.6191, "step": 19826 }, { "epoch": 0.6076682603898492, "grad_norm": 1.087187969261106, "learning_rate": 3.521809550192522e-06, "loss": 0.5554, "step": 19827 }, { "epoch": 0.6076989089125904, "grad_norm": 1.2230080577500473, "learning_rate": 3.5213354250370278e-06, "loss": 0.6558, "step": 19828 }, { "epoch": 0.6077295574353316, "grad_norm": 1.203495256279851, "learning_rate": 3.5208613144511934e-06, "loss": 0.6506, "step": 19829 }, { "epoch": 0.6077602059580728, "grad_norm": 1.0707950209963095, "learning_rate": 3.520387218439689e-06, "loss": 0.5743, "step": 19830 }, { "epoch": 0.607790854480814, "grad_norm": 1.3347417892302593, "learning_rate": 3.5199131370071905e-06, "loss": 0.6143, "step": 19831 }, { "epoch": 0.6078215030035552, "grad_norm": 1.3241452257646282, "learning_rate": 3.519439070158365e-06, "loss": 0.6972, "step": 19832 }, { "epoch": 0.6078521515262965, "grad_norm": 0.4936332584820222, "learning_rate": 3.518965017897885e-06, "loss": 0.4101, "step": 19833 }, { "epoch": 0.6078828000490376, "grad_norm": 1.5128293060316682, "learning_rate": 3.5184909802304228e-06, "loss": 0.701, "step": 19834 }, { "epoch": 0.6079134485717789, "grad_norm": 1.2291506135048207, "learning_rate": 3.518016957160647e-06, "loss": 0.6409, "step": 19835 }, { "epoch": 0.60794409709452, "grad_norm": 1.2501182522129404, "learning_rate": 3.51754294869323e-06, "loss": 0.6302, "step": 19836 }, { "epoch": 0.6079747456172613, "grad_norm": 1.3899368220545576, "learning_rate": 3.517068954832843e-06, "loss": 0.7412, "step": 19837 }, { "epoch": 0.6080053941400024, "grad_norm": 1.3057727092613858, "learning_rate": 3.516594975584151e-06, "loss": 0.6384, "step": 19838 }, { "epoch": 0.6080360426627437, "grad_norm": 1.3881261765959014, "learning_rate": 3.516121010951832e-06, "loss": 0.5133, "step": 19839 }, { "epoch": 0.6080666911854848, "grad_norm": 1.1138985604727416, "learning_rate": 3.515647060940551e-06, "loss": 0.6121, "step": 19840 }, { "epoch": 0.6080973397082261, "grad_norm": 1.3339191247728421, "learning_rate": 3.5151731255549794e-06, "loss": 0.6636, "step": 19841 }, { "epoch": 0.6081279882309673, "grad_norm": 1.4243332940049345, "learning_rate": 3.5146992047997864e-06, "loss": 0.6036, "step": 19842 }, { "epoch": 0.6081586367537085, "grad_norm": 1.4056899820939652, "learning_rate": 3.514225298679642e-06, "loss": 0.6062, "step": 19843 }, { "epoch": 0.6081892852764497, "grad_norm": 1.3292127264038147, "learning_rate": 3.513751407199217e-06, "loss": 0.6691, "step": 19844 }, { "epoch": 0.6082199337991909, "grad_norm": 1.28289417178572, "learning_rate": 3.5132775303631793e-06, "loss": 0.7116, "step": 19845 }, { "epoch": 0.6082505823219321, "grad_norm": 0.45439923739789256, "learning_rate": 3.5128036681761975e-06, "loss": 0.4092, "step": 19846 }, { "epoch": 0.6082812308446733, "grad_norm": 1.260247546716952, "learning_rate": 3.5123298206429425e-06, "loss": 0.5579, "step": 19847 }, { "epoch": 0.6083118793674145, "grad_norm": 1.4088812255053333, "learning_rate": 3.5118559877680834e-06, "loss": 0.6654, "step": 19848 }, { "epoch": 0.6083425278901557, "grad_norm": 1.1422101101916788, "learning_rate": 3.5113821695562867e-06, "loss": 0.6692, "step": 19849 }, { "epoch": 0.6083731764128969, "grad_norm": 1.3530395074365242, "learning_rate": 3.5109083660122233e-06, "loss": 0.5606, "step": 19850 }, { "epoch": 0.608403824935638, "grad_norm": 1.490298463437664, "learning_rate": 3.510434577140559e-06, "loss": 0.6553, "step": 19851 }, { "epoch": 0.6084344734583793, "grad_norm": 0.4584938871566055, "learning_rate": 3.5099608029459653e-06, "loss": 0.4064, "step": 19852 }, { "epoch": 0.6084651219811205, "grad_norm": 1.4091690559463095, "learning_rate": 3.5094870434331093e-06, "loss": 0.6204, "step": 19853 }, { "epoch": 0.6084957705038617, "grad_norm": 1.3318278716831577, "learning_rate": 3.5090132986066572e-06, "loss": 0.6211, "step": 19854 }, { "epoch": 0.6085264190266029, "grad_norm": 1.3925311416711252, "learning_rate": 3.50853956847128e-06, "loss": 0.616, "step": 19855 }, { "epoch": 0.6085570675493441, "grad_norm": 0.44439882779625167, "learning_rate": 3.508065853031645e-06, "loss": 0.4079, "step": 19856 }, { "epoch": 0.6085877160720853, "grad_norm": 1.324682832349806, "learning_rate": 3.507592152292416e-06, "loss": 0.6645, "step": 19857 }, { "epoch": 0.6086183645948265, "grad_norm": 1.2638560863039858, "learning_rate": 3.5071184662582664e-06, "loss": 0.6473, "step": 19858 }, { "epoch": 0.6086490131175677, "grad_norm": 1.4099973232840837, "learning_rate": 3.5066447949338573e-06, "loss": 0.6536, "step": 19859 }, { "epoch": 0.608679661640309, "grad_norm": 1.3050014775886851, "learning_rate": 3.5061711383238623e-06, "loss": 0.7153, "step": 19860 }, { "epoch": 0.6087103101630501, "grad_norm": 1.2790153995048903, "learning_rate": 3.5056974964329443e-06, "loss": 0.6625, "step": 19861 }, { "epoch": 0.6087409586857914, "grad_norm": 1.2894159294181882, "learning_rate": 3.50522386926577e-06, "loss": 0.6018, "step": 19862 }, { "epoch": 0.6087716072085325, "grad_norm": 0.4428274902492137, "learning_rate": 3.5047502568270085e-06, "loss": 0.4149, "step": 19863 }, { "epoch": 0.6088022557312738, "grad_norm": 0.44394229063580004, "learning_rate": 3.504276659121325e-06, "loss": 0.3809, "step": 19864 }, { "epoch": 0.6088329042540149, "grad_norm": 1.3620414750911434, "learning_rate": 3.5038030761533858e-06, "loss": 0.6936, "step": 19865 }, { "epoch": 0.6088635527767562, "grad_norm": 1.2046595752174571, "learning_rate": 3.5033295079278585e-06, "loss": 0.5536, "step": 19866 }, { "epoch": 0.6088942012994973, "grad_norm": 1.4244182245565116, "learning_rate": 3.5028559544494095e-06, "loss": 0.5849, "step": 19867 }, { "epoch": 0.6089248498222386, "grad_norm": 1.3627305972181805, "learning_rate": 3.5023824157227003e-06, "loss": 0.5874, "step": 19868 }, { "epoch": 0.6089554983449798, "grad_norm": 0.47673966897989717, "learning_rate": 3.501908891752404e-06, "loss": 0.3964, "step": 19869 }, { "epoch": 0.608986146867721, "grad_norm": 1.414418761365972, "learning_rate": 3.5014353825431796e-06, "loss": 0.7293, "step": 19870 }, { "epoch": 0.6090167953904622, "grad_norm": 1.2876797946011949, "learning_rate": 3.5009618880996986e-06, "loss": 0.6107, "step": 19871 }, { "epoch": 0.6090474439132034, "grad_norm": 1.4061323033167912, "learning_rate": 3.5004884084266235e-06, "loss": 0.6617, "step": 19872 }, { "epoch": 0.6090780924359446, "grad_norm": 1.2495778176766135, "learning_rate": 3.5000149435286172e-06, "loss": 0.6786, "step": 19873 }, { "epoch": 0.6091087409586858, "grad_norm": 1.341793350967137, "learning_rate": 3.49954149341035e-06, "loss": 0.712, "step": 19874 }, { "epoch": 0.609139389481427, "grad_norm": 1.2778355666541918, "learning_rate": 3.4990680580764837e-06, "loss": 0.5885, "step": 19875 }, { "epoch": 0.6091700380041682, "grad_norm": 1.1675684064477234, "learning_rate": 3.4985946375316828e-06, "loss": 0.5941, "step": 19876 }, { "epoch": 0.6092006865269094, "grad_norm": 1.2833825310820226, "learning_rate": 3.4981212317806133e-06, "loss": 0.6543, "step": 19877 }, { "epoch": 0.6092313350496507, "grad_norm": 1.2797563275765804, "learning_rate": 3.49764784082794e-06, "loss": 0.6581, "step": 19878 }, { "epoch": 0.6092619835723918, "grad_norm": 0.4416447014493989, "learning_rate": 3.4971744646783267e-06, "loss": 0.4146, "step": 19879 }, { "epoch": 0.6092926320951331, "grad_norm": 1.1474254086495388, "learning_rate": 3.49670110333644e-06, "loss": 0.627, "step": 19880 }, { "epoch": 0.6093232806178742, "grad_norm": 1.3172115835472915, "learning_rate": 3.496227756806938e-06, "loss": 0.7213, "step": 19881 }, { "epoch": 0.6093539291406154, "grad_norm": 1.399622165125134, "learning_rate": 3.495754425094493e-06, "loss": 0.6685, "step": 19882 }, { "epoch": 0.6093845776633566, "grad_norm": 1.4403382238690068, "learning_rate": 3.4952811082037626e-06, "loss": 0.5646, "step": 19883 }, { "epoch": 0.6094152261860978, "grad_norm": 1.2818653661187105, "learning_rate": 3.4948078061394116e-06, "loss": 0.6501, "step": 19884 }, { "epoch": 0.609445874708839, "grad_norm": 1.5452222588365627, "learning_rate": 3.4943345189061052e-06, "loss": 0.6546, "step": 19885 }, { "epoch": 0.6094765232315802, "grad_norm": 1.261381626925942, "learning_rate": 3.493861246508506e-06, "loss": 0.4916, "step": 19886 }, { "epoch": 0.6095071717543215, "grad_norm": 1.2044467935019396, "learning_rate": 3.493387988951277e-06, "loss": 0.5816, "step": 19887 }, { "epoch": 0.6095378202770626, "grad_norm": 1.356470215437457, "learning_rate": 3.492914746239081e-06, "loss": 0.6558, "step": 19888 }, { "epoch": 0.6095684687998039, "grad_norm": 1.307894091570431, "learning_rate": 3.4924415183765826e-06, "loss": 0.6524, "step": 19889 }, { "epoch": 0.609599117322545, "grad_norm": 1.310806846816319, "learning_rate": 3.491968305368443e-06, "loss": 0.767, "step": 19890 }, { "epoch": 0.6096297658452863, "grad_norm": 1.2288068911114067, "learning_rate": 3.4914951072193274e-06, "loss": 0.5332, "step": 19891 }, { "epoch": 0.6096604143680274, "grad_norm": 1.3018363922865706, "learning_rate": 3.4910219239338938e-06, "loss": 0.616, "step": 19892 }, { "epoch": 0.6096910628907687, "grad_norm": 1.088771134324148, "learning_rate": 3.4905487555168093e-06, "loss": 0.59, "step": 19893 }, { "epoch": 0.6097217114135098, "grad_norm": 1.2181233455341354, "learning_rate": 3.490075601972734e-06, "loss": 0.6197, "step": 19894 }, { "epoch": 0.6097523599362511, "grad_norm": 1.389572716568673, "learning_rate": 3.4896024633063288e-06, "loss": 0.6264, "step": 19895 }, { "epoch": 0.6097830084589922, "grad_norm": 1.1093016786221492, "learning_rate": 3.489129339522258e-06, "loss": 0.595, "step": 19896 }, { "epoch": 0.6098136569817335, "grad_norm": 1.1196419516009335, "learning_rate": 3.4886562306251815e-06, "loss": 0.5518, "step": 19897 }, { "epoch": 0.6098443055044747, "grad_norm": 1.2480836550739272, "learning_rate": 3.4881831366197627e-06, "loss": 0.6525, "step": 19898 }, { "epoch": 0.6098749540272159, "grad_norm": 1.3026045969575752, "learning_rate": 3.4877100575106622e-06, "loss": 0.5756, "step": 19899 }, { "epoch": 0.6099056025499571, "grad_norm": 1.2560974703618024, "learning_rate": 3.4872369933025404e-06, "loss": 0.578, "step": 19900 }, { "epoch": 0.6099362510726983, "grad_norm": 1.10759655588538, "learning_rate": 3.4867639440000617e-06, "loss": 0.5628, "step": 19901 }, { "epoch": 0.6099668995954395, "grad_norm": 1.445857631900858, "learning_rate": 3.486290909607884e-06, "loss": 0.6555, "step": 19902 }, { "epoch": 0.6099975481181807, "grad_norm": 1.2896900718111273, "learning_rate": 3.4858178901306684e-06, "loss": 0.562, "step": 19903 }, { "epoch": 0.6100281966409219, "grad_norm": 1.1494360227468599, "learning_rate": 3.4853448855730775e-06, "loss": 0.5192, "step": 19904 }, { "epoch": 0.6100588451636632, "grad_norm": 1.3656454597902343, "learning_rate": 3.48487189593977e-06, "loss": 0.6464, "step": 19905 }, { "epoch": 0.6100894936864043, "grad_norm": 1.5470498804706654, "learning_rate": 3.484398921235408e-06, "loss": 0.5561, "step": 19906 }, { "epoch": 0.6101201422091456, "grad_norm": 1.4808005480342596, "learning_rate": 3.4839259614646516e-06, "loss": 0.7354, "step": 19907 }, { "epoch": 0.6101507907318867, "grad_norm": 1.3277392570094535, "learning_rate": 3.483453016632159e-06, "loss": 0.641, "step": 19908 }, { "epoch": 0.610181439254628, "grad_norm": 1.2786232540373272, "learning_rate": 3.4829800867425933e-06, "loss": 0.5915, "step": 19909 }, { "epoch": 0.6102120877773691, "grad_norm": 1.3131574545239184, "learning_rate": 3.4825071718006142e-06, "loss": 0.6752, "step": 19910 }, { "epoch": 0.6102427363001104, "grad_norm": 1.3471024305241601, "learning_rate": 3.4820342718108767e-06, "loss": 0.5893, "step": 19911 }, { "epoch": 0.6102733848228515, "grad_norm": 1.271251092882293, "learning_rate": 3.4815613867780474e-06, "loss": 0.6707, "step": 19912 }, { "epoch": 0.6103040333455927, "grad_norm": 1.3877550340582478, "learning_rate": 3.481088516706781e-06, "loss": 0.6752, "step": 19913 }, { "epoch": 0.610334681868334, "grad_norm": 0.4707583905283618, "learning_rate": 3.4806156616017374e-06, "loss": 0.425, "step": 19914 }, { "epoch": 0.6103653303910751, "grad_norm": 1.2254487349354495, "learning_rate": 3.480142821467577e-06, "loss": 0.6232, "step": 19915 }, { "epoch": 0.6103959789138164, "grad_norm": 1.3353659610908095, "learning_rate": 3.4796699963089577e-06, "loss": 0.6489, "step": 19916 }, { "epoch": 0.6104266274365575, "grad_norm": 1.2765029819190434, "learning_rate": 3.4791971861305395e-06, "loss": 0.5493, "step": 19917 }, { "epoch": 0.6104572759592988, "grad_norm": 0.4683943438488829, "learning_rate": 3.4787243909369806e-06, "loss": 0.4081, "step": 19918 }, { "epoch": 0.6104879244820399, "grad_norm": 0.4550577187951875, "learning_rate": 3.478251610732939e-06, "loss": 0.3976, "step": 19919 }, { "epoch": 0.6105185730047812, "grad_norm": 1.5486661258065122, "learning_rate": 3.4777788455230744e-06, "loss": 0.6256, "step": 19920 }, { "epoch": 0.6105492215275223, "grad_norm": 1.179427633463551, "learning_rate": 3.477306095312045e-06, "loss": 0.5333, "step": 19921 }, { "epoch": 0.6105798700502636, "grad_norm": 0.47130402959031675, "learning_rate": 3.476833360104505e-06, "loss": 0.4044, "step": 19922 }, { "epoch": 0.6106105185730047, "grad_norm": 1.1458894170629592, "learning_rate": 3.476360639905119e-06, "loss": 0.6696, "step": 19923 }, { "epoch": 0.610641167095746, "grad_norm": 1.2715096761548044, "learning_rate": 3.4758879347185386e-06, "loss": 0.6301, "step": 19924 }, { "epoch": 0.6106718156184872, "grad_norm": 1.3975847928926823, "learning_rate": 3.475415244549427e-06, "loss": 0.5568, "step": 19925 }, { "epoch": 0.6107024641412284, "grad_norm": 1.3851822432003942, "learning_rate": 3.4749425694024386e-06, "loss": 0.648, "step": 19926 }, { "epoch": 0.6107331126639696, "grad_norm": 1.3177241659347732, "learning_rate": 3.4744699092822296e-06, "loss": 0.5477, "step": 19927 }, { "epoch": 0.6107637611867108, "grad_norm": 1.3086070971025918, "learning_rate": 3.4739972641934606e-06, "loss": 0.5721, "step": 19928 }, { "epoch": 0.610794409709452, "grad_norm": 1.2173841174351538, "learning_rate": 3.4735246341407867e-06, "loss": 0.6979, "step": 19929 }, { "epoch": 0.6108250582321932, "grad_norm": 1.409579653400933, "learning_rate": 3.473052019128864e-06, "loss": 0.715, "step": 19930 }, { "epoch": 0.6108557067549344, "grad_norm": 1.178395597660228, "learning_rate": 3.472579419162352e-06, "loss": 0.579, "step": 19931 }, { "epoch": 0.6108863552776757, "grad_norm": 1.2879606924942937, "learning_rate": 3.472106834245904e-06, "loss": 0.6558, "step": 19932 }, { "epoch": 0.6109170038004168, "grad_norm": 1.3371201970630051, "learning_rate": 3.4716342643841796e-06, "loss": 0.7165, "step": 19933 }, { "epoch": 0.6109476523231581, "grad_norm": 0.4334073482742696, "learning_rate": 3.471161709581835e-06, "loss": 0.3987, "step": 19934 }, { "epoch": 0.6109783008458992, "grad_norm": 1.3055833006326552, "learning_rate": 3.470689169843522e-06, "loss": 0.4947, "step": 19935 }, { "epoch": 0.6110089493686405, "grad_norm": 1.2016453220873033, "learning_rate": 3.4702166451739026e-06, "loss": 0.6332, "step": 19936 }, { "epoch": 0.6110395978913816, "grad_norm": 1.2550517627244204, "learning_rate": 3.4697441355776296e-06, "loss": 0.622, "step": 19937 }, { "epoch": 0.6110702464141229, "grad_norm": 1.3363430624529524, "learning_rate": 3.4692716410593587e-06, "loss": 0.6258, "step": 19938 }, { "epoch": 0.611100894936864, "grad_norm": 1.3958844434274882, "learning_rate": 3.468799161623746e-06, "loss": 0.6799, "step": 19939 }, { "epoch": 0.6111315434596053, "grad_norm": 0.45245757514979734, "learning_rate": 3.468326697275447e-06, "loss": 0.4008, "step": 19940 }, { "epoch": 0.6111621919823464, "grad_norm": 1.4755769383574848, "learning_rate": 3.467854248019116e-06, "loss": 0.7448, "step": 19941 }, { "epoch": 0.6111928405050877, "grad_norm": 0.46050403326904377, "learning_rate": 3.4673818138594107e-06, "loss": 0.392, "step": 19942 }, { "epoch": 0.6112234890278289, "grad_norm": 0.4543405496517175, "learning_rate": 3.466909394800983e-06, "loss": 0.4054, "step": 19943 }, { "epoch": 0.61125413755057, "grad_norm": 1.1874349425014248, "learning_rate": 3.4664369908484912e-06, "loss": 0.4828, "step": 19944 }, { "epoch": 0.6112847860733113, "grad_norm": 1.3412683835215888, "learning_rate": 3.4659646020065874e-06, "loss": 0.7149, "step": 19945 }, { "epoch": 0.6113154345960524, "grad_norm": 1.4584070366589772, "learning_rate": 3.4654922282799256e-06, "loss": 0.6573, "step": 19946 }, { "epoch": 0.6113460831187937, "grad_norm": 1.2252505946305994, "learning_rate": 3.4650198696731627e-06, "loss": 0.6311, "step": 19947 }, { "epoch": 0.6113767316415348, "grad_norm": 1.4697810672694887, "learning_rate": 3.4645475261909524e-06, "loss": 0.6932, "step": 19948 }, { "epoch": 0.6114073801642761, "grad_norm": 1.3217342041824571, "learning_rate": 3.464075197837946e-06, "loss": 0.6977, "step": 19949 }, { "epoch": 0.6114380286870172, "grad_norm": 1.4047174105612434, "learning_rate": 3.463602884618801e-06, "loss": 0.6229, "step": 19950 }, { "epoch": 0.6114686772097585, "grad_norm": 1.3642369228947524, "learning_rate": 3.4631305865381693e-06, "loss": 0.657, "step": 19951 }, { "epoch": 0.6114993257324997, "grad_norm": 1.2809793098872468, "learning_rate": 3.4626583036007055e-06, "loss": 0.6383, "step": 19952 }, { "epoch": 0.6115299742552409, "grad_norm": 1.291592451147842, "learning_rate": 3.462186035811065e-06, "loss": 0.5906, "step": 19953 }, { "epoch": 0.6115606227779821, "grad_norm": 1.2576948280367424, "learning_rate": 3.4617137831738945e-06, "loss": 0.6344, "step": 19954 }, { "epoch": 0.6115912713007233, "grad_norm": 1.2142481460747188, "learning_rate": 3.4612415456938553e-06, "loss": 0.5888, "step": 19955 }, { "epoch": 0.6116219198234645, "grad_norm": 1.2993323128123262, "learning_rate": 3.4607693233755958e-06, "loss": 0.4847, "step": 19956 }, { "epoch": 0.6116525683462057, "grad_norm": 1.234860929090931, "learning_rate": 3.460297116223769e-06, "loss": 0.6464, "step": 19957 }, { "epoch": 0.6116832168689469, "grad_norm": 1.4052728275973683, "learning_rate": 3.4598249242430304e-06, "loss": 0.5882, "step": 19958 }, { "epoch": 0.6117138653916881, "grad_norm": 1.3429502846740016, "learning_rate": 3.4593527474380288e-06, "loss": 0.7337, "step": 19959 }, { "epoch": 0.6117445139144293, "grad_norm": 1.3002091062628802, "learning_rate": 3.45888058581342e-06, "loss": 0.636, "step": 19960 }, { "epoch": 0.6117751624371706, "grad_norm": 1.4045707504946916, "learning_rate": 3.458408439373856e-06, "loss": 0.621, "step": 19961 }, { "epoch": 0.6118058109599117, "grad_norm": 1.2636602614843049, "learning_rate": 3.4579363081239857e-06, "loss": 0.572, "step": 19962 }, { "epoch": 0.611836459482653, "grad_norm": 1.130406111587275, "learning_rate": 3.4574641920684653e-06, "loss": 0.6235, "step": 19963 }, { "epoch": 0.6118671080053941, "grad_norm": 1.1565348015555446, "learning_rate": 3.4569920912119458e-06, "loss": 0.6527, "step": 19964 }, { "epoch": 0.6118977565281354, "grad_norm": 1.3255406873610633, "learning_rate": 3.456520005559075e-06, "loss": 0.5422, "step": 19965 }, { "epoch": 0.6119284050508765, "grad_norm": 1.2118090099160916, "learning_rate": 3.4560479351145103e-06, "loss": 0.5991, "step": 19966 }, { "epoch": 0.6119590535736178, "grad_norm": 0.47327356114418506, "learning_rate": 3.4555758798829e-06, "loss": 0.4076, "step": 19967 }, { "epoch": 0.611989702096359, "grad_norm": 1.2521325738155062, "learning_rate": 3.4551038398688943e-06, "loss": 0.6464, "step": 19968 }, { "epoch": 0.6120203506191002, "grad_norm": 1.3357464290131384, "learning_rate": 3.4546318150771463e-06, "loss": 0.6815, "step": 19969 }, { "epoch": 0.6120509991418414, "grad_norm": 1.3180142525097454, "learning_rate": 3.454159805512306e-06, "loss": 0.5225, "step": 19970 }, { "epoch": 0.6120816476645826, "grad_norm": 1.4131660278075608, "learning_rate": 3.453687811179025e-06, "loss": 0.6044, "step": 19971 }, { "epoch": 0.6121122961873238, "grad_norm": 1.2565914115424535, "learning_rate": 3.4532158320819543e-06, "loss": 0.6678, "step": 19972 }, { "epoch": 0.612142944710065, "grad_norm": 1.5335064343758582, "learning_rate": 3.452743868225743e-06, "loss": 0.6319, "step": 19973 }, { "epoch": 0.6121735932328062, "grad_norm": 0.4661512830568911, "learning_rate": 3.4522719196150423e-06, "loss": 0.4043, "step": 19974 }, { "epoch": 0.6122042417555473, "grad_norm": 1.1989885068894273, "learning_rate": 3.4517999862545045e-06, "loss": 0.6046, "step": 19975 }, { "epoch": 0.6122348902782886, "grad_norm": 1.562173670466403, "learning_rate": 3.4513280681487738e-06, "loss": 0.696, "step": 19976 }, { "epoch": 0.6122655388010297, "grad_norm": 1.3200578280127075, "learning_rate": 3.4508561653025076e-06, "loss": 0.6073, "step": 19977 }, { "epoch": 0.612296187323771, "grad_norm": 1.2822934888535793, "learning_rate": 3.450384277720348e-06, "loss": 0.5685, "step": 19978 }, { "epoch": 0.6123268358465122, "grad_norm": 1.2399134552840065, "learning_rate": 3.449912405406952e-06, "loss": 0.6403, "step": 19979 }, { "epoch": 0.6123574843692534, "grad_norm": 1.2473747498956822, "learning_rate": 3.449440548366965e-06, "loss": 0.7077, "step": 19980 }, { "epoch": 0.6123881328919946, "grad_norm": 1.2781656954587568, "learning_rate": 3.4489687066050353e-06, "loss": 0.6242, "step": 19981 }, { "epoch": 0.6124187814147358, "grad_norm": 1.2310874617496466, "learning_rate": 3.448496880125815e-06, "loss": 0.6178, "step": 19982 }, { "epoch": 0.612449429937477, "grad_norm": 1.4742706502956917, "learning_rate": 3.4480250689339522e-06, "loss": 0.6481, "step": 19983 }, { "epoch": 0.6124800784602182, "grad_norm": 1.240293110049424, "learning_rate": 3.4475532730340944e-06, "loss": 0.5475, "step": 19984 }, { "epoch": 0.6125107269829594, "grad_norm": 1.4181887097731685, "learning_rate": 3.4470814924308926e-06, "loss": 0.7534, "step": 19985 }, { "epoch": 0.6125413755057006, "grad_norm": 1.3493100640129754, "learning_rate": 3.446609727128993e-06, "loss": 0.5697, "step": 19986 }, { "epoch": 0.6125720240284418, "grad_norm": 1.3990804152993201, "learning_rate": 3.446137977133046e-06, "loss": 0.6138, "step": 19987 }, { "epoch": 0.6126026725511831, "grad_norm": 1.3611228557448825, "learning_rate": 3.4456662424477006e-06, "loss": 0.6023, "step": 19988 }, { "epoch": 0.6126333210739242, "grad_norm": 1.2553474687473356, "learning_rate": 3.4451945230776007e-06, "loss": 0.6598, "step": 19989 }, { "epoch": 0.6126639695966655, "grad_norm": 1.3889465977150637, "learning_rate": 3.4447228190273987e-06, "loss": 0.7099, "step": 19990 }, { "epoch": 0.6126946181194066, "grad_norm": 1.2860517304471215, "learning_rate": 3.44425113030174e-06, "loss": 0.5379, "step": 19991 }, { "epoch": 0.6127252666421479, "grad_norm": 1.182512678373669, "learning_rate": 3.4437794569052724e-06, "loss": 0.6573, "step": 19992 }, { "epoch": 0.612755915164889, "grad_norm": 1.2874239849707356, "learning_rate": 3.443307798842645e-06, "loss": 0.6548, "step": 19993 }, { "epoch": 0.6127865636876303, "grad_norm": 1.0509720629062291, "learning_rate": 3.4428361561185043e-06, "loss": 0.5661, "step": 19994 }, { "epoch": 0.6128172122103714, "grad_norm": 1.3156717896507624, "learning_rate": 3.442364528737496e-06, "loss": 0.6607, "step": 19995 }, { "epoch": 0.6128478607331127, "grad_norm": 1.2890965810088353, "learning_rate": 3.4418929167042704e-06, "loss": 0.6593, "step": 19996 }, { "epoch": 0.6128785092558539, "grad_norm": 1.4173703471914134, "learning_rate": 3.4414213200234696e-06, "loss": 0.5815, "step": 19997 }, { "epoch": 0.6129091577785951, "grad_norm": 1.2315897352359122, "learning_rate": 3.4409497386997472e-06, "loss": 0.6321, "step": 19998 }, { "epoch": 0.6129398063013363, "grad_norm": 1.2739982100848557, "learning_rate": 3.440478172737744e-06, "loss": 0.628, "step": 19999 }, { "epoch": 0.6129704548240775, "grad_norm": 0.47707194305113954, "learning_rate": 3.4400066221421073e-06, "loss": 0.4079, "step": 20000 }, { "epoch": 0.6130011033468187, "grad_norm": 1.0905176465897723, "learning_rate": 3.439535086917486e-06, "loss": 0.5467, "step": 20001 }, { "epoch": 0.6130317518695599, "grad_norm": 1.1788703854482898, "learning_rate": 3.4390635670685244e-06, "loss": 0.635, "step": 20002 }, { "epoch": 0.6130624003923011, "grad_norm": 0.4932371344186838, "learning_rate": 3.438592062599868e-06, "loss": 0.3948, "step": 20003 }, { "epoch": 0.6130930489150423, "grad_norm": 1.2240417311051104, "learning_rate": 3.438120573516165e-06, "loss": 0.5995, "step": 20004 }, { "epoch": 0.6131236974377835, "grad_norm": 1.2673028403830273, "learning_rate": 3.437649099822058e-06, "loss": 0.6392, "step": 20005 }, { "epoch": 0.6131543459605246, "grad_norm": 1.4558613378256964, "learning_rate": 3.437177641522196e-06, "loss": 0.7223, "step": 20006 }, { "epoch": 0.6131849944832659, "grad_norm": 1.3234685758282323, "learning_rate": 3.4367061986212223e-06, "loss": 0.6244, "step": 20007 }, { "epoch": 0.6132156430060071, "grad_norm": 1.3268924735915075, "learning_rate": 3.4362347711237797e-06, "loss": 0.7237, "step": 20008 }, { "epoch": 0.6132462915287483, "grad_norm": 1.3355099544358158, "learning_rate": 3.4357633590345195e-06, "loss": 0.6249, "step": 20009 }, { "epoch": 0.6132769400514895, "grad_norm": 1.2247241044798203, "learning_rate": 3.435291962358082e-06, "loss": 0.5818, "step": 20010 }, { "epoch": 0.6133075885742307, "grad_norm": 1.1826568521236465, "learning_rate": 3.434820581099112e-06, "loss": 0.6008, "step": 20011 }, { "epoch": 0.6133382370969719, "grad_norm": 1.1637721084763775, "learning_rate": 3.4343492152622564e-06, "loss": 0.6534, "step": 20012 }, { "epoch": 0.6133688856197131, "grad_norm": 1.1867702490426766, "learning_rate": 3.4338778648521575e-06, "loss": 0.669, "step": 20013 }, { "epoch": 0.6133995341424543, "grad_norm": 1.2972994382294174, "learning_rate": 3.433406529873462e-06, "loss": 0.661, "step": 20014 }, { "epoch": 0.6134301826651956, "grad_norm": 1.3253306194335788, "learning_rate": 3.4329352103308123e-06, "loss": 0.7028, "step": 20015 }, { "epoch": 0.6134608311879367, "grad_norm": 1.0968709355433766, "learning_rate": 3.432463906228852e-06, "loss": 0.5019, "step": 20016 }, { "epoch": 0.613491479710678, "grad_norm": 1.4171680159399678, "learning_rate": 3.4319926175722272e-06, "loss": 0.6732, "step": 20017 }, { "epoch": 0.6135221282334191, "grad_norm": 1.146332674213928, "learning_rate": 3.4315213443655816e-06, "loss": 0.5795, "step": 20018 }, { "epoch": 0.6135527767561604, "grad_norm": 1.1257299550286732, "learning_rate": 3.4310500866135543e-06, "loss": 0.6404, "step": 20019 }, { "epoch": 0.6135834252789015, "grad_norm": 1.370964449047766, "learning_rate": 3.4305788443207944e-06, "loss": 0.6116, "step": 20020 }, { "epoch": 0.6136140738016428, "grad_norm": 0.4581540938702443, "learning_rate": 3.4301076174919423e-06, "loss": 0.3941, "step": 20021 }, { "epoch": 0.6136447223243839, "grad_norm": 1.3118057274961834, "learning_rate": 3.4296364061316402e-06, "loss": 0.6796, "step": 20022 }, { "epoch": 0.6136753708471252, "grad_norm": 1.2351525073452374, "learning_rate": 3.4291652102445337e-06, "loss": 0.6797, "step": 20023 }, { "epoch": 0.6137060193698664, "grad_norm": 0.4739778837359143, "learning_rate": 3.4286940298352627e-06, "loss": 0.4221, "step": 20024 }, { "epoch": 0.6137366678926076, "grad_norm": 1.429588350645566, "learning_rate": 3.4282228649084733e-06, "loss": 0.632, "step": 20025 }, { "epoch": 0.6137673164153488, "grad_norm": 1.2559058316733283, "learning_rate": 3.4277517154688055e-06, "loss": 0.63, "step": 20026 }, { "epoch": 0.61379796493809, "grad_norm": 1.2395844474880835, "learning_rate": 3.4272805815209015e-06, "loss": 0.6738, "step": 20027 }, { "epoch": 0.6138286134608312, "grad_norm": 1.13441556406465, "learning_rate": 3.4268094630694047e-06, "loss": 0.6861, "step": 20028 }, { "epoch": 0.6138592619835724, "grad_norm": 1.2952391648359862, "learning_rate": 3.4263383601189594e-06, "loss": 0.6428, "step": 20029 }, { "epoch": 0.6138899105063136, "grad_norm": 1.2710536914257593, "learning_rate": 3.4258672726742005e-06, "loss": 0.6003, "step": 20030 }, { "epoch": 0.6139205590290548, "grad_norm": 1.3201773909891104, "learning_rate": 3.425396200739778e-06, "loss": 0.6149, "step": 20031 }, { "epoch": 0.613951207551796, "grad_norm": 0.4854397303464304, "learning_rate": 3.4249251443203256e-06, "loss": 0.3941, "step": 20032 }, { "epoch": 0.6139818560745373, "grad_norm": 1.0962329728825626, "learning_rate": 3.4244541034204926e-06, "loss": 0.565, "step": 20033 }, { "epoch": 0.6140125045972784, "grad_norm": 1.354352464633573, "learning_rate": 3.4239830780449147e-06, "loss": 0.6242, "step": 20034 }, { "epoch": 0.6140431531200197, "grad_norm": 1.2171145543697077, "learning_rate": 3.423512068198234e-06, "loss": 0.628, "step": 20035 }, { "epoch": 0.6140738016427608, "grad_norm": 1.167579035301498, "learning_rate": 3.423041073885094e-06, "loss": 0.509, "step": 20036 }, { "epoch": 0.614104450165502, "grad_norm": 1.1686545192043245, "learning_rate": 3.422570095110133e-06, "loss": 0.541, "step": 20037 }, { "epoch": 0.6141350986882432, "grad_norm": 1.27722066869958, "learning_rate": 3.4220991318779917e-06, "loss": 0.67, "step": 20038 }, { "epoch": 0.6141657472109844, "grad_norm": 1.4966276071760274, "learning_rate": 3.4216281841933126e-06, "loss": 0.6883, "step": 20039 }, { "epoch": 0.6141963957337256, "grad_norm": 1.2435079337190658, "learning_rate": 3.4211572520607334e-06, "loss": 0.4989, "step": 20040 }, { "epoch": 0.6142270442564668, "grad_norm": 0.44098106368426526, "learning_rate": 3.4206863354848978e-06, "loss": 0.3856, "step": 20041 }, { "epoch": 0.614257692779208, "grad_norm": 1.329783623204957, "learning_rate": 3.420215434470443e-06, "loss": 0.6151, "step": 20042 }, { "epoch": 0.6142883413019492, "grad_norm": 1.3818351550406283, "learning_rate": 3.4197445490220086e-06, "loss": 0.6641, "step": 20043 }, { "epoch": 0.6143189898246905, "grad_norm": 1.202636276874442, "learning_rate": 3.419273679144237e-06, "loss": 0.6998, "step": 20044 }, { "epoch": 0.6143496383474316, "grad_norm": 1.1969750339716314, "learning_rate": 3.418802824841766e-06, "loss": 0.5774, "step": 20045 }, { "epoch": 0.6143802868701729, "grad_norm": 1.1223583257560772, "learning_rate": 3.4183319861192344e-06, "loss": 0.6468, "step": 20046 }, { "epoch": 0.614410935392914, "grad_norm": 1.1852785720607202, "learning_rate": 3.417861162981283e-06, "loss": 0.5171, "step": 20047 }, { "epoch": 0.6144415839156553, "grad_norm": 1.3882052044458786, "learning_rate": 3.4173903554325517e-06, "loss": 0.5845, "step": 20048 }, { "epoch": 0.6144722324383964, "grad_norm": 1.4132931216413287, "learning_rate": 3.4169195634776747e-06, "loss": 0.6069, "step": 20049 }, { "epoch": 0.6145028809611377, "grad_norm": 0.4423041590107439, "learning_rate": 3.416448787121298e-06, "loss": 0.3854, "step": 20050 }, { "epoch": 0.6145335294838788, "grad_norm": 1.2702886262815205, "learning_rate": 3.4159780263680533e-06, "loss": 0.6355, "step": 20051 }, { "epoch": 0.6145641780066201, "grad_norm": 1.3847796900010099, "learning_rate": 3.4155072812225852e-06, "loss": 0.6378, "step": 20052 }, { "epoch": 0.6145948265293613, "grad_norm": 1.3608512642273274, "learning_rate": 3.4150365516895285e-06, "loss": 0.6963, "step": 20053 }, { "epoch": 0.6146254750521025, "grad_norm": 0.4498022590762257, "learning_rate": 3.4145658377735206e-06, "loss": 0.4062, "step": 20054 }, { "epoch": 0.6146561235748437, "grad_norm": 1.2806567765640449, "learning_rate": 3.4140951394792033e-06, "loss": 0.5552, "step": 20055 }, { "epoch": 0.6146867720975849, "grad_norm": 1.3026187303171834, "learning_rate": 3.4136244568112115e-06, "loss": 0.5865, "step": 20056 }, { "epoch": 0.6147174206203261, "grad_norm": 1.2429652350504659, "learning_rate": 3.4131537897741828e-06, "loss": 0.6765, "step": 20057 }, { "epoch": 0.6147480691430673, "grad_norm": 1.2393007127442657, "learning_rate": 3.412683138372757e-06, "loss": 0.5666, "step": 20058 }, { "epoch": 0.6147787176658085, "grad_norm": 1.323263575149836, "learning_rate": 3.412212502611569e-06, "loss": 0.6148, "step": 20059 }, { "epoch": 0.6148093661885498, "grad_norm": 1.280588659367026, "learning_rate": 3.4117418824952597e-06, "loss": 0.5723, "step": 20060 }, { "epoch": 0.6148400147112909, "grad_norm": 1.3664209395520484, "learning_rate": 3.411271278028464e-06, "loss": 0.6657, "step": 20061 }, { "epoch": 0.6148706632340322, "grad_norm": 1.3608404705335064, "learning_rate": 3.4108006892158162e-06, "loss": 0.5812, "step": 20062 }, { "epoch": 0.6149013117567733, "grad_norm": 1.3122666800293532, "learning_rate": 3.4103301160619585e-06, "loss": 0.6075, "step": 20063 }, { "epoch": 0.6149319602795146, "grad_norm": 1.2327223218137033, "learning_rate": 3.4098595585715243e-06, "loss": 0.6247, "step": 20064 }, { "epoch": 0.6149626088022557, "grad_norm": 1.1241975377364117, "learning_rate": 3.4093890167491493e-06, "loss": 0.5063, "step": 20065 }, { "epoch": 0.614993257324997, "grad_norm": 1.483032210722939, "learning_rate": 3.408918490599472e-06, "loss": 0.6865, "step": 20066 }, { "epoch": 0.6150239058477381, "grad_norm": 1.2565800857592906, "learning_rate": 3.4084479801271285e-06, "loss": 0.6631, "step": 20067 }, { "epoch": 0.6150545543704793, "grad_norm": 1.450387317760978, "learning_rate": 3.407977485336754e-06, "loss": 0.6429, "step": 20068 }, { "epoch": 0.6150852028932206, "grad_norm": 1.2900270165156567, "learning_rate": 3.4075070062329847e-06, "loss": 0.5877, "step": 20069 }, { "epoch": 0.6151158514159617, "grad_norm": 1.192034451254655, "learning_rate": 3.4070365428204555e-06, "loss": 0.5423, "step": 20070 }, { "epoch": 0.615146499938703, "grad_norm": 1.1129911817826676, "learning_rate": 3.406566095103804e-06, "loss": 0.5498, "step": 20071 }, { "epoch": 0.6151771484614441, "grad_norm": 1.3527350507104878, "learning_rate": 3.4060956630876653e-06, "loss": 0.6644, "step": 20072 }, { "epoch": 0.6152077969841854, "grad_norm": 1.2784067146198275, "learning_rate": 3.4056252467766703e-06, "loss": 0.6462, "step": 20073 }, { "epoch": 0.6152384455069265, "grad_norm": 1.3512828557725838, "learning_rate": 3.4051548461754615e-06, "loss": 0.6538, "step": 20074 }, { "epoch": 0.6152690940296678, "grad_norm": 1.2614849869841045, "learning_rate": 3.404684461288669e-06, "loss": 0.624, "step": 20075 }, { "epoch": 0.6152997425524089, "grad_norm": 1.4670933533281196, "learning_rate": 3.4042140921209265e-06, "loss": 0.6763, "step": 20076 }, { "epoch": 0.6153303910751502, "grad_norm": 1.1022246143700711, "learning_rate": 3.4037437386768735e-06, "loss": 0.5605, "step": 20077 }, { "epoch": 0.6153610395978913, "grad_norm": 0.4841110620341034, "learning_rate": 3.4032734009611403e-06, "loss": 0.3981, "step": 20078 }, { "epoch": 0.6153916881206326, "grad_norm": 1.2380668714448821, "learning_rate": 3.4028030789783635e-06, "loss": 0.6754, "step": 20079 }, { "epoch": 0.6154223366433738, "grad_norm": 1.3525948552418905, "learning_rate": 3.4023327727331768e-06, "loss": 0.6537, "step": 20080 }, { "epoch": 0.615452985166115, "grad_norm": 1.1793522671812413, "learning_rate": 3.4018624822302126e-06, "loss": 0.6433, "step": 20081 }, { "epoch": 0.6154836336888562, "grad_norm": 1.2502976835382555, "learning_rate": 3.401392207474108e-06, "loss": 0.6358, "step": 20082 }, { "epoch": 0.6155142822115974, "grad_norm": 1.323797266078483, "learning_rate": 3.4009219484694954e-06, "loss": 0.6949, "step": 20083 }, { "epoch": 0.6155449307343386, "grad_norm": 0.44110007116792654, "learning_rate": 3.4004517052210056e-06, "loss": 0.4131, "step": 20084 }, { "epoch": 0.6155755792570798, "grad_norm": 1.3995690303784645, "learning_rate": 3.3999814777332774e-06, "loss": 0.5664, "step": 20085 }, { "epoch": 0.615606227779821, "grad_norm": 1.3657519452180569, "learning_rate": 3.399511266010939e-06, "loss": 0.6082, "step": 20086 }, { "epoch": 0.6156368763025623, "grad_norm": 1.2061678949979204, "learning_rate": 3.3990410700586262e-06, "loss": 0.5789, "step": 20087 }, { "epoch": 0.6156675248253034, "grad_norm": 1.542167573517747, "learning_rate": 3.398570889880971e-06, "loss": 0.6687, "step": 20088 }, { "epoch": 0.6156981733480447, "grad_norm": 0.4518787341221183, "learning_rate": 3.3981007254826064e-06, "loss": 0.3837, "step": 20089 }, { "epoch": 0.6157288218707858, "grad_norm": 1.3475034983510807, "learning_rate": 3.3976305768681662e-06, "loss": 0.6863, "step": 20090 }, { "epoch": 0.6157594703935271, "grad_norm": 1.311596526865781, "learning_rate": 3.3971604440422813e-06, "loss": 0.5878, "step": 20091 }, { "epoch": 0.6157901189162682, "grad_norm": 1.2203025270380947, "learning_rate": 3.396690327009584e-06, "loss": 0.5997, "step": 20092 }, { "epoch": 0.6158207674390095, "grad_norm": 1.2514378475702286, "learning_rate": 3.396220225774709e-06, "loss": 0.6083, "step": 20093 }, { "epoch": 0.6158514159617506, "grad_norm": 0.4402862420752758, "learning_rate": 3.395750140342286e-06, "loss": 0.4084, "step": 20094 }, { "epoch": 0.6158820644844919, "grad_norm": 1.2769657426318164, "learning_rate": 3.395280070716946e-06, "loss": 0.658, "step": 20095 }, { "epoch": 0.615912713007233, "grad_norm": 0.44148920573075734, "learning_rate": 3.3948100169033225e-06, "loss": 0.4131, "step": 20096 }, { "epoch": 0.6159433615299743, "grad_norm": 1.3907232442679018, "learning_rate": 3.394339978906046e-06, "loss": 0.6268, "step": 20097 }, { "epoch": 0.6159740100527155, "grad_norm": 1.1514127790962574, "learning_rate": 3.39386995672975e-06, "loss": 0.6478, "step": 20098 }, { "epoch": 0.6160046585754566, "grad_norm": 1.3594335745323887, "learning_rate": 3.3933999503790638e-06, "loss": 0.5469, "step": 20099 }, { "epoch": 0.6160353070981979, "grad_norm": 1.3672615201126297, "learning_rate": 3.3929299598586183e-06, "loss": 0.7255, "step": 20100 }, { "epoch": 0.616065955620939, "grad_norm": 1.266221815580369, "learning_rate": 3.3924599851730456e-06, "loss": 0.5008, "step": 20101 }, { "epoch": 0.6160966041436803, "grad_norm": 1.2451109160228064, "learning_rate": 3.391990026326977e-06, "loss": 0.6996, "step": 20102 }, { "epoch": 0.6161272526664214, "grad_norm": 1.1305429073602395, "learning_rate": 3.3915200833250393e-06, "loss": 0.5832, "step": 20103 }, { "epoch": 0.6161579011891627, "grad_norm": 1.1548072771195264, "learning_rate": 3.391050156171869e-06, "loss": 0.5732, "step": 20104 }, { "epoch": 0.6161885497119038, "grad_norm": 1.1847285106646048, "learning_rate": 3.39058024487209e-06, "loss": 0.6273, "step": 20105 }, { "epoch": 0.6162191982346451, "grad_norm": 1.4237755795404894, "learning_rate": 3.390110349430339e-06, "loss": 0.6319, "step": 20106 }, { "epoch": 0.6162498467573863, "grad_norm": 1.3421000001943437, "learning_rate": 3.389640469851241e-06, "loss": 0.5753, "step": 20107 }, { "epoch": 0.6162804952801275, "grad_norm": 1.2182952916703558, "learning_rate": 3.3891706061394263e-06, "loss": 0.5759, "step": 20108 }, { "epoch": 0.6163111438028687, "grad_norm": 0.4534387937546792, "learning_rate": 3.388700758299527e-06, "loss": 0.4324, "step": 20109 }, { "epoch": 0.6163417923256099, "grad_norm": 2.123134402665049, "learning_rate": 3.388230926336172e-06, "loss": 0.6924, "step": 20110 }, { "epoch": 0.6163724408483511, "grad_norm": 1.1140323382201756, "learning_rate": 3.3877611102539885e-06, "loss": 0.6146, "step": 20111 }, { "epoch": 0.6164030893710923, "grad_norm": 0.4646891457324042, "learning_rate": 3.387291310057608e-06, "loss": 0.407, "step": 20112 }, { "epoch": 0.6164337378938335, "grad_norm": 1.2057266618809301, "learning_rate": 3.3868215257516583e-06, "loss": 0.6331, "step": 20113 }, { "epoch": 0.6164643864165747, "grad_norm": 1.192003157722467, "learning_rate": 3.38635175734077e-06, "loss": 0.5137, "step": 20114 }, { "epoch": 0.6164950349393159, "grad_norm": 1.393815833779859, "learning_rate": 3.3858820048295714e-06, "loss": 0.6566, "step": 20115 }, { "epoch": 0.6165256834620572, "grad_norm": 1.2703340134526306, "learning_rate": 3.3854122682226873e-06, "loss": 0.681, "step": 20116 }, { "epoch": 0.6165563319847983, "grad_norm": 1.2207779840184845, "learning_rate": 3.3849425475247533e-06, "loss": 0.6575, "step": 20117 }, { "epoch": 0.6165869805075396, "grad_norm": 1.2692137618684094, "learning_rate": 3.384472842740392e-06, "loss": 0.7022, "step": 20118 }, { "epoch": 0.6166176290302807, "grad_norm": 1.2590309291657062, "learning_rate": 3.384003153874231e-06, "loss": 0.6335, "step": 20119 }, { "epoch": 0.616648277553022, "grad_norm": 1.1261748658238886, "learning_rate": 3.383533480930903e-06, "loss": 0.6078, "step": 20120 }, { "epoch": 0.6166789260757631, "grad_norm": 1.3467212162201392, "learning_rate": 3.383063823915032e-06, "loss": 0.6773, "step": 20121 }, { "epoch": 0.6167095745985044, "grad_norm": 1.2664963161283938, "learning_rate": 3.3825941828312463e-06, "loss": 0.5172, "step": 20122 }, { "epoch": 0.6167402231212455, "grad_norm": 1.3308411446107267, "learning_rate": 3.382124557684175e-06, "loss": 0.6531, "step": 20123 }, { "epoch": 0.6167708716439868, "grad_norm": 0.46137035257680953, "learning_rate": 3.3816549484784434e-06, "loss": 0.3897, "step": 20124 }, { "epoch": 0.616801520166728, "grad_norm": 1.3560612421474327, "learning_rate": 3.38118535521868e-06, "loss": 0.5592, "step": 20125 }, { "epoch": 0.6168321686894692, "grad_norm": 1.5097808939860449, "learning_rate": 3.3807157779095135e-06, "loss": 0.6217, "step": 20126 }, { "epoch": 0.6168628172122104, "grad_norm": 1.5026266632860696, "learning_rate": 3.3802462165555653e-06, "loss": 0.6879, "step": 20127 }, { "epoch": 0.6168934657349516, "grad_norm": 1.3192625770929343, "learning_rate": 3.3797766711614686e-06, "loss": 0.7147, "step": 20128 }, { "epoch": 0.6169241142576928, "grad_norm": 1.3598709781628593, "learning_rate": 3.379307141731846e-06, "loss": 0.616, "step": 20129 }, { "epoch": 0.6169547627804339, "grad_norm": 1.1749502488614094, "learning_rate": 3.3788376282713244e-06, "loss": 0.6221, "step": 20130 }, { "epoch": 0.6169854113031752, "grad_norm": 1.535279160628317, "learning_rate": 3.3783681307845307e-06, "loss": 0.5954, "step": 20131 }, { "epoch": 0.6170160598259163, "grad_norm": 1.422070942795287, "learning_rate": 3.3778986492760895e-06, "loss": 0.6197, "step": 20132 }, { "epoch": 0.6170467083486576, "grad_norm": 1.2290752832614447, "learning_rate": 3.377429183750629e-06, "loss": 0.5859, "step": 20133 }, { "epoch": 0.6170773568713988, "grad_norm": 0.45481716509924275, "learning_rate": 3.3769597342127745e-06, "loss": 0.424, "step": 20134 }, { "epoch": 0.61710800539414, "grad_norm": 1.1456466585690463, "learning_rate": 3.3764903006671496e-06, "loss": 0.5879, "step": 20135 }, { "epoch": 0.6171386539168812, "grad_norm": 1.22721839399982, "learning_rate": 3.376020883118382e-06, "loss": 0.6528, "step": 20136 }, { "epoch": 0.6171693024396224, "grad_norm": 1.2454795039752014, "learning_rate": 3.3755514815710976e-06, "loss": 0.6509, "step": 20137 }, { "epoch": 0.6171999509623636, "grad_norm": 1.3375839695159777, "learning_rate": 3.375082096029918e-06, "loss": 0.557, "step": 20138 }, { "epoch": 0.6172305994851048, "grad_norm": 1.1682629348092377, "learning_rate": 3.374612726499471e-06, "loss": 0.5409, "step": 20139 }, { "epoch": 0.617261248007846, "grad_norm": 0.47520886746326535, "learning_rate": 3.3741433729843796e-06, "loss": 0.4169, "step": 20140 }, { "epoch": 0.6172918965305872, "grad_norm": 1.3376471621112942, "learning_rate": 3.3736740354892707e-06, "loss": 0.5965, "step": 20141 }, { "epoch": 0.6173225450533284, "grad_norm": 1.6546844245717585, "learning_rate": 3.373204714018768e-06, "loss": 0.6124, "step": 20142 }, { "epoch": 0.6173531935760697, "grad_norm": 1.2521718484124926, "learning_rate": 3.3727354085774944e-06, "loss": 0.7043, "step": 20143 }, { "epoch": 0.6173838420988108, "grad_norm": 1.4180349302235222, "learning_rate": 3.3722661191700757e-06, "loss": 0.5821, "step": 20144 }, { "epoch": 0.6174144906215521, "grad_norm": 1.39058179811141, "learning_rate": 3.3717968458011364e-06, "loss": 0.605, "step": 20145 }, { "epoch": 0.6174451391442932, "grad_norm": 1.2562608636041992, "learning_rate": 3.371327588475297e-06, "loss": 0.5874, "step": 20146 }, { "epoch": 0.6174757876670345, "grad_norm": 1.3005201632431784, "learning_rate": 3.3708583471971854e-06, "loss": 0.6846, "step": 20147 }, { "epoch": 0.6175064361897756, "grad_norm": 1.4417877553906044, "learning_rate": 3.3703891219714237e-06, "loss": 0.568, "step": 20148 }, { "epoch": 0.6175370847125169, "grad_norm": 1.2243737104796215, "learning_rate": 3.369919912802633e-06, "loss": 0.5952, "step": 20149 }, { "epoch": 0.617567733235258, "grad_norm": 1.2765599539734944, "learning_rate": 3.36945071969544e-06, "loss": 0.6312, "step": 20150 }, { "epoch": 0.6175983817579993, "grad_norm": 1.3427897914977058, "learning_rate": 3.368981542654465e-06, "loss": 0.6574, "step": 20151 }, { "epoch": 0.6176290302807405, "grad_norm": 1.3533636145308106, "learning_rate": 3.3685123816843335e-06, "loss": 0.6695, "step": 20152 }, { "epoch": 0.6176596788034817, "grad_norm": 1.2227480031684501, "learning_rate": 3.3680432367896667e-06, "loss": 0.5844, "step": 20153 }, { "epoch": 0.6176903273262229, "grad_norm": 1.3229877461377986, "learning_rate": 3.367574107975087e-06, "loss": 0.6432, "step": 20154 }, { "epoch": 0.6177209758489641, "grad_norm": 1.2819879108845318, "learning_rate": 3.3671049952452172e-06, "loss": 0.566, "step": 20155 }, { "epoch": 0.6177516243717053, "grad_norm": 1.5402283200888398, "learning_rate": 3.366635898604681e-06, "loss": 0.691, "step": 20156 }, { "epoch": 0.6177822728944465, "grad_norm": 1.277558599185645, "learning_rate": 3.3661668180580965e-06, "loss": 0.6866, "step": 20157 }, { "epoch": 0.6178129214171877, "grad_norm": 1.0985695132756024, "learning_rate": 3.3656977536100916e-06, "loss": 0.5125, "step": 20158 }, { "epoch": 0.617843569939929, "grad_norm": 1.5597881088700711, "learning_rate": 3.3652287052652816e-06, "loss": 0.585, "step": 20159 }, { "epoch": 0.6178742184626701, "grad_norm": 1.320788730829705, "learning_rate": 3.3647596730282944e-06, "loss": 0.6982, "step": 20160 }, { "epoch": 0.6179048669854112, "grad_norm": 1.3521962024352865, "learning_rate": 3.3642906569037474e-06, "loss": 0.6307, "step": 20161 }, { "epoch": 0.6179355155081525, "grad_norm": 1.245355230030335, "learning_rate": 3.363821656896262e-06, "loss": 0.6706, "step": 20162 }, { "epoch": 0.6179661640308937, "grad_norm": 1.3099469674231015, "learning_rate": 3.363352673010462e-06, "loss": 0.5729, "step": 20163 }, { "epoch": 0.6179968125536349, "grad_norm": 1.283899239035764, "learning_rate": 3.3628837052509666e-06, "loss": 0.6674, "step": 20164 }, { "epoch": 0.6180274610763761, "grad_norm": 1.19510700829562, "learning_rate": 3.3624147536223962e-06, "loss": 0.6585, "step": 20165 }, { "epoch": 0.6180581095991173, "grad_norm": 2.025521557872683, "learning_rate": 3.3619458181293728e-06, "loss": 0.5789, "step": 20166 }, { "epoch": 0.6180887581218585, "grad_norm": 1.4741362740808177, "learning_rate": 3.3614768987765155e-06, "loss": 0.6605, "step": 20167 }, { "epoch": 0.6181194066445997, "grad_norm": 1.3497571064625005, "learning_rate": 3.361007995568446e-06, "loss": 0.6462, "step": 20168 }, { "epoch": 0.6181500551673409, "grad_norm": 0.5047254568125222, "learning_rate": 3.360539108509786e-06, "loss": 0.405, "step": 20169 }, { "epoch": 0.6181807036900822, "grad_norm": 1.2673601788988333, "learning_rate": 3.3600702376051497e-06, "loss": 0.5588, "step": 20170 }, { "epoch": 0.6182113522128233, "grad_norm": 1.1331791687915163, "learning_rate": 3.359601382859165e-06, "loss": 0.5606, "step": 20171 }, { "epoch": 0.6182420007355646, "grad_norm": 1.3769425025695101, "learning_rate": 3.359132544276446e-06, "loss": 0.69, "step": 20172 }, { "epoch": 0.6182726492583057, "grad_norm": 1.3868301241276637, "learning_rate": 3.358663721861613e-06, "loss": 0.6502, "step": 20173 }, { "epoch": 0.618303297781047, "grad_norm": 1.1889852584315252, "learning_rate": 3.358194915619287e-06, "loss": 0.5796, "step": 20174 }, { "epoch": 0.6183339463037881, "grad_norm": 1.121992842961053, "learning_rate": 3.3577261255540873e-06, "loss": 0.6854, "step": 20175 }, { "epoch": 0.6183645948265294, "grad_norm": 1.3474731957288333, "learning_rate": 3.3572573516706307e-06, "loss": 0.5624, "step": 20176 }, { "epoch": 0.6183952433492705, "grad_norm": 1.1482081125590449, "learning_rate": 3.356788593973539e-06, "loss": 0.6098, "step": 20177 }, { "epoch": 0.6184258918720118, "grad_norm": 1.3320150550311483, "learning_rate": 3.356319852467428e-06, "loss": 0.566, "step": 20178 }, { "epoch": 0.618456540394753, "grad_norm": 1.3099216043819635, "learning_rate": 3.3558511271569194e-06, "loss": 0.6775, "step": 20179 }, { "epoch": 0.6184871889174942, "grad_norm": 0.44610661884069935, "learning_rate": 3.355382418046632e-06, "loss": 0.4067, "step": 20180 }, { "epoch": 0.6185178374402354, "grad_norm": 1.2085848568030706, "learning_rate": 3.3549137251411788e-06, "loss": 0.6602, "step": 20181 }, { "epoch": 0.6185484859629766, "grad_norm": 1.110907554025282, "learning_rate": 3.354445048445185e-06, "loss": 0.5855, "step": 20182 }, { "epoch": 0.6185791344857178, "grad_norm": 0.46082704688196086, "learning_rate": 3.3539763879632636e-06, "loss": 0.4069, "step": 20183 }, { "epoch": 0.618609783008459, "grad_norm": 1.3323262348129437, "learning_rate": 3.353507743700033e-06, "loss": 0.6046, "step": 20184 }, { "epoch": 0.6186404315312002, "grad_norm": 1.411963810992862, "learning_rate": 3.353039115660113e-06, "loss": 0.552, "step": 20185 }, { "epoch": 0.6186710800539414, "grad_norm": 1.2374500430276019, "learning_rate": 3.3525705038481194e-06, "loss": 0.6131, "step": 20186 }, { "epoch": 0.6187017285766826, "grad_norm": 1.2595189626067937, "learning_rate": 3.352101908268671e-06, "loss": 0.5735, "step": 20187 }, { "epoch": 0.6187323770994239, "grad_norm": 1.3225029299591458, "learning_rate": 3.3516333289263843e-06, "loss": 0.5607, "step": 20188 }, { "epoch": 0.618763025622165, "grad_norm": 1.4944011584669044, "learning_rate": 3.3511647658258747e-06, "loss": 0.6068, "step": 20189 }, { "epoch": 0.6187936741449063, "grad_norm": 1.3039531141002854, "learning_rate": 3.3506962189717628e-06, "loss": 0.6162, "step": 20190 }, { "epoch": 0.6188243226676474, "grad_norm": 1.2109848199697781, "learning_rate": 3.350227688368662e-06, "loss": 0.6139, "step": 20191 }, { "epoch": 0.6188549711903886, "grad_norm": 1.3143549722115229, "learning_rate": 3.349759174021189e-06, "loss": 0.6119, "step": 20192 }, { "epoch": 0.6188856197131298, "grad_norm": 1.3778788765423255, "learning_rate": 3.349290675933962e-06, "loss": 0.6755, "step": 20193 }, { "epoch": 0.618916268235871, "grad_norm": 0.43545652988091565, "learning_rate": 3.348822194111595e-06, "loss": 0.3982, "step": 20194 }, { "epoch": 0.6189469167586122, "grad_norm": 1.1479164189122046, "learning_rate": 3.3483537285587066e-06, "loss": 0.6714, "step": 20195 }, { "epoch": 0.6189775652813534, "grad_norm": 1.3146701005720882, "learning_rate": 3.3478852792799116e-06, "loss": 0.682, "step": 20196 }, { "epoch": 0.6190082138040947, "grad_norm": 1.1274496044105706, "learning_rate": 3.3474168462798244e-06, "loss": 0.58, "step": 20197 }, { "epoch": 0.6190388623268358, "grad_norm": 1.3779949932150615, "learning_rate": 3.3469484295630634e-06, "loss": 0.622, "step": 20198 }, { "epoch": 0.6190695108495771, "grad_norm": 0.4682015359096017, "learning_rate": 3.3464800291342432e-06, "loss": 0.4192, "step": 20199 }, { "epoch": 0.6191001593723182, "grad_norm": 1.4096065282687555, "learning_rate": 3.346011644997975e-06, "loss": 0.6329, "step": 20200 }, { "epoch": 0.6191308078950595, "grad_norm": 1.260270055528545, "learning_rate": 3.3455432771588803e-06, "loss": 0.5697, "step": 20201 }, { "epoch": 0.6191614564178006, "grad_norm": 1.2743473075434741, "learning_rate": 3.345074925621571e-06, "loss": 0.6347, "step": 20202 }, { "epoch": 0.6191921049405419, "grad_norm": 0.4487978351681363, "learning_rate": 3.3446065903906597e-06, "loss": 0.3966, "step": 20203 }, { "epoch": 0.619222753463283, "grad_norm": 0.47562364895062464, "learning_rate": 3.3441382714707647e-06, "loss": 0.4106, "step": 20204 }, { "epoch": 0.6192534019860243, "grad_norm": 1.3994172069317357, "learning_rate": 3.3436699688664975e-06, "loss": 0.7128, "step": 20205 }, { "epoch": 0.6192840505087654, "grad_norm": 1.2497695378158067, "learning_rate": 3.3432016825824753e-06, "loss": 0.686, "step": 20206 }, { "epoch": 0.6193146990315067, "grad_norm": 1.055616072451625, "learning_rate": 3.3427334126233115e-06, "loss": 0.5596, "step": 20207 }, { "epoch": 0.6193453475542479, "grad_norm": 0.4592491233052223, "learning_rate": 3.3422651589936173e-06, "loss": 0.3894, "step": 20208 }, { "epoch": 0.6193759960769891, "grad_norm": 1.2196919295906705, "learning_rate": 3.3417969216980107e-06, "loss": 0.6738, "step": 20209 }, { "epoch": 0.6194066445997303, "grad_norm": 1.5081794814087812, "learning_rate": 3.3413287007411034e-06, "loss": 0.6203, "step": 20210 }, { "epoch": 0.6194372931224715, "grad_norm": 1.144617519894108, "learning_rate": 3.340860496127506e-06, "loss": 0.5558, "step": 20211 }, { "epoch": 0.6194679416452127, "grad_norm": 1.3173054508376134, "learning_rate": 3.3403923078618378e-06, "loss": 0.5061, "step": 20212 }, { "epoch": 0.6194985901679539, "grad_norm": 0.476553991208692, "learning_rate": 3.3399241359487057e-06, "loss": 0.3918, "step": 20213 }, { "epoch": 0.6195292386906951, "grad_norm": 1.1387179443089877, "learning_rate": 3.339455980392729e-06, "loss": 0.6388, "step": 20214 }, { "epoch": 0.6195598872134364, "grad_norm": 1.2109830037336076, "learning_rate": 3.3389878411985165e-06, "loss": 0.745, "step": 20215 }, { "epoch": 0.6195905357361775, "grad_norm": 1.326486882668637, "learning_rate": 3.3385197183706803e-06, "loss": 0.6267, "step": 20216 }, { "epoch": 0.6196211842589188, "grad_norm": 1.443641952324852, "learning_rate": 3.3380516119138357e-06, "loss": 0.6236, "step": 20217 }, { "epoch": 0.6196518327816599, "grad_norm": 1.2245707202722789, "learning_rate": 3.3375835218325934e-06, "loss": 0.6402, "step": 20218 }, { "epoch": 0.6196824813044012, "grad_norm": 1.305850298523826, "learning_rate": 3.337115448131566e-06, "loss": 0.6048, "step": 20219 }, { "epoch": 0.6197131298271423, "grad_norm": 1.3740706540922578, "learning_rate": 3.336647390815366e-06, "loss": 0.5599, "step": 20220 }, { "epoch": 0.6197437783498836, "grad_norm": 1.4773559801876617, "learning_rate": 3.3361793498886035e-06, "loss": 0.6922, "step": 20221 }, { "epoch": 0.6197744268726247, "grad_norm": 1.3130051864274443, "learning_rate": 3.3357113253558927e-06, "loss": 0.6832, "step": 20222 }, { "epoch": 0.6198050753953659, "grad_norm": 1.3274376948610525, "learning_rate": 3.3352433172218457e-06, "loss": 0.6045, "step": 20223 }, { "epoch": 0.6198357239181072, "grad_norm": 1.1563586658300133, "learning_rate": 3.3347753254910686e-06, "loss": 0.528, "step": 20224 }, { "epoch": 0.6198663724408483, "grad_norm": 1.3435685139286622, "learning_rate": 3.3343073501681794e-06, "loss": 0.6712, "step": 20225 }, { "epoch": 0.6198970209635896, "grad_norm": 1.3491306683839224, "learning_rate": 3.3338393912577848e-06, "loss": 0.4947, "step": 20226 }, { "epoch": 0.6199276694863307, "grad_norm": 1.327543072234638, "learning_rate": 3.3333714487644963e-06, "loss": 0.5864, "step": 20227 }, { "epoch": 0.619958318009072, "grad_norm": 1.225475223875143, "learning_rate": 3.3329035226929265e-06, "loss": 0.6015, "step": 20228 }, { "epoch": 0.6199889665318131, "grad_norm": 1.4181804471293273, "learning_rate": 3.332435613047685e-06, "loss": 0.6837, "step": 20229 }, { "epoch": 0.6200196150545544, "grad_norm": 1.2522550319552141, "learning_rate": 3.3319677198333804e-06, "loss": 0.5993, "step": 20230 }, { "epoch": 0.6200502635772955, "grad_norm": 1.4134453450297688, "learning_rate": 3.331499843054626e-06, "loss": 0.6749, "step": 20231 }, { "epoch": 0.6200809121000368, "grad_norm": 1.2912889956998181, "learning_rate": 3.3310319827160297e-06, "loss": 0.6502, "step": 20232 }, { "epoch": 0.620111560622778, "grad_norm": 1.484289899237562, "learning_rate": 3.330564138822203e-06, "loss": 0.7184, "step": 20233 }, { "epoch": 0.6201422091455192, "grad_norm": 1.365572088757202, "learning_rate": 3.3300963113777563e-06, "loss": 0.6101, "step": 20234 }, { "epoch": 0.6201728576682604, "grad_norm": 1.2050680182451994, "learning_rate": 3.329628500387295e-06, "loss": 0.6009, "step": 20235 }, { "epoch": 0.6202035061910016, "grad_norm": 0.4611106318347748, "learning_rate": 3.329160705855434e-06, "loss": 0.3999, "step": 20236 }, { "epoch": 0.6202341547137428, "grad_norm": 1.3411808092669464, "learning_rate": 3.328692927786779e-06, "loss": 0.6663, "step": 20237 }, { "epoch": 0.620264803236484, "grad_norm": 1.3472655045751378, "learning_rate": 3.32822516618594e-06, "loss": 0.5995, "step": 20238 }, { "epoch": 0.6202954517592252, "grad_norm": 1.3242139252395968, "learning_rate": 3.327757421057526e-06, "loss": 0.615, "step": 20239 }, { "epoch": 0.6203261002819664, "grad_norm": 1.4781936055019302, "learning_rate": 3.327289692406146e-06, "loss": 0.5505, "step": 20240 }, { "epoch": 0.6203567488047076, "grad_norm": 1.2701688861741474, "learning_rate": 3.3268219802364088e-06, "loss": 0.7208, "step": 20241 }, { "epoch": 0.6203873973274489, "grad_norm": 1.2651419713155576, "learning_rate": 3.3263542845529247e-06, "loss": 0.6801, "step": 20242 }, { "epoch": 0.62041804585019, "grad_norm": 0.4564933025597987, "learning_rate": 3.3258866053602967e-06, "loss": 0.4003, "step": 20243 }, { "epoch": 0.6204486943729313, "grad_norm": 1.2651306607963577, "learning_rate": 3.325418942663139e-06, "loss": 0.604, "step": 20244 }, { "epoch": 0.6204793428956724, "grad_norm": 1.3713765225366477, "learning_rate": 3.3249512964660556e-06, "loss": 0.568, "step": 20245 }, { "epoch": 0.6205099914184137, "grad_norm": 0.44986439369610604, "learning_rate": 3.3244836667736557e-06, "loss": 0.4042, "step": 20246 }, { "epoch": 0.6205406399411548, "grad_norm": 1.1157437436719213, "learning_rate": 3.3240160535905475e-06, "loss": 0.6662, "step": 20247 }, { "epoch": 0.6205712884638961, "grad_norm": 1.0910455027099253, "learning_rate": 3.3235484569213373e-06, "loss": 0.5765, "step": 20248 }, { "epoch": 0.6206019369866372, "grad_norm": 1.2211147008863685, "learning_rate": 3.3230808767706328e-06, "loss": 0.6266, "step": 20249 }, { "epoch": 0.6206325855093785, "grad_norm": 1.2840056469633365, "learning_rate": 3.3226133131430428e-06, "loss": 0.5511, "step": 20250 }, { "epoch": 0.6206632340321196, "grad_norm": 1.2075409892976674, "learning_rate": 3.3221457660431713e-06, "loss": 0.634, "step": 20251 }, { "epoch": 0.6206938825548609, "grad_norm": 1.3207077842674861, "learning_rate": 3.321678235475628e-06, "loss": 0.6803, "step": 20252 }, { "epoch": 0.6207245310776021, "grad_norm": 1.2798412569479862, "learning_rate": 3.3212107214450196e-06, "loss": 0.7326, "step": 20253 }, { "epoch": 0.6207551796003432, "grad_norm": 0.44541719825768455, "learning_rate": 3.320743223955948e-06, "loss": 0.3987, "step": 20254 }, { "epoch": 0.6207858281230845, "grad_norm": 0.4383555598017991, "learning_rate": 3.3202757430130265e-06, "loss": 0.4148, "step": 20255 }, { "epoch": 0.6208164766458256, "grad_norm": 1.1744736966548421, "learning_rate": 3.3198082786208575e-06, "loss": 0.6584, "step": 20256 }, { "epoch": 0.6208471251685669, "grad_norm": 0.4337356714114329, "learning_rate": 3.3193408307840453e-06, "loss": 0.3867, "step": 20257 }, { "epoch": 0.620877773691308, "grad_norm": 1.2334316695425462, "learning_rate": 3.3188733995072e-06, "loss": 0.576, "step": 20258 }, { "epoch": 0.6209084222140493, "grad_norm": 1.0644702483773778, "learning_rate": 3.3184059847949234e-06, "loss": 0.5385, "step": 20259 }, { "epoch": 0.6209390707367904, "grad_norm": 1.440764169177898, "learning_rate": 3.3179385866518236e-06, "loss": 0.7045, "step": 20260 }, { "epoch": 0.6209697192595317, "grad_norm": 1.3694427042076311, "learning_rate": 3.3174712050825066e-06, "loss": 0.6229, "step": 20261 }, { "epoch": 0.6210003677822729, "grad_norm": 1.243946626461498, "learning_rate": 3.3170038400915737e-06, "loss": 0.6171, "step": 20262 }, { "epoch": 0.6210310163050141, "grad_norm": 1.280578753619674, "learning_rate": 3.3165364916836346e-06, "loss": 0.626, "step": 20263 }, { "epoch": 0.6210616648277553, "grad_norm": 0.4523357279453029, "learning_rate": 3.3160691598632934e-06, "loss": 0.3847, "step": 20264 }, { "epoch": 0.6210923133504965, "grad_norm": 1.2111566393956923, "learning_rate": 3.31560184463515e-06, "loss": 0.5695, "step": 20265 }, { "epoch": 0.6211229618732377, "grad_norm": 1.3435004141544986, "learning_rate": 3.3151345460038154e-06, "loss": 0.6055, "step": 20266 }, { "epoch": 0.6211536103959789, "grad_norm": 1.3185119386676967, "learning_rate": 3.3146672639738886e-06, "loss": 0.5948, "step": 20267 }, { "epoch": 0.6211842589187201, "grad_norm": 1.2403149659005837, "learning_rate": 3.3141999985499795e-06, "loss": 0.6276, "step": 20268 }, { "epoch": 0.6212149074414614, "grad_norm": 1.2834513326370227, "learning_rate": 3.3137327497366885e-06, "loss": 0.689, "step": 20269 }, { "epoch": 0.6212455559642025, "grad_norm": 1.3556144932995335, "learning_rate": 3.3132655175386188e-06, "loss": 0.6394, "step": 20270 }, { "epoch": 0.6212762044869438, "grad_norm": 1.4527480999153781, "learning_rate": 3.312798301960376e-06, "loss": 0.7109, "step": 20271 }, { "epoch": 0.6213068530096849, "grad_norm": 1.2569759259484627, "learning_rate": 3.312331103006564e-06, "loss": 0.5864, "step": 20272 }, { "epoch": 0.6213375015324262, "grad_norm": 1.4193930441380407, "learning_rate": 3.3118639206817836e-06, "loss": 0.6083, "step": 20273 }, { "epoch": 0.6213681500551673, "grad_norm": 1.3965224618836183, "learning_rate": 3.3113967549906424e-06, "loss": 0.6229, "step": 20274 }, { "epoch": 0.6213987985779086, "grad_norm": 1.3197638789353132, "learning_rate": 3.3109296059377405e-06, "loss": 0.6258, "step": 20275 }, { "epoch": 0.6214294471006497, "grad_norm": 1.3174778661320048, "learning_rate": 3.310462473527679e-06, "loss": 0.658, "step": 20276 }, { "epoch": 0.621460095623391, "grad_norm": 0.47751224992262054, "learning_rate": 3.309995357765066e-06, "loss": 0.4335, "step": 20277 }, { "epoch": 0.6214907441461321, "grad_norm": 1.2970945913466958, "learning_rate": 3.3095282586545e-06, "loss": 0.6295, "step": 20278 }, { "epoch": 0.6215213926688734, "grad_norm": 1.2568242389084212, "learning_rate": 3.309061176200584e-06, "loss": 0.6573, "step": 20279 }, { "epoch": 0.6215520411916146, "grad_norm": 1.096881742686689, "learning_rate": 3.3085941104079217e-06, "loss": 0.5507, "step": 20280 }, { "epoch": 0.6215826897143558, "grad_norm": 1.3048183090315744, "learning_rate": 3.3081270612811132e-06, "loss": 0.6008, "step": 20281 }, { "epoch": 0.621613338237097, "grad_norm": 1.2636856153622953, "learning_rate": 3.3076600288247627e-06, "loss": 0.5931, "step": 20282 }, { "epoch": 0.6216439867598382, "grad_norm": 1.2146730631727187, "learning_rate": 3.3071930130434717e-06, "loss": 0.5452, "step": 20283 }, { "epoch": 0.6216746352825794, "grad_norm": 1.1814134051291034, "learning_rate": 3.306726013941839e-06, "loss": 0.5874, "step": 20284 }, { "epoch": 0.6217052838053205, "grad_norm": 1.2600559305325458, "learning_rate": 3.306259031524469e-06, "loss": 0.631, "step": 20285 }, { "epoch": 0.6217359323280618, "grad_norm": 1.2050767121105508, "learning_rate": 3.305792065795962e-06, "loss": 0.5935, "step": 20286 }, { "epoch": 0.6217665808508029, "grad_norm": 1.3015565748587168, "learning_rate": 3.3053251167609214e-06, "loss": 0.6439, "step": 20287 }, { "epoch": 0.6217972293735442, "grad_norm": 1.317445309943619, "learning_rate": 3.3048581844239436e-06, "loss": 0.7508, "step": 20288 }, { "epoch": 0.6218278778962854, "grad_norm": 1.1601459068220286, "learning_rate": 3.304391268789632e-06, "loss": 0.5679, "step": 20289 }, { "epoch": 0.6218585264190266, "grad_norm": 1.2825013693566498, "learning_rate": 3.303924369862588e-06, "loss": 0.6028, "step": 20290 }, { "epoch": 0.6218891749417678, "grad_norm": 1.4195702475495633, "learning_rate": 3.3034574876474113e-06, "loss": 0.665, "step": 20291 }, { "epoch": 0.621919823464509, "grad_norm": 1.395408013991354, "learning_rate": 3.3029906221487e-06, "loss": 0.7246, "step": 20292 }, { "epoch": 0.6219504719872502, "grad_norm": 1.3555925214257747, "learning_rate": 3.302523773371058e-06, "loss": 0.5946, "step": 20293 }, { "epoch": 0.6219811205099914, "grad_norm": 1.2004066299250375, "learning_rate": 3.302056941319083e-06, "loss": 0.6042, "step": 20294 }, { "epoch": 0.6220117690327326, "grad_norm": 1.30876418689051, "learning_rate": 3.301590125997376e-06, "loss": 0.49, "step": 20295 }, { "epoch": 0.6220424175554738, "grad_norm": 1.3216550342507318, "learning_rate": 3.301123327410537e-06, "loss": 0.693, "step": 20296 }, { "epoch": 0.622073066078215, "grad_norm": 1.380994018157016, "learning_rate": 3.300656545563161e-06, "loss": 0.6006, "step": 20297 }, { "epoch": 0.6221037146009563, "grad_norm": 0.4648344680510624, "learning_rate": 3.300189780459855e-06, "loss": 0.3821, "step": 20298 }, { "epoch": 0.6221343631236974, "grad_norm": 1.2053152965917342, "learning_rate": 3.299723032105212e-06, "loss": 0.5922, "step": 20299 }, { "epoch": 0.6221650116464387, "grad_norm": 1.2478026531094015, "learning_rate": 3.2992563005038323e-06, "loss": 0.6248, "step": 20300 }, { "epoch": 0.6221956601691798, "grad_norm": 0.45308745753436563, "learning_rate": 3.298789585660317e-06, "loss": 0.4169, "step": 20301 }, { "epoch": 0.6222263086919211, "grad_norm": 1.4559825058353721, "learning_rate": 3.298322887579263e-06, "loss": 0.5883, "step": 20302 }, { "epoch": 0.6222569572146622, "grad_norm": 1.286965625725225, "learning_rate": 3.2978562062652674e-06, "loss": 0.6741, "step": 20303 }, { "epoch": 0.6222876057374035, "grad_norm": 1.3338412122588514, "learning_rate": 3.2973895417229312e-06, "loss": 0.6372, "step": 20304 }, { "epoch": 0.6223182542601446, "grad_norm": 1.2470942729856702, "learning_rate": 3.296922893956851e-06, "loss": 0.6425, "step": 20305 }, { "epoch": 0.6223489027828859, "grad_norm": 1.2554129348226872, "learning_rate": 3.2964562629716256e-06, "loss": 0.5927, "step": 20306 }, { "epoch": 0.622379551305627, "grad_norm": 0.4592560653695825, "learning_rate": 3.295989648771854e-06, "loss": 0.3999, "step": 20307 }, { "epoch": 0.6224101998283683, "grad_norm": 0.4431530506674624, "learning_rate": 3.2955230513621294e-06, "loss": 0.3965, "step": 20308 }, { "epoch": 0.6224408483511095, "grad_norm": 1.3377999119061157, "learning_rate": 3.295056470747055e-06, "loss": 0.5866, "step": 20309 }, { "epoch": 0.6224714968738507, "grad_norm": 1.1884450411404135, "learning_rate": 3.294589906931225e-06, "loss": 0.645, "step": 20310 }, { "epoch": 0.6225021453965919, "grad_norm": 1.3965923684486363, "learning_rate": 3.294123359919235e-06, "loss": 0.693, "step": 20311 }, { "epoch": 0.6225327939193331, "grad_norm": 1.2570043919380056, "learning_rate": 3.2936568297156856e-06, "loss": 0.6397, "step": 20312 }, { "epoch": 0.6225634424420743, "grad_norm": 1.160231649320928, "learning_rate": 3.293190316325171e-06, "loss": 0.5966, "step": 20313 }, { "epoch": 0.6225940909648155, "grad_norm": 0.44181915918146536, "learning_rate": 3.2927238197522897e-06, "loss": 0.4012, "step": 20314 }, { "epoch": 0.6226247394875567, "grad_norm": 1.2493158606148889, "learning_rate": 3.292257340001638e-06, "loss": 0.579, "step": 20315 }, { "epoch": 0.6226553880102978, "grad_norm": 1.3050296674535222, "learning_rate": 3.29179087707781e-06, "loss": 0.6068, "step": 20316 }, { "epoch": 0.6226860365330391, "grad_norm": 0.4475748159604138, "learning_rate": 3.291324430985405e-06, "loss": 0.3733, "step": 20317 }, { "epoch": 0.6227166850557803, "grad_norm": 1.2101521379148044, "learning_rate": 3.2908580017290185e-06, "loss": 0.6258, "step": 20318 }, { "epoch": 0.6227473335785215, "grad_norm": 1.2857436478092361, "learning_rate": 3.2903915893132423e-06, "loss": 0.5947, "step": 20319 }, { "epoch": 0.6227779821012627, "grad_norm": 1.2578228591964298, "learning_rate": 3.2899251937426783e-06, "loss": 0.5597, "step": 20320 }, { "epoch": 0.6228086306240039, "grad_norm": 1.1843125337024127, "learning_rate": 3.289458815021916e-06, "loss": 0.5747, "step": 20321 }, { "epoch": 0.6228392791467451, "grad_norm": 1.345333832923595, "learning_rate": 3.288992453155556e-06, "loss": 0.62, "step": 20322 }, { "epoch": 0.6228699276694863, "grad_norm": 1.3322137163965018, "learning_rate": 3.288526108148191e-06, "loss": 0.6341, "step": 20323 }, { "epoch": 0.6229005761922275, "grad_norm": 1.1748458268306523, "learning_rate": 3.2880597800044144e-06, "loss": 0.5426, "step": 20324 }, { "epoch": 0.6229312247149688, "grad_norm": 1.3969492986249048, "learning_rate": 3.2875934687288245e-06, "loss": 0.7075, "step": 20325 }, { "epoch": 0.6229618732377099, "grad_norm": 0.4708998691966644, "learning_rate": 3.287127174326014e-06, "loss": 0.4186, "step": 20326 }, { "epoch": 0.6229925217604512, "grad_norm": 1.289704678381298, "learning_rate": 3.286660896800577e-06, "loss": 0.6483, "step": 20327 }, { "epoch": 0.6230231702831923, "grad_norm": 1.3121041873797021, "learning_rate": 3.2861946361571094e-06, "loss": 0.5321, "step": 20328 }, { "epoch": 0.6230538188059336, "grad_norm": 1.1936106418249082, "learning_rate": 3.2857283924002055e-06, "loss": 0.6389, "step": 20329 }, { "epoch": 0.6230844673286747, "grad_norm": 1.3149828143167186, "learning_rate": 3.285262165534456e-06, "loss": 0.6091, "step": 20330 }, { "epoch": 0.623115115851416, "grad_norm": 0.5237416964953623, "learning_rate": 3.2847959555644582e-06, "loss": 0.4201, "step": 20331 }, { "epoch": 0.6231457643741571, "grad_norm": 1.5179592937502067, "learning_rate": 3.284329762494804e-06, "loss": 0.653, "step": 20332 }, { "epoch": 0.6231764128968984, "grad_norm": 0.47287990313781453, "learning_rate": 3.283863586330088e-06, "loss": 0.4142, "step": 20333 }, { "epoch": 0.6232070614196396, "grad_norm": 0.4675553279085399, "learning_rate": 3.2833974270749047e-06, "loss": 0.4142, "step": 20334 }, { "epoch": 0.6232377099423808, "grad_norm": 1.1412224448505186, "learning_rate": 3.2829312847338434e-06, "loss": 0.5157, "step": 20335 }, { "epoch": 0.623268358465122, "grad_norm": 1.3543912651362722, "learning_rate": 3.282465159311501e-06, "loss": 0.6397, "step": 20336 }, { "epoch": 0.6232990069878632, "grad_norm": 1.3687873204860495, "learning_rate": 3.28199905081247e-06, "loss": 0.6449, "step": 20337 }, { "epoch": 0.6233296555106044, "grad_norm": 1.3067346157959538, "learning_rate": 3.281532959241338e-06, "loss": 0.5351, "step": 20338 }, { "epoch": 0.6233603040333456, "grad_norm": 1.1609222223623266, "learning_rate": 3.281066884602705e-06, "loss": 0.6045, "step": 20339 }, { "epoch": 0.6233909525560868, "grad_norm": 1.2494020654099927, "learning_rate": 3.280600826901157e-06, "loss": 0.6079, "step": 20340 }, { "epoch": 0.623421601078828, "grad_norm": 1.2096580722649994, "learning_rate": 3.280134786141292e-06, "loss": 0.6537, "step": 20341 }, { "epoch": 0.6234522496015692, "grad_norm": 1.2265839296086785, "learning_rate": 3.279668762327698e-06, "loss": 0.6673, "step": 20342 }, { "epoch": 0.6234828981243105, "grad_norm": 1.1553037857574977, "learning_rate": 3.2792027554649663e-06, "loss": 0.5996, "step": 20343 }, { "epoch": 0.6235135466470516, "grad_norm": 1.4214011721115327, "learning_rate": 3.278736765557692e-06, "loss": 0.5994, "step": 20344 }, { "epoch": 0.6235441951697929, "grad_norm": 1.4012733448481243, "learning_rate": 3.278270792610464e-06, "loss": 0.657, "step": 20345 }, { "epoch": 0.623574843692534, "grad_norm": 1.2460846676612367, "learning_rate": 3.2778048366278737e-06, "loss": 0.604, "step": 20346 }, { "epoch": 0.6236054922152752, "grad_norm": 1.1905803202344445, "learning_rate": 3.277338897614514e-06, "loss": 0.5925, "step": 20347 }, { "epoch": 0.6236361407380164, "grad_norm": 1.184069001166981, "learning_rate": 3.2768729755749734e-06, "loss": 0.5991, "step": 20348 }, { "epoch": 0.6236667892607576, "grad_norm": 0.4578557651471463, "learning_rate": 3.2764070705138463e-06, "loss": 0.4025, "step": 20349 }, { "epoch": 0.6236974377834988, "grad_norm": 1.1690905023866185, "learning_rate": 3.2759411824357213e-06, "loss": 0.4907, "step": 20350 }, { "epoch": 0.62372808630624, "grad_norm": 1.425525363624059, "learning_rate": 3.2754753113451864e-06, "loss": 0.667, "step": 20351 }, { "epoch": 0.6237587348289813, "grad_norm": 1.1693905170944274, "learning_rate": 3.275009457246837e-06, "loss": 0.5445, "step": 20352 }, { "epoch": 0.6237893833517224, "grad_norm": 0.44942764334630414, "learning_rate": 3.2745436201452606e-06, "loss": 0.3979, "step": 20353 }, { "epoch": 0.6238200318744637, "grad_norm": 1.1926719328143247, "learning_rate": 3.274077800045046e-06, "loss": 0.6274, "step": 20354 }, { "epoch": 0.6238506803972048, "grad_norm": 1.3551281226766427, "learning_rate": 3.2736119969507858e-06, "loss": 0.62, "step": 20355 }, { "epoch": 0.6238813289199461, "grad_norm": 1.2060026381595297, "learning_rate": 3.2731462108670676e-06, "loss": 0.6113, "step": 20356 }, { "epoch": 0.6239119774426872, "grad_norm": 1.1905043765231667, "learning_rate": 3.2726804417984816e-06, "loss": 0.7042, "step": 20357 }, { "epoch": 0.6239426259654285, "grad_norm": 0.4580182720395241, "learning_rate": 3.272214689749618e-06, "loss": 0.3895, "step": 20358 }, { "epoch": 0.6239732744881696, "grad_norm": 1.4761341579695768, "learning_rate": 3.271748954725063e-06, "loss": 0.7172, "step": 20359 }, { "epoch": 0.6240039230109109, "grad_norm": 1.3496089053426727, "learning_rate": 3.2712832367294094e-06, "loss": 0.7124, "step": 20360 }, { "epoch": 0.624034571533652, "grad_norm": 1.3819530549765937, "learning_rate": 3.2708175357672457e-06, "loss": 0.607, "step": 20361 }, { "epoch": 0.6240652200563933, "grad_norm": 1.2994938994951766, "learning_rate": 3.2703518518431552e-06, "loss": 0.6159, "step": 20362 }, { "epoch": 0.6240958685791345, "grad_norm": 1.4486577775741947, "learning_rate": 3.269886184961735e-06, "loss": 0.728, "step": 20363 }, { "epoch": 0.6241265171018757, "grad_norm": 1.3348314706893956, "learning_rate": 3.2694205351275666e-06, "loss": 0.6209, "step": 20364 }, { "epoch": 0.6241571656246169, "grad_norm": 1.1360545729104348, "learning_rate": 3.2689549023452405e-06, "loss": 0.7234, "step": 20365 }, { "epoch": 0.6241878141473581, "grad_norm": 1.2751468709589027, "learning_rate": 3.268489286619345e-06, "loss": 0.637, "step": 20366 }, { "epoch": 0.6242184626700993, "grad_norm": 1.2345746722617936, "learning_rate": 3.2680236879544667e-06, "loss": 0.559, "step": 20367 }, { "epoch": 0.6242491111928405, "grad_norm": 1.1761703581701308, "learning_rate": 3.2675581063551954e-06, "loss": 0.618, "step": 20368 }, { "epoch": 0.6242797597155817, "grad_norm": 1.3105655594306664, "learning_rate": 3.2670925418261167e-06, "loss": 0.6027, "step": 20369 }, { "epoch": 0.624310408238323, "grad_norm": 1.4363383184082836, "learning_rate": 3.2666269943718175e-06, "loss": 0.6055, "step": 20370 }, { "epoch": 0.6243410567610641, "grad_norm": 1.1795384570304681, "learning_rate": 3.266161463996888e-06, "loss": 0.6437, "step": 20371 }, { "epoch": 0.6243717052838054, "grad_norm": 1.3482047984103125, "learning_rate": 3.2656959507059137e-06, "loss": 0.6814, "step": 20372 }, { "epoch": 0.6244023538065465, "grad_norm": 1.3310940364294621, "learning_rate": 3.265230454503478e-06, "loss": 0.6335, "step": 20373 }, { "epoch": 0.6244330023292878, "grad_norm": 1.3401964896492493, "learning_rate": 3.2647649753941733e-06, "loss": 0.4757, "step": 20374 }, { "epoch": 0.6244636508520289, "grad_norm": 1.3327501012785719, "learning_rate": 3.2642995133825815e-06, "loss": 0.7446, "step": 20375 }, { "epoch": 0.6244942993747702, "grad_norm": 1.262305283415996, "learning_rate": 3.263834068473292e-06, "loss": 0.6247, "step": 20376 }, { "epoch": 0.6245249478975113, "grad_norm": 1.2797357504773434, "learning_rate": 3.2633686406708888e-06, "loss": 0.6075, "step": 20377 }, { "epoch": 0.6245555964202525, "grad_norm": 1.1773632315250186, "learning_rate": 3.2629032299799577e-06, "loss": 0.5819, "step": 20378 }, { "epoch": 0.6245862449429938, "grad_norm": 1.323679890595848, "learning_rate": 3.262437836405088e-06, "loss": 0.6189, "step": 20379 }, { "epoch": 0.6246168934657349, "grad_norm": 1.2446262289302499, "learning_rate": 3.261972459950862e-06, "loss": 0.6204, "step": 20380 }, { "epoch": 0.6246475419884762, "grad_norm": 1.2722040170848319, "learning_rate": 3.2615071006218644e-06, "loss": 0.5818, "step": 20381 }, { "epoch": 0.6246781905112173, "grad_norm": 1.2022496030819934, "learning_rate": 3.261041758422685e-06, "loss": 0.5535, "step": 20382 }, { "epoch": 0.6247088390339586, "grad_norm": 1.3813317254791198, "learning_rate": 3.260576433357905e-06, "loss": 0.628, "step": 20383 }, { "epoch": 0.6247394875566997, "grad_norm": 1.2202728014543436, "learning_rate": 3.2601111254321083e-06, "loss": 0.5852, "step": 20384 }, { "epoch": 0.624770136079441, "grad_norm": 1.354277485162392, "learning_rate": 3.2596458346498836e-06, "loss": 0.6309, "step": 20385 }, { "epoch": 0.6248007846021821, "grad_norm": 1.410918509442431, "learning_rate": 3.2591805610158134e-06, "loss": 0.552, "step": 20386 }, { "epoch": 0.6248314331249234, "grad_norm": 1.2675366065121314, "learning_rate": 3.258715304534483e-06, "loss": 0.5727, "step": 20387 }, { "epoch": 0.6248620816476645, "grad_norm": 0.4416393499288494, "learning_rate": 3.2582500652104765e-06, "loss": 0.3991, "step": 20388 }, { "epoch": 0.6248927301704058, "grad_norm": 1.3717156116626605, "learning_rate": 3.2577848430483767e-06, "loss": 0.677, "step": 20389 }, { "epoch": 0.624923378693147, "grad_norm": 1.2491473330220113, "learning_rate": 3.2573196380527693e-06, "loss": 0.5522, "step": 20390 }, { "epoch": 0.6249540272158882, "grad_norm": 1.212005094773451, "learning_rate": 3.2568544502282384e-06, "loss": 0.5935, "step": 20391 }, { "epoch": 0.6249846757386294, "grad_norm": 0.45055937264882184, "learning_rate": 3.256389279579364e-06, "loss": 0.4042, "step": 20392 }, { "epoch": 0.6250153242613706, "grad_norm": 1.2602553226895428, "learning_rate": 3.255924126110735e-06, "loss": 0.5719, "step": 20393 }, { "epoch": 0.6250459727841118, "grad_norm": 0.4644945838389278, "learning_rate": 3.2554589898269284e-06, "loss": 0.4113, "step": 20394 }, { "epoch": 0.625076621306853, "grad_norm": 1.3469914884697687, "learning_rate": 3.2549938707325346e-06, "loss": 0.6244, "step": 20395 }, { "epoch": 0.6251072698295942, "grad_norm": 0.44521236235242106, "learning_rate": 3.2545287688321308e-06, "loss": 0.3905, "step": 20396 }, { "epoch": 0.6251379183523355, "grad_norm": 1.2034344478405492, "learning_rate": 3.2540636841303006e-06, "loss": 0.6882, "step": 20397 }, { "epoch": 0.6251685668750766, "grad_norm": 0.45764547012897816, "learning_rate": 3.2535986166316292e-06, "loss": 0.401, "step": 20398 }, { "epoch": 0.6251992153978179, "grad_norm": 1.4412508232455068, "learning_rate": 3.2531335663406976e-06, "loss": 0.6494, "step": 20399 }, { "epoch": 0.625229863920559, "grad_norm": 0.4488262522761873, "learning_rate": 3.2526685332620867e-06, "loss": 0.4107, "step": 20400 }, { "epoch": 0.6252605124433003, "grad_norm": 1.13151887062433, "learning_rate": 3.252203517400381e-06, "loss": 0.5764, "step": 20401 }, { "epoch": 0.6252911609660414, "grad_norm": 1.39693920369473, "learning_rate": 3.251738518760161e-06, "loss": 0.6933, "step": 20402 }, { "epoch": 0.6253218094887827, "grad_norm": 1.3170634986744778, "learning_rate": 3.2512735373460068e-06, "loss": 0.6923, "step": 20403 }, { "epoch": 0.6253524580115238, "grad_norm": 0.43488248086628145, "learning_rate": 3.250808573162505e-06, "loss": 0.4012, "step": 20404 }, { "epoch": 0.6253831065342651, "grad_norm": 1.5526174818893441, "learning_rate": 3.250343626214231e-06, "loss": 0.6871, "step": 20405 }, { "epoch": 0.6254137550570062, "grad_norm": 1.2691551961316794, "learning_rate": 3.2498786965057716e-06, "loss": 0.6316, "step": 20406 }, { "epoch": 0.6254444035797475, "grad_norm": 1.2307823798875874, "learning_rate": 3.249413784041704e-06, "loss": 0.7059, "step": 20407 }, { "epoch": 0.6254750521024887, "grad_norm": 1.156756236640305, "learning_rate": 3.248948888826609e-06, "loss": 0.6363, "step": 20408 }, { "epoch": 0.6255057006252298, "grad_norm": 1.2409060312974043, "learning_rate": 3.2484840108650706e-06, "loss": 0.6805, "step": 20409 }, { "epoch": 0.6255363491479711, "grad_norm": 1.3016801185822893, "learning_rate": 3.2480191501616663e-06, "loss": 0.6294, "step": 20410 }, { "epoch": 0.6255669976707122, "grad_norm": 1.3861412038851193, "learning_rate": 3.2475543067209768e-06, "loss": 0.6389, "step": 20411 }, { "epoch": 0.6255976461934535, "grad_norm": 1.231260729189397, "learning_rate": 3.247089480547585e-06, "loss": 0.6474, "step": 20412 }, { "epoch": 0.6256282947161946, "grad_norm": 1.3657834959304949, "learning_rate": 3.246624671646067e-06, "loss": 0.5705, "step": 20413 }, { "epoch": 0.6256589432389359, "grad_norm": 1.3601878865632748, "learning_rate": 3.2461598800210065e-06, "loss": 0.6063, "step": 20414 }, { "epoch": 0.625689591761677, "grad_norm": 0.46052001085850375, "learning_rate": 3.245695105676982e-06, "loss": 0.4197, "step": 20415 }, { "epoch": 0.6257202402844183, "grad_norm": 1.1292804692050693, "learning_rate": 3.2452303486185698e-06, "loss": 0.5659, "step": 20416 }, { "epoch": 0.6257508888071595, "grad_norm": 1.4136486768755159, "learning_rate": 3.244765608850354e-06, "loss": 0.6847, "step": 20417 }, { "epoch": 0.6257815373299007, "grad_norm": 1.3051894069877765, "learning_rate": 3.244300886376912e-06, "loss": 0.6435, "step": 20418 }, { "epoch": 0.6258121858526419, "grad_norm": 1.4824253743840718, "learning_rate": 3.2438361812028212e-06, "loss": 0.5471, "step": 20419 }, { "epoch": 0.6258428343753831, "grad_norm": 1.2504408111142429, "learning_rate": 3.243371493332663e-06, "loss": 0.6115, "step": 20420 }, { "epoch": 0.6258734828981243, "grad_norm": 1.3137934129328837, "learning_rate": 3.2429068227710137e-06, "loss": 0.5951, "step": 20421 }, { "epoch": 0.6259041314208655, "grad_norm": 1.2469064201227444, "learning_rate": 3.2424421695224538e-06, "loss": 0.6271, "step": 20422 }, { "epoch": 0.6259347799436067, "grad_norm": 1.3411548475469979, "learning_rate": 3.241977533591561e-06, "loss": 0.5784, "step": 20423 }, { "epoch": 0.625965428466348, "grad_norm": 1.2021137104199522, "learning_rate": 3.241512914982913e-06, "loss": 0.5772, "step": 20424 }, { "epoch": 0.6259960769890891, "grad_norm": 1.233823168059729, "learning_rate": 3.2410483137010885e-06, "loss": 0.5775, "step": 20425 }, { "epoch": 0.6260267255118304, "grad_norm": 1.2515676836707497, "learning_rate": 3.240583729750666e-06, "loss": 0.5978, "step": 20426 }, { "epoch": 0.6260573740345715, "grad_norm": 1.2518693065819257, "learning_rate": 3.24011916313622e-06, "loss": 0.6081, "step": 20427 }, { "epoch": 0.6260880225573128, "grad_norm": 1.326234399025854, "learning_rate": 3.2396546138623313e-06, "loss": 0.6441, "step": 20428 }, { "epoch": 0.6261186710800539, "grad_norm": 1.374058993049367, "learning_rate": 3.2391900819335766e-06, "loss": 0.6426, "step": 20429 }, { "epoch": 0.6261493196027952, "grad_norm": 1.3380853680713243, "learning_rate": 3.2387255673545317e-06, "loss": 0.657, "step": 20430 }, { "epoch": 0.6261799681255363, "grad_norm": 1.3124035535979865, "learning_rate": 3.2382610701297743e-06, "loss": 0.6535, "step": 20431 }, { "epoch": 0.6262106166482776, "grad_norm": 1.2964918299398043, "learning_rate": 3.2377965902638807e-06, "loss": 0.6295, "step": 20432 }, { "epoch": 0.6262412651710187, "grad_norm": 1.3379878384387216, "learning_rate": 3.23733212776143e-06, "loss": 0.6131, "step": 20433 }, { "epoch": 0.62627191369376, "grad_norm": 1.247820679263499, "learning_rate": 3.2368676826269972e-06, "loss": 0.6037, "step": 20434 }, { "epoch": 0.6263025622165012, "grad_norm": 1.620319346373014, "learning_rate": 3.2364032548651554e-06, "loss": 0.5848, "step": 20435 }, { "epoch": 0.6263332107392424, "grad_norm": 1.2962581158787672, "learning_rate": 3.2359388444804863e-06, "loss": 0.676, "step": 20436 }, { "epoch": 0.6263638592619836, "grad_norm": 1.4808872485856726, "learning_rate": 3.2354744514775626e-06, "loss": 0.6131, "step": 20437 }, { "epoch": 0.6263945077847248, "grad_norm": 1.243019942214485, "learning_rate": 3.235010075860959e-06, "loss": 0.5679, "step": 20438 }, { "epoch": 0.626425156307466, "grad_norm": 1.4467017293588142, "learning_rate": 3.2345457176352546e-06, "loss": 0.6019, "step": 20439 }, { "epoch": 0.6264558048302071, "grad_norm": 1.2433435819922933, "learning_rate": 3.2340813768050213e-06, "loss": 0.6457, "step": 20440 }, { "epoch": 0.6264864533529484, "grad_norm": 1.3596482699923187, "learning_rate": 3.233617053374837e-06, "loss": 0.6543, "step": 20441 }, { "epoch": 0.6265171018756895, "grad_norm": 0.44795639810962024, "learning_rate": 3.233152747349276e-06, "loss": 0.3967, "step": 20442 }, { "epoch": 0.6265477503984308, "grad_norm": 1.351367445471061, "learning_rate": 3.232688458732912e-06, "loss": 0.5472, "step": 20443 }, { "epoch": 0.626578398921172, "grad_norm": 1.2402389388493462, "learning_rate": 3.2322241875303217e-06, "loss": 0.666, "step": 20444 }, { "epoch": 0.6266090474439132, "grad_norm": 1.334132313199907, "learning_rate": 3.23175993374608e-06, "loss": 0.6479, "step": 20445 }, { "epoch": 0.6266396959666544, "grad_norm": 1.2064861191566572, "learning_rate": 3.231295697384757e-06, "loss": 0.5691, "step": 20446 }, { "epoch": 0.6266703444893956, "grad_norm": 1.383018392528811, "learning_rate": 3.2308314784509333e-06, "loss": 0.6968, "step": 20447 }, { "epoch": 0.6267009930121368, "grad_norm": 1.2492566215101621, "learning_rate": 3.230367276949176e-06, "loss": 0.5197, "step": 20448 }, { "epoch": 0.626731641534878, "grad_norm": 1.2866276078808305, "learning_rate": 3.2299030928840665e-06, "loss": 0.6185, "step": 20449 }, { "epoch": 0.6267622900576192, "grad_norm": 1.3796935042715759, "learning_rate": 3.2294389262601733e-06, "loss": 0.646, "step": 20450 }, { "epoch": 0.6267929385803604, "grad_norm": 1.3314382001963834, "learning_rate": 3.22897477708207e-06, "loss": 0.6316, "step": 20451 }, { "epoch": 0.6268235871031016, "grad_norm": 1.4874226841285045, "learning_rate": 3.228510645354333e-06, "loss": 0.6431, "step": 20452 }, { "epoch": 0.6268542356258429, "grad_norm": 1.4209213722453962, "learning_rate": 3.2280465310815335e-06, "loss": 0.6779, "step": 20453 }, { "epoch": 0.626884884148584, "grad_norm": 1.2785477607793423, "learning_rate": 3.227582434268244e-06, "loss": 0.6545, "step": 20454 }, { "epoch": 0.6269155326713253, "grad_norm": 1.350750432222839, "learning_rate": 3.22711835491904e-06, "loss": 0.5542, "step": 20455 }, { "epoch": 0.6269461811940664, "grad_norm": 1.351913647329936, "learning_rate": 3.2266542930384926e-06, "loss": 0.5638, "step": 20456 }, { "epoch": 0.6269768297168077, "grad_norm": 1.212279771491504, "learning_rate": 3.226190248631171e-06, "loss": 0.5837, "step": 20457 }, { "epoch": 0.6270074782395488, "grad_norm": 1.228627901581851, "learning_rate": 3.2257262217016546e-06, "loss": 0.6209, "step": 20458 }, { "epoch": 0.6270381267622901, "grad_norm": 0.44252051090968847, "learning_rate": 3.2252622122545076e-06, "loss": 0.3996, "step": 20459 }, { "epoch": 0.6270687752850312, "grad_norm": 1.3446341843750274, "learning_rate": 3.2247982202943096e-06, "loss": 0.6265, "step": 20460 }, { "epoch": 0.6270994238077725, "grad_norm": 1.603422687070326, "learning_rate": 3.2243342458256287e-06, "loss": 0.6981, "step": 20461 }, { "epoch": 0.6271300723305137, "grad_norm": 1.322049418703115, "learning_rate": 3.223870288853035e-06, "loss": 0.5868, "step": 20462 }, { "epoch": 0.6271607208532549, "grad_norm": 1.3435999497330355, "learning_rate": 3.223406349381103e-06, "loss": 0.6788, "step": 20463 }, { "epoch": 0.6271913693759961, "grad_norm": 1.2607012439072582, "learning_rate": 3.2229424274144028e-06, "loss": 0.5276, "step": 20464 }, { "epoch": 0.6272220178987373, "grad_norm": 1.4954289726124237, "learning_rate": 3.222478522957504e-06, "loss": 0.6006, "step": 20465 }, { "epoch": 0.6272526664214785, "grad_norm": 1.4068557714005685, "learning_rate": 3.2220146360149806e-06, "loss": 0.6747, "step": 20466 }, { "epoch": 0.6272833149442197, "grad_norm": 0.46221790936786644, "learning_rate": 3.2215507665914015e-06, "loss": 0.3983, "step": 20467 }, { "epoch": 0.6273139634669609, "grad_norm": 1.253137698947786, "learning_rate": 3.2210869146913374e-06, "loss": 0.7338, "step": 20468 }, { "epoch": 0.6273446119897022, "grad_norm": 1.2542612134807105, "learning_rate": 3.220623080319361e-06, "loss": 0.6293, "step": 20469 }, { "epoch": 0.6273752605124433, "grad_norm": 1.4084956569476264, "learning_rate": 3.2201592634800375e-06, "loss": 0.6716, "step": 20470 }, { "epoch": 0.6274059090351845, "grad_norm": 1.4349971524709344, "learning_rate": 3.2196954641779433e-06, "loss": 0.649, "step": 20471 }, { "epoch": 0.6274365575579257, "grad_norm": 1.2819239212596807, "learning_rate": 3.219231682417644e-06, "loss": 0.679, "step": 20472 }, { "epoch": 0.6274672060806669, "grad_norm": 1.249042812856191, "learning_rate": 3.2187679182037096e-06, "loss": 0.6649, "step": 20473 }, { "epoch": 0.6274978546034081, "grad_norm": 1.3215001402591124, "learning_rate": 3.2183041715407117e-06, "loss": 0.6165, "step": 20474 }, { "epoch": 0.6275285031261493, "grad_norm": 1.3159888490373595, "learning_rate": 3.217840442433218e-06, "loss": 0.6349, "step": 20475 }, { "epoch": 0.6275591516488905, "grad_norm": 5.022259032397376, "learning_rate": 3.2173767308857982e-06, "loss": 0.6223, "step": 20476 }, { "epoch": 0.6275898001716317, "grad_norm": 1.1573815853453617, "learning_rate": 3.2169130369030234e-06, "loss": 0.5898, "step": 20477 }, { "epoch": 0.627620448694373, "grad_norm": 1.961461321840798, "learning_rate": 3.216449360489458e-06, "loss": 0.6728, "step": 20478 }, { "epoch": 0.6276510972171141, "grad_norm": 1.138934682959572, "learning_rate": 3.2159857016496763e-06, "loss": 0.6167, "step": 20479 }, { "epoch": 0.6276817457398554, "grad_norm": 1.196433976197821, "learning_rate": 3.215522060388243e-06, "loss": 0.5607, "step": 20480 }, { "epoch": 0.6277123942625965, "grad_norm": 0.4559557126061173, "learning_rate": 3.215058436709726e-06, "loss": 0.4148, "step": 20481 }, { "epoch": 0.6277430427853378, "grad_norm": 1.2182255382184692, "learning_rate": 3.214594830618696e-06, "loss": 0.6385, "step": 20482 }, { "epoch": 0.6277736913080789, "grad_norm": 1.3833443022498346, "learning_rate": 3.21413124211972e-06, "loss": 0.7815, "step": 20483 }, { "epoch": 0.6278043398308202, "grad_norm": 1.3684140071370392, "learning_rate": 3.2136676712173647e-06, "loss": 0.638, "step": 20484 }, { "epoch": 0.6278349883535613, "grad_norm": 1.3284473828255967, "learning_rate": 3.2132041179162e-06, "loss": 0.6587, "step": 20485 }, { "epoch": 0.6278656368763026, "grad_norm": 1.2926850246116204, "learning_rate": 3.212740582220791e-06, "loss": 0.5122, "step": 20486 }, { "epoch": 0.6278962853990437, "grad_norm": 1.1838873381384918, "learning_rate": 3.212277064135708e-06, "loss": 0.6639, "step": 20487 }, { "epoch": 0.627926933921785, "grad_norm": 1.3391708964651092, "learning_rate": 3.211813563665517e-06, "loss": 0.6612, "step": 20488 }, { "epoch": 0.6279575824445262, "grad_norm": 0.4654239636803114, "learning_rate": 3.2113500808147814e-06, "loss": 0.4267, "step": 20489 }, { "epoch": 0.6279882309672674, "grad_norm": 1.3412874047606291, "learning_rate": 3.2108866155880745e-06, "loss": 0.6611, "step": 20490 }, { "epoch": 0.6280188794900086, "grad_norm": 1.2521462701784971, "learning_rate": 3.2104231679899584e-06, "loss": 0.6218, "step": 20491 }, { "epoch": 0.6280495280127498, "grad_norm": 1.3100590407587913, "learning_rate": 3.2099597380249998e-06, "loss": 0.6331, "step": 20492 }, { "epoch": 0.628080176535491, "grad_norm": 1.1766411326791566, "learning_rate": 3.2094963256977663e-06, "loss": 0.6304, "step": 20493 }, { "epoch": 0.6281108250582322, "grad_norm": 0.4480700055320228, "learning_rate": 3.209032931012823e-06, "loss": 0.4097, "step": 20494 }, { "epoch": 0.6281414735809734, "grad_norm": 1.5213790264875748, "learning_rate": 3.208569553974738e-06, "loss": 0.5774, "step": 20495 }, { "epoch": 0.6281721221037146, "grad_norm": 1.106667591740599, "learning_rate": 3.2081061945880756e-06, "loss": 0.5356, "step": 20496 }, { "epoch": 0.6282027706264558, "grad_norm": 1.2297045794945272, "learning_rate": 3.207642852857399e-06, "loss": 0.6617, "step": 20497 }, { "epoch": 0.6282334191491971, "grad_norm": 1.1636304593844569, "learning_rate": 3.207179528787278e-06, "loss": 0.5571, "step": 20498 }, { "epoch": 0.6282640676719382, "grad_norm": 1.2899772348379037, "learning_rate": 3.206716222382277e-06, "loss": 0.652, "step": 20499 }, { "epoch": 0.6282947161946795, "grad_norm": 1.2009139578902, "learning_rate": 3.206252933646956e-06, "loss": 0.5497, "step": 20500 }, { "epoch": 0.6283253647174206, "grad_norm": 1.2738075343541144, "learning_rate": 3.2057896625858875e-06, "loss": 0.5985, "step": 20501 }, { "epoch": 0.6283560132401618, "grad_norm": 1.115822286751878, "learning_rate": 3.2053264092036297e-06, "loss": 0.547, "step": 20502 }, { "epoch": 0.628386661762903, "grad_norm": 0.45231449640988564, "learning_rate": 3.204863173504752e-06, "loss": 0.4034, "step": 20503 }, { "epoch": 0.6284173102856442, "grad_norm": 1.4715252424122707, "learning_rate": 3.2043999554938165e-06, "loss": 0.6286, "step": 20504 }, { "epoch": 0.6284479588083854, "grad_norm": 1.2294222830289494, "learning_rate": 3.203936755175386e-06, "loss": 0.6114, "step": 20505 }, { "epoch": 0.6284786073311266, "grad_norm": 1.2992913112192581, "learning_rate": 3.2034735725540283e-06, "loss": 0.6439, "step": 20506 }, { "epoch": 0.6285092558538679, "grad_norm": 0.44373692963356115, "learning_rate": 3.203010407634305e-06, "loss": 0.4106, "step": 20507 }, { "epoch": 0.628539904376609, "grad_norm": 1.3876868598731573, "learning_rate": 3.202547260420778e-06, "loss": 0.611, "step": 20508 }, { "epoch": 0.6285705528993503, "grad_norm": 1.2216498499833306, "learning_rate": 3.202084130918014e-06, "loss": 0.5578, "step": 20509 }, { "epoch": 0.6286012014220914, "grad_norm": 1.4061327655849583, "learning_rate": 3.201621019130576e-06, "loss": 0.7073, "step": 20510 }, { "epoch": 0.6286318499448327, "grad_norm": 1.3419864275678046, "learning_rate": 3.2011579250630244e-06, "loss": 0.6501, "step": 20511 }, { "epoch": 0.6286624984675738, "grad_norm": 1.13154976131728, "learning_rate": 3.2006948487199264e-06, "loss": 0.6034, "step": 20512 }, { "epoch": 0.6286931469903151, "grad_norm": 0.43998779968505963, "learning_rate": 3.2002317901058387e-06, "loss": 0.4165, "step": 20513 }, { "epoch": 0.6287237955130562, "grad_norm": 1.2671874666186855, "learning_rate": 3.199768749225331e-06, "loss": 0.6319, "step": 20514 }, { "epoch": 0.6287544440357975, "grad_norm": 1.2942445449430333, "learning_rate": 3.199305726082962e-06, "loss": 0.6506, "step": 20515 }, { "epoch": 0.6287850925585386, "grad_norm": 1.2912709286156843, "learning_rate": 3.1988427206832927e-06, "loss": 0.685, "step": 20516 }, { "epoch": 0.6288157410812799, "grad_norm": 1.2003917081482354, "learning_rate": 3.1983797330308886e-06, "loss": 0.6144, "step": 20517 }, { "epoch": 0.6288463896040211, "grad_norm": 1.269183066951593, "learning_rate": 3.1979167631303087e-06, "loss": 0.6548, "step": 20518 }, { "epoch": 0.6288770381267623, "grad_norm": 1.332490372152096, "learning_rate": 3.1974538109861164e-06, "loss": 0.6512, "step": 20519 }, { "epoch": 0.6289076866495035, "grad_norm": 1.2716384678346802, "learning_rate": 3.1969908766028736e-06, "loss": 0.6269, "step": 20520 }, { "epoch": 0.6289383351722447, "grad_norm": 1.13344274465052, "learning_rate": 3.1965279599851397e-06, "loss": 0.6231, "step": 20521 }, { "epoch": 0.6289689836949859, "grad_norm": 1.2648800163969984, "learning_rate": 3.1960650611374777e-06, "loss": 0.5883, "step": 20522 }, { "epoch": 0.6289996322177271, "grad_norm": 1.324544986357328, "learning_rate": 3.1956021800644497e-06, "loss": 0.5986, "step": 20523 }, { "epoch": 0.6290302807404683, "grad_norm": 0.4378818594426972, "learning_rate": 3.1951393167706137e-06, "loss": 0.4043, "step": 20524 }, { "epoch": 0.6290609292632096, "grad_norm": 1.2793661003224774, "learning_rate": 3.194676471260533e-06, "loss": 0.5971, "step": 20525 }, { "epoch": 0.6290915777859507, "grad_norm": 1.616346065925057, "learning_rate": 3.194213643538766e-06, "loss": 0.6894, "step": 20526 }, { "epoch": 0.629122226308692, "grad_norm": 1.1870040704830755, "learning_rate": 3.193750833609873e-06, "loss": 0.5731, "step": 20527 }, { "epoch": 0.6291528748314331, "grad_norm": 1.2690272410949959, "learning_rate": 3.193288041478416e-06, "loss": 0.6109, "step": 20528 }, { "epoch": 0.6291835233541744, "grad_norm": 1.3131448403972794, "learning_rate": 3.192825267148954e-06, "loss": 0.6283, "step": 20529 }, { "epoch": 0.6292141718769155, "grad_norm": 1.4796459765821592, "learning_rate": 3.1923625106260483e-06, "loss": 0.6543, "step": 20530 }, { "epoch": 0.6292448203996568, "grad_norm": 1.211212816827392, "learning_rate": 3.1918997719142573e-06, "loss": 0.5622, "step": 20531 }, { "epoch": 0.6292754689223979, "grad_norm": 0.4456789504758689, "learning_rate": 3.1914370510181382e-06, "loss": 0.3956, "step": 20532 }, { "epoch": 0.6293061174451391, "grad_norm": 1.3171251577014689, "learning_rate": 3.190974347942255e-06, "loss": 0.6408, "step": 20533 }, { "epoch": 0.6293367659678804, "grad_norm": 1.3136761557493941, "learning_rate": 3.1905116626911636e-06, "loss": 0.6022, "step": 20534 }, { "epoch": 0.6293674144906215, "grad_norm": 1.3774723406669636, "learning_rate": 3.1900489952694225e-06, "loss": 0.6555, "step": 20535 }, { "epoch": 0.6293980630133628, "grad_norm": 1.3545449973496777, "learning_rate": 3.1895863456815933e-06, "loss": 0.6526, "step": 20536 }, { "epoch": 0.6294287115361039, "grad_norm": 1.2857615308620338, "learning_rate": 3.189123713932233e-06, "loss": 0.6877, "step": 20537 }, { "epoch": 0.6294593600588452, "grad_norm": 0.4575463142785025, "learning_rate": 3.1886611000258984e-06, "loss": 0.3844, "step": 20538 }, { "epoch": 0.6294900085815863, "grad_norm": 1.3572873387744748, "learning_rate": 3.1881985039671515e-06, "loss": 0.7188, "step": 20539 }, { "epoch": 0.6295206571043276, "grad_norm": 1.2326389454471733, "learning_rate": 3.187735925760547e-06, "loss": 0.5972, "step": 20540 }, { "epoch": 0.6295513056270687, "grad_norm": 0.44615212309417307, "learning_rate": 3.1872733654106443e-06, "loss": 0.4307, "step": 20541 }, { "epoch": 0.62958195414981, "grad_norm": 1.3218998316847321, "learning_rate": 3.1868108229220024e-06, "loss": 0.6712, "step": 20542 }, { "epoch": 0.6296126026725511, "grad_norm": 1.3924738137380654, "learning_rate": 3.1863482982991745e-06, "loss": 0.6697, "step": 20543 }, { "epoch": 0.6296432511952924, "grad_norm": 1.3011028750005462, "learning_rate": 3.1858857915467234e-06, "loss": 0.7187, "step": 20544 }, { "epoch": 0.6296738997180336, "grad_norm": 1.3064860806837617, "learning_rate": 3.185423302669204e-06, "loss": 0.6226, "step": 20545 }, { "epoch": 0.6297045482407748, "grad_norm": 1.2739706756830171, "learning_rate": 3.184960831671171e-06, "loss": 0.4954, "step": 20546 }, { "epoch": 0.629735196763516, "grad_norm": 1.183549770600731, "learning_rate": 3.184498378557184e-06, "loss": 0.6008, "step": 20547 }, { "epoch": 0.6297658452862572, "grad_norm": 1.3873360925527267, "learning_rate": 3.1840359433317993e-06, "loss": 0.7285, "step": 20548 }, { "epoch": 0.6297964938089984, "grad_norm": 1.3525743425930663, "learning_rate": 3.1835735259995725e-06, "loss": 0.6142, "step": 20549 }, { "epoch": 0.6298271423317396, "grad_norm": 1.3678238640819484, "learning_rate": 3.1831111265650616e-06, "loss": 0.6844, "step": 20550 }, { "epoch": 0.6298577908544808, "grad_norm": 0.4564229421558446, "learning_rate": 3.1826487450328203e-06, "loss": 0.4163, "step": 20551 }, { "epoch": 0.629888439377222, "grad_norm": 1.2823387157291422, "learning_rate": 3.182186381407407e-06, "loss": 0.6737, "step": 20552 }, { "epoch": 0.6299190878999632, "grad_norm": 1.342712114113545, "learning_rate": 3.181724035693378e-06, "loss": 0.6058, "step": 20553 }, { "epoch": 0.6299497364227045, "grad_norm": 1.3227699226917762, "learning_rate": 3.1812617078952834e-06, "loss": 0.6824, "step": 20554 }, { "epoch": 0.6299803849454456, "grad_norm": 1.4486384393601004, "learning_rate": 3.1807993980176855e-06, "loss": 0.6585, "step": 20555 }, { "epoch": 0.6300110334681869, "grad_norm": 1.153637530024446, "learning_rate": 3.1803371060651343e-06, "loss": 0.5725, "step": 20556 }, { "epoch": 0.630041681990928, "grad_norm": 1.160032408143981, "learning_rate": 3.1798748320421895e-06, "loss": 0.6046, "step": 20557 }, { "epoch": 0.6300723305136693, "grad_norm": 1.6326566666154714, "learning_rate": 3.1794125759534033e-06, "loss": 0.5944, "step": 20558 }, { "epoch": 0.6301029790364104, "grad_norm": 1.2851679651566856, "learning_rate": 3.178950337803329e-06, "loss": 0.6269, "step": 20559 }, { "epoch": 0.6301336275591517, "grad_norm": 1.090082675172102, "learning_rate": 3.1784881175965248e-06, "loss": 0.6015, "step": 20560 }, { "epoch": 0.6301642760818928, "grad_norm": 1.4136838252350807, "learning_rate": 3.1780259153375426e-06, "loss": 0.6838, "step": 20561 }, { "epoch": 0.6301949246046341, "grad_norm": 1.2727577684027291, "learning_rate": 3.177563731030937e-06, "loss": 0.648, "step": 20562 }, { "epoch": 0.6302255731273753, "grad_norm": 1.27819872550204, "learning_rate": 3.1771015646812625e-06, "loss": 0.5844, "step": 20563 }, { "epoch": 0.6302562216501164, "grad_norm": 1.1382802231331275, "learning_rate": 3.1766394162930734e-06, "loss": 0.6138, "step": 20564 }, { "epoch": 0.6302868701728577, "grad_norm": 1.2231642851133897, "learning_rate": 3.1761772858709204e-06, "loss": 0.6361, "step": 20565 }, { "epoch": 0.6303175186955988, "grad_norm": 1.4075481314584428, "learning_rate": 3.1757151734193624e-06, "loss": 0.6731, "step": 20566 }, { "epoch": 0.6303481672183401, "grad_norm": 1.3402785593857938, "learning_rate": 3.175253078942947e-06, "loss": 0.6052, "step": 20567 }, { "epoch": 0.6303788157410812, "grad_norm": 1.4933642291127394, "learning_rate": 3.174791002446231e-06, "loss": 0.6264, "step": 20568 }, { "epoch": 0.6304094642638225, "grad_norm": 2.1924229487097753, "learning_rate": 3.1743289439337665e-06, "loss": 0.5217, "step": 20569 }, { "epoch": 0.6304401127865636, "grad_norm": 1.4265752462008576, "learning_rate": 3.173866903410105e-06, "loss": 0.6085, "step": 20570 }, { "epoch": 0.6304707613093049, "grad_norm": 0.4720768882217567, "learning_rate": 3.173404880879801e-06, "loss": 0.4114, "step": 20571 }, { "epoch": 0.6305014098320461, "grad_norm": 1.2556381249672766, "learning_rate": 3.1729428763474057e-06, "loss": 0.6216, "step": 20572 }, { "epoch": 0.6305320583547873, "grad_norm": 1.3585938763715526, "learning_rate": 3.1724808898174712e-06, "loss": 0.59, "step": 20573 }, { "epoch": 0.6305627068775285, "grad_norm": 1.391185534334668, "learning_rate": 3.1720189212945513e-06, "loss": 0.6195, "step": 20574 }, { "epoch": 0.6305933554002697, "grad_norm": 0.43245356619273484, "learning_rate": 3.171556970783195e-06, "loss": 0.4191, "step": 20575 }, { "epoch": 0.6306240039230109, "grad_norm": 1.3757546989295661, "learning_rate": 3.1710950382879586e-06, "loss": 0.6269, "step": 20576 }, { "epoch": 0.6306546524457521, "grad_norm": 1.4978375474239987, "learning_rate": 3.170633123813389e-06, "loss": 0.57, "step": 20577 }, { "epoch": 0.6306853009684933, "grad_norm": 1.3563890158738465, "learning_rate": 3.1701712273640383e-06, "loss": 0.6347, "step": 20578 }, { "epoch": 0.6307159494912346, "grad_norm": 0.44975283155088513, "learning_rate": 3.16970934894446e-06, "loss": 0.3945, "step": 20579 }, { "epoch": 0.6307465980139757, "grad_norm": 1.206176454803547, "learning_rate": 3.169247488559204e-06, "loss": 0.5576, "step": 20580 }, { "epoch": 0.630777246536717, "grad_norm": 1.2846145982123773, "learning_rate": 3.168785646212819e-06, "loss": 0.6928, "step": 20581 }, { "epoch": 0.6308078950594581, "grad_norm": 0.46912293770818203, "learning_rate": 3.1683238219098596e-06, "loss": 0.4141, "step": 20582 }, { "epoch": 0.6308385435821994, "grad_norm": 1.246212269983802, "learning_rate": 3.1678620156548744e-06, "loss": 0.525, "step": 20583 }, { "epoch": 0.6308691921049405, "grad_norm": 1.2818280061335623, "learning_rate": 3.167400227452411e-06, "loss": 0.6688, "step": 20584 }, { "epoch": 0.6308998406276818, "grad_norm": 1.523072640313471, "learning_rate": 3.166938457307025e-06, "loss": 0.6907, "step": 20585 }, { "epoch": 0.6309304891504229, "grad_norm": 1.517473752684105, "learning_rate": 3.1664767052232603e-06, "loss": 0.5967, "step": 20586 }, { "epoch": 0.6309611376731642, "grad_norm": 1.1320334641963385, "learning_rate": 3.1660149712056728e-06, "loss": 0.668, "step": 20587 }, { "epoch": 0.6309917861959053, "grad_norm": 1.467575085577793, "learning_rate": 3.1655532552588077e-06, "loss": 0.635, "step": 20588 }, { "epoch": 0.6310224347186466, "grad_norm": 1.3311646959333836, "learning_rate": 3.1650915573872154e-06, "loss": 0.5701, "step": 20589 }, { "epoch": 0.6310530832413878, "grad_norm": 1.124757679214316, "learning_rate": 3.164629877595446e-06, "loss": 0.5972, "step": 20590 }, { "epoch": 0.631083731764129, "grad_norm": 1.2027675024152475, "learning_rate": 3.164168215888048e-06, "loss": 0.6753, "step": 20591 }, { "epoch": 0.6311143802868702, "grad_norm": 1.1712503074484713, "learning_rate": 3.163706572269569e-06, "loss": 0.6287, "step": 20592 }, { "epoch": 0.6311450288096114, "grad_norm": 1.452414197983126, "learning_rate": 3.163244946744561e-06, "loss": 0.5602, "step": 20593 }, { "epoch": 0.6311756773323526, "grad_norm": 0.4636306442539759, "learning_rate": 3.162783339317569e-06, "loss": 0.4107, "step": 20594 }, { "epoch": 0.6312063258550937, "grad_norm": 1.2776283403260362, "learning_rate": 3.1623217499931434e-06, "loss": 0.594, "step": 20595 }, { "epoch": 0.631236974377835, "grad_norm": 1.4469477324616271, "learning_rate": 3.161860178775833e-06, "loss": 0.6644, "step": 20596 }, { "epoch": 0.6312676229005761, "grad_norm": 1.2005723148286622, "learning_rate": 3.161398625670182e-06, "loss": 0.664, "step": 20597 }, { "epoch": 0.6312982714233174, "grad_norm": 1.4149947606048006, "learning_rate": 3.1609370906807436e-06, "loss": 0.6238, "step": 20598 }, { "epoch": 0.6313289199460586, "grad_norm": 1.1126806815206518, "learning_rate": 3.1604755738120614e-06, "loss": 0.5882, "step": 20599 }, { "epoch": 0.6313595684687998, "grad_norm": 1.350335562663539, "learning_rate": 3.1600140750686827e-06, "loss": 0.6374, "step": 20600 }, { "epoch": 0.631390216991541, "grad_norm": 1.5578608135280454, "learning_rate": 3.1595525944551574e-06, "loss": 0.6928, "step": 20601 }, { "epoch": 0.6314208655142822, "grad_norm": 1.3467141517517371, "learning_rate": 3.159091131976031e-06, "loss": 0.602, "step": 20602 }, { "epoch": 0.6314515140370234, "grad_norm": 1.3484403914232603, "learning_rate": 3.1586296876358506e-06, "loss": 0.7472, "step": 20603 }, { "epoch": 0.6314821625597646, "grad_norm": 1.1801497347700016, "learning_rate": 3.1581682614391634e-06, "loss": 0.508, "step": 20604 }, { "epoch": 0.6315128110825058, "grad_norm": 1.4425787488227577, "learning_rate": 3.157706853390515e-06, "loss": 0.6206, "step": 20605 }, { "epoch": 0.631543459605247, "grad_norm": 1.1449557457103012, "learning_rate": 3.157245463494453e-06, "loss": 0.6315, "step": 20606 }, { "epoch": 0.6315741081279882, "grad_norm": 1.1131619951742084, "learning_rate": 3.1567840917555237e-06, "loss": 0.5972, "step": 20607 }, { "epoch": 0.6316047566507295, "grad_norm": 1.2601473431720918, "learning_rate": 3.15632273817827e-06, "loss": 0.652, "step": 20608 }, { "epoch": 0.6316354051734706, "grad_norm": 1.2766318717542455, "learning_rate": 3.1558614027672417e-06, "loss": 0.6135, "step": 20609 }, { "epoch": 0.6316660536962119, "grad_norm": 1.374337583912406, "learning_rate": 3.1554000855269833e-06, "loss": 0.6545, "step": 20610 }, { "epoch": 0.631696702218953, "grad_norm": 1.3202195796497713, "learning_rate": 3.1549387864620386e-06, "loss": 0.6475, "step": 20611 }, { "epoch": 0.6317273507416943, "grad_norm": 1.164146196070589, "learning_rate": 3.154477505576955e-06, "loss": 0.6061, "step": 20612 }, { "epoch": 0.6317579992644354, "grad_norm": 1.2039023652535203, "learning_rate": 3.1540162428762755e-06, "loss": 0.6519, "step": 20613 }, { "epoch": 0.6317886477871767, "grad_norm": 1.2887462580067317, "learning_rate": 3.153554998364547e-06, "loss": 0.5832, "step": 20614 }, { "epoch": 0.6318192963099178, "grad_norm": 0.46259463263896144, "learning_rate": 3.153093772046314e-06, "loss": 0.3924, "step": 20615 }, { "epoch": 0.6318499448326591, "grad_norm": 1.2252089906105093, "learning_rate": 3.152632563926119e-06, "loss": 0.5718, "step": 20616 }, { "epoch": 0.6318805933554003, "grad_norm": 1.1738020166201841, "learning_rate": 3.1521713740085103e-06, "loss": 0.5948, "step": 20617 }, { "epoch": 0.6319112418781415, "grad_norm": 1.2277883505475478, "learning_rate": 3.1517102022980296e-06, "loss": 0.6136, "step": 20618 }, { "epoch": 0.6319418904008827, "grad_norm": 1.436158309415139, "learning_rate": 3.1512490487992197e-06, "loss": 0.6888, "step": 20619 }, { "epoch": 0.6319725389236239, "grad_norm": 1.1750277087083414, "learning_rate": 3.150787913516627e-06, "loss": 0.5663, "step": 20620 }, { "epoch": 0.6320031874463651, "grad_norm": 1.1693649086573243, "learning_rate": 3.1503267964547927e-06, "loss": 0.5536, "step": 20621 }, { "epoch": 0.6320338359691063, "grad_norm": 1.331495209707631, "learning_rate": 3.149865697618263e-06, "loss": 0.5906, "step": 20622 }, { "epoch": 0.6320644844918475, "grad_norm": 1.3298912432288437, "learning_rate": 3.1494046170115798e-06, "loss": 0.6757, "step": 20623 }, { "epoch": 0.6320951330145888, "grad_norm": 1.281467911617563, "learning_rate": 3.148943554639286e-06, "loss": 0.5801, "step": 20624 }, { "epoch": 0.6321257815373299, "grad_norm": 1.4192767265218489, "learning_rate": 3.148482510505926e-06, "loss": 0.6486, "step": 20625 }, { "epoch": 0.632156430060071, "grad_norm": 1.3495240788640481, "learning_rate": 3.1480214846160405e-06, "loss": 0.6464, "step": 20626 }, { "epoch": 0.6321870785828123, "grad_norm": 0.43953050193471416, "learning_rate": 3.147560476974173e-06, "loss": 0.3934, "step": 20627 }, { "epoch": 0.6322177271055535, "grad_norm": 1.221472339920527, "learning_rate": 3.147099487584868e-06, "loss": 0.6529, "step": 20628 }, { "epoch": 0.6322483756282947, "grad_norm": 1.3438091013949578, "learning_rate": 3.1466385164526625e-06, "loss": 0.6572, "step": 20629 }, { "epoch": 0.6322790241510359, "grad_norm": 1.365581820905275, "learning_rate": 3.1461775635821053e-06, "loss": 0.6036, "step": 20630 }, { "epoch": 0.6323096726737771, "grad_norm": 1.318882587962889, "learning_rate": 3.1457166289777335e-06, "loss": 0.675, "step": 20631 }, { "epoch": 0.6323403211965183, "grad_norm": 1.2332898663121135, "learning_rate": 3.14525571264409e-06, "loss": 0.5719, "step": 20632 }, { "epoch": 0.6323709697192595, "grad_norm": 1.3940739880355633, "learning_rate": 3.1447948145857165e-06, "loss": 0.7104, "step": 20633 }, { "epoch": 0.6324016182420007, "grad_norm": 0.4655356896660137, "learning_rate": 3.1443339348071544e-06, "loss": 0.4075, "step": 20634 }, { "epoch": 0.632432266764742, "grad_norm": 1.2668268388746349, "learning_rate": 3.1438730733129445e-06, "loss": 0.6175, "step": 20635 }, { "epoch": 0.6324629152874831, "grad_norm": 1.4068403435487074, "learning_rate": 3.1434122301076286e-06, "loss": 0.6485, "step": 20636 }, { "epoch": 0.6324935638102244, "grad_norm": 0.45613483458018644, "learning_rate": 3.142951405195749e-06, "loss": 0.4084, "step": 20637 }, { "epoch": 0.6325242123329655, "grad_norm": 1.0888937934986793, "learning_rate": 3.14249059858184e-06, "loss": 0.6707, "step": 20638 }, { "epoch": 0.6325548608557068, "grad_norm": 1.090437337386974, "learning_rate": 3.1420298102704505e-06, "loss": 0.6456, "step": 20639 }, { "epoch": 0.6325855093784479, "grad_norm": 1.3543074621431295, "learning_rate": 3.1415690402661124e-06, "loss": 0.638, "step": 20640 }, { "epoch": 0.6326161579011892, "grad_norm": 1.2222756594509185, "learning_rate": 3.1411082885733747e-06, "loss": 0.5906, "step": 20641 }, { "epoch": 0.6326468064239303, "grad_norm": 1.151307014052725, "learning_rate": 3.1406475551967703e-06, "loss": 0.6152, "step": 20642 }, { "epoch": 0.6326774549466716, "grad_norm": 1.2914469519385094, "learning_rate": 3.140186840140841e-06, "loss": 0.6563, "step": 20643 }, { "epoch": 0.6327081034694128, "grad_norm": 1.2498337384879916, "learning_rate": 3.139726143410127e-06, "loss": 0.5657, "step": 20644 }, { "epoch": 0.632738751992154, "grad_norm": 1.3000421508684727, "learning_rate": 3.139265465009168e-06, "loss": 0.544, "step": 20645 }, { "epoch": 0.6327694005148952, "grad_norm": 1.3894130067877921, "learning_rate": 3.1388048049425007e-06, "loss": 0.6884, "step": 20646 }, { "epoch": 0.6328000490376364, "grad_norm": 1.3597290985530381, "learning_rate": 3.1383441632146673e-06, "loss": 0.608, "step": 20647 }, { "epoch": 0.6328306975603776, "grad_norm": 1.321592205268316, "learning_rate": 3.1378835398302043e-06, "loss": 0.6257, "step": 20648 }, { "epoch": 0.6328613460831188, "grad_norm": 1.3683118327303156, "learning_rate": 3.137422934793652e-06, "loss": 0.5991, "step": 20649 }, { "epoch": 0.63289199460586, "grad_norm": 1.2546700650765343, "learning_rate": 3.136962348109549e-06, "loss": 0.6033, "step": 20650 }, { "epoch": 0.6329226431286012, "grad_norm": 1.2125534087263106, "learning_rate": 3.13650177978243e-06, "loss": 0.6026, "step": 20651 }, { "epoch": 0.6329532916513424, "grad_norm": 0.4474498570985785, "learning_rate": 3.1360412298168384e-06, "loss": 0.4027, "step": 20652 }, { "epoch": 0.6329839401740837, "grad_norm": 1.2670539987726013, "learning_rate": 3.1355806982173086e-06, "loss": 0.6278, "step": 20653 }, { "epoch": 0.6330145886968248, "grad_norm": 1.144652688481576, "learning_rate": 3.1351201849883784e-06, "loss": 0.5661, "step": 20654 }, { "epoch": 0.6330452372195661, "grad_norm": 1.5261364532990085, "learning_rate": 3.134659690134587e-06, "loss": 0.554, "step": 20655 }, { "epoch": 0.6330758857423072, "grad_norm": 1.0868855071786214, "learning_rate": 3.1341992136604706e-06, "loss": 0.6397, "step": 20656 }, { "epoch": 0.6331065342650484, "grad_norm": 1.273213195273463, "learning_rate": 3.1337387555705667e-06, "loss": 0.6446, "step": 20657 }, { "epoch": 0.6331371827877896, "grad_norm": 1.2829467013412865, "learning_rate": 3.1332783158694123e-06, "loss": 0.5958, "step": 20658 }, { "epoch": 0.6331678313105308, "grad_norm": 0.45811273848270345, "learning_rate": 3.132817894561544e-06, "loss": 0.3869, "step": 20659 }, { "epoch": 0.633198479833272, "grad_norm": 1.2075300304438612, "learning_rate": 3.1323574916515e-06, "loss": 0.56, "step": 20660 }, { "epoch": 0.6332291283560132, "grad_norm": 1.3916819170250696, "learning_rate": 3.1318971071438154e-06, "loss": 0.6398, "step": 20661 }, { "epoch": 0.6332597768787545, "grad_norm": 1.202793807740794, "learning_rate": 3.131436741043024e-06, "loss": 0.6122, "step": 20662 }, { "epoch": 0.6332904254014956, "grad_norm": 1.3259048226411712, "learning_rate": 3.130976393353668e-06, "loss": 0.5911, "step": 20663 }, { "epoch": 0.6333210739242369, "grad_norm": 1.1920307542592354, "learning_rate": 3.1305160640802786e-06, "loss": 0.573, "step": 20664 }, { "epoch": 0.633351722446978, "grad_norm": 0.46319037978586025, "learning_rate": 3.1300557532273913e-06, "loss": 0.3993, "step": 20665 }, { "epoch": 0.6333823709697193, "grad_norm": 1.3750801982024057, "learning_rate": 3.129595460799544e-06, "loss": 0.6192, "step": 20666 }, { "epoch": 0.6334130194924604, "grad_norm": 1.1709525303342656, "learning_rate": 3.129135186801271e-06, "loss": 0.6581, "step": 20667 }, { "epoch": 0.6334436680152017, "grad_norm": 0.4544906850330484, "learning_rate": 3.128674931237108e-06, "loss": 0.3907, "step": 20668 }, { "epoch": 0.6334743165379428, "grad_norm": 0.4498267772029739, "learning_rate": 3.12821469411159e-06, "loss": 0.4232, "step": 20669 }, { "epoch": 0.6335049650606841, "grad_norm": 1.1864276083781, "learning_rate": 3.1277544754292505e-06, "loss": 0.6403, "step": 20670 }, { "epoch": 0.6335356135834253, "grad_norm": 1.4147861052453081, "learning_rate": 3.127294275194627e-06, "loss": 0.6648, "step": 20671 }, { "epoch": 0.6335662621061665, "grad_norm": 1.2780049289494626, "learning_rate": 3.1268340934122515e-06, "loss": 0.6322, "step": 20672 }, { "epoch": 0.6335969106289077, "grad_norm": 1.4229132935685047, "learning_rate": 3.126373930086658e-06, "loss": 0.7205, "step": 20673 }, { "epoch": 0.6336275591516489, "grad_norm": 1.1211415327832908, "learning_rate": 3.125913785222382e-06, "loss": 0.5627, "step": 20674 }, { "epoch": 0.6336582076743901, "grad_norm": 1.3584431326161637, "learning_rate": 3.1254536588239566e-06, "loss": 0.589, "step": 20675 }, { "epoch": 0.6336888561971313, "grad_norm": 1.2762686933315837, "learning_rate": 3.1249935508959163e-06, "loss": 0.6455, "step": 20676 }, { "epoch": 0.6337195047198725, "grad_norm": 1.2996774314008093, "learning_rate": 3.1245334614427946e-06, "loss": 0.574, "step": 20677 }, { "epoch": 0.6337501532426137, "grad_norm": 1.2170050057833837, "learning_rate": 3.1240733904691234e-06, "loss": 0.6104, "step": 20678 }, { "epoch": 0.6337808017653549, "grad_norm": 1.2172949434091824, "learning_rate": 3.1236133379794386e-06, "loss": 0.6285, "step": 20679 }, { "epoch": 0.6338114502880962, "grad_norm": 1.30374407574826, "learning_rate": 3.1231533039782724e-06, "loss": 0.61, "step": 20680 }, { "epoch": 0.6338420988108373, "grad_norm": 1.4466743553029062, "learning_rate": 3.1226932884701537e-06, "loss": 0.6685, "step": 20681 }, { "epoch": 0.6338727473335786, "grad_norm": 1.3163222853032455, "learning_rate": 3.1222332914596214e-06, "loss": 0.6577, "step": 20682 }, { "epoch": 0.6339033958563197, "grad_norm": 1.2175334791740036, "learning_rate": 3.121773312951202e-06, "loss": 0.6412, "step": 20683 }, { "epoch": 0.633934044379061, "grad_norm": 1.276147392492445, "learning_rate": 3.121313352949434e-06, "loss": 0.68, "step": 20684 }, { "epoch": 0.6339646929018021, "grad_norm": 1.269358345394407, "learning_rate": 3.120853411458845e-06, "loss": 0.5119, "step": 20685 }, { "epoch": 0.6339953414245434, "grad_norm": 1.1687637146629315, "learning_rate": 3.120393488483967e-06, "loss": 0.5842, "step": 20686 }, { "epoch": 0.6340259899472845, "grad_norm": 0.4660945538293394, "learning_rate": 3.119933584029334e-06, "loss": 0.4122, "step": 20687 }, { "epoch": 0.6340566384700257, "grad_norm": 1.174671885305791, "learning_rate": 3.1194736980994764e-06, "loss": 0.5708, "step": 20688 }, { "epoch": 0.634087286992767, "grad_norm": 1.1789886817787008, "learning_rate": 3.1190138306989247e-06, "loss": 0.6199, "step": 20689 }, { "epoch": 0.6341179355155081, "grad_norm": 2.0990122236254143, "learning_rate": 3.118553981832212e-06, "loss": 0.6435, "step": 20690 }, { "epoch": 0.6341485840382494, "grad_norm": 1.2558725110150641, "learning_rate": 3.118094151503869e-06, "loss": 0.625, "step": 20691 }, { "epoch": 0.6341792325609905, "grad_norm": 1.4697569057498643, "learning_rate": 3.1176343397184226e-06, "loss": 0.6753, "step": 20692 }, { "epoch": 0.6342098810837318, "grad_norm": 1.3352743424417988, "learning_rate": 3.1171745464804106e-06, "loss": 0.5738, "step": 20693 }, { "epoch": 0.6342405296064729, "grad_norm": 1.263999933813874, "learning_rate": 3.1167147717943556e-06, "loss": 0.6025, "step": 20694 }, { "epoch": 0.6342711781292142, "grad_norm": 1.307496184914011, "learning_rate": 3.116255015664795e-06, "loss": 0.6383, "step": 20695 }, { "epoch": 0.6343018266519553, "grad_norm": 1.3007058503448514, "learning_rate": 3.1157952780962555e-06, "loss": 0.6149, "step": 20696 }, { "epoch": 0.6343324751746966, "grad_norm": 1.297802304798905, "learning_rate": 3.1153355590932655e-06, "loss": 0.6205, "step": 20697 }, { "epoch": 0.6343631236974377, "grad_norm": 1.3100141968106107, "learning_rate": 3.114875858660358e-06, "loss": 0.6088, "step": 20698 }, { "epoch": 0.634393772220179, "grad_norm": 1.317629955201987, "learning_rate": 3.11441617680206e-06, "loss": 0.7077, "step": 20699 }, { "epoch": 0.6344244207429202, "grad_norm": 1.1858857930555302, "learning_rate": 3.113956513522902e-06, "loss": 0.5919, "step": 20700 }, { "epoch": 0.6344550692656614, "grad_norm": 0.44397491733261407, "learning_rate": 3.1134968688274135e-06, "loss": 0.4075, "step": 20701 }, { "epoch": 0.6344857177884026, "grad_norm": 1.2852097908998767, "learning_rate": 3.1130372427201215e-06, "loss": 0.5693, "step": 20702 }, { "epoch": 0.6345163663111438, "grad_norm": 1.4366975911974476, "learning_rate": 3.1125776352055583e-06, "loss": 0.6577, "step": 20703 }, { "epoch": 0.634547014833885, "grad_norm": 0.482403322884096, "learning_rate": 3.112118046288251e-06, "loss": 0.4304, "step": 20704 }, { "epoch": 0.6345776633566262, "grad_norm": 1.2366983841361388, "learning_rate": 3.1116584759727255e-06, "loss": 0.6413, "step": 20705 }, { "epoch": 0.6346083118793674, "grad_norm": 0.44530698129974244, "learning_rate": 3.1111989242635144e-06, "loss": 0.4198, "step": 20706 }, { "epoch": 0.6346389604021087, "grad_norm": 1.5133368895080745, "learning_rate": 3.110739391165142e-06, "loss": 0.6136, "step": 20707 }, { "epoch": 0.6346696089248498, "grad_norm": 1.4291737446520336, "learning_rate": 3.110279876682137e-06, "loss": 0.6303, "step": 20708 }, { "epoch": 0.6347002574475911, "grad_norm": 1.408912446163409, "learning_rate": 3.1098203808190297e-06, "loss": 0.6943, "step": 20709 }, { "epoch": 0.6347309059703322, "grad_norm": 1.2941083384545087, "learning_rate": 3.1093609035803446e-06, "loss": 0.6414, "step": 20710 }, { "epoch": 0.6347615544930735, "grad_norm": 1.2615293833729908, "learning_rate": 3.1089014449706114e-06, "loss": 0.5479, "step": 20711 }, { "epoch": 0.6347922030158146, "grad_norm": 1.3298448133064797, "learning_rate": 3.1084420049943553e-06, "loss": 0.5926, "step": 20712 }, { "epoch": 0.6348228515385559, "grad_norm": 0.4757254986673656, "learning_rate": 3.107982583656104e-06, "loss": 0.4359, "step": 20713 }, { "epoch": 0.634853500061297, "grad_norm": 1.3319238379353011, "learning_rate": 3.1075231809603847e-06, "loss": 0.6574, "step": 20714 }, { "epoch": 0.6348841485840383, "grad_norm": 1.2397405498811358, "learning_rate": 3.107063796911725e-06, "loss": 0.584, "step": 20715 }, { "epoch": 0.6349147971067794, "grad_norm": 1.1135885873076472, "learning_rate": 3.106604431514648e-06, "loss": 0.5437, "step": 20716 }, { "epoch": 0.6349454456295207, "grad_norm": 1.3902956360965526, "learning_rate": 3.106145084773683e-06, "loss": 0.7178, "step": 20717 }, { "epoch": 0.6349760941522619, "grad_norm": 1.0620112710809682, "learning_rate": 3.105685756693355e-06, "loss": 0.6243, "step": 20718 }, { "epoch": 0.635006742675003, "grad_norm": 1.2837925356453443, "learning_rate": 3.105226447278189e-06, "loss": 0.6108, "step": 20719 }, { "epoch": 0.6350373911977443, "grad_norm": 1.502077771427172, "learning_rate": 3.104767156532713e-06, "loss": 0.6353, "step": 20720 }, { "epoch": 0.6350680397204854, "grad_norm": 1.3341465705855355, "learning_rate": 3.1043078844614495e-06, "loss": 0.5874, "step": 20721 }, { "epoch": 0.6350986882432267, "grad_norm": 1.2034607031742397, "learning_rate": 3.1038486310689266e-06, "loss": 0.6714, "step": 20722 }, { "epoch": 0.6351293367659678, "grad_norm": 1.4066767740448611, "learning_rate": 3.1033893963596695e-06, "loss": 0.6313, "step": 20723 }, { "epoch": 0.6351599852887091, "grad_norm": 1.230597755669556, "learning_rate": 3.1029301803381984e-06, "loss": 0.5568, "step": 20724 }, { "epoch": 0.6351906338114502, "grad_norm": 1.2799037183070385, "learning_rate": 3.1024709830090453e-06, "loss": 0.6455, "step": 20725 }, { "epoch": 0.6352212823341915, "grad_norm": 1.1432134502010713, "learning_rate": 3.10201180437673e-06, "loss": 0.5607, "step": 20726 }, { "epoch": 0.6352519308569327, "grad_norm": 1.2813085822007737, "learning_rate": 3.1015526444457767e-06, "loss": 0.6088, "step": 20727 }, { "epoch": 0.6352825793796739, "grad_norm": 1.4085193657792194, "learning_rate": 3.1010935032207123e-06, "loss": 0.6602, "step": 20728 }, { "epoch": 0.6353132279024151, "grad_norm": 1.2111205146125064, "learning_rate": 3.100634380706058e-06, "loss": 0.6332, "step": 20729 }, { "epoch": 0.6353438764251563, "grad_norm": 1.249670914431012, "learning_rate": 3.10017527690634e-06, "loss": 0.6838, "step": 20730 }, { "epoch": 0.6353745249478975, "grad_norm": 1.3119914153188659, "learning_rate": 3.099716191826082e-06, "loss": 0.6478, "step": 20731 }, { "epoch": 0.6354051734706387, "grad_norm": 1.1482992392246223, "learning_rate": 3.099257125469805e-06, "loss": 0.5356, "step": 20732 }, { "epoch": 0.6354358219933799, "grad_norm": 1.3092730280551241, "learning_rate": 3.0987980778420346e-06, "loss": 0.6552, "step": 20733 }, { "epoch": 0.6354664705161212, "grad_norm": 1.1821215714666835, "learning_rate": 3.0983390489472948e-06, "loss": 0.5286, "step": 20734 }, { "epoch": 0.6354971190388623, "grad_norm": 1.310488796600985, "learning_rate": 3.0978800387901033e-06, "loss": 0.5771, "step": 20735 }, { "epoch": 0.6355277675616036, "grad_norm": 1.185094058404257, "learning_rate": 3.09742104737499e-06, "loss": 0.6257, "step": 20736 }, { "epoch": 0.6355584160843447, "grad_norm": 1.4233682936822758, "learning_rate": 3.0969620747064704e-06, "loss": 0.6872, "step": 20737 }, { "epoch": 0.635589064607086, "grad_norm": 1.3744702380470013, "learning_rate": 3.096503120789074e-06, "loss": 0.6619, "step": 20738 }, { "epoch": 0.6356197131298271, "grad_norm": 1.2385101739631126, "learning_rate": 3.0960441856273183e-06, "loss": 0.564, "step": 20739 }, { "epoch": 0.6356503616525684, "grad_norm": 1.2607721231441393, "learning_rate": 3.095585269225725e-06, "loss": 0.652, "step": 20740 }, { "epoch": 0.6356810101753095, "grad_norm": 1.2243731403886493, "learning_rate": 3.095126371588818e-06, "loss": 0.6068, "step": 20741 }, { "epoch": 0.6357116586980508, "grad_norm": 0.4624343079610495, "learning_rate": 3.0946674927211186e-06, "loss": 0.3968, "step": 20742 }, { "epoch": 0.635742307220792, "grad_norm": 1.1260385318751485, "learning_rate": 3.094208632627147e-06, "loss": 0.6086, "step": 20743 }, { "epoch": 0.6357729557435332, "grad_norm": 1.3469603600946987, "learning_rate": 3.093749791311427e-06, "loss": 0.6225, "step": 20744 }, { "epoch": 0.6358036042662744, "grad_norm": 1.2276571662140965, "learning_rate": 3.093290968778478e-06, "loss": 0.603, "step": 20745 }, { "epoch": 0.6358342527890156, "grad_norm": 1.277858847405222, "learning_rate": 3.0928321650328187e-06, "loss": 0.6336, "step": 20746 }, { "epoch": 0.6358649013117568, "grad_norm": 0.4469471927669374, "learning_rate": 3.092373380078974e-06, "loss": 0.3884, "step": 20747 }, { "epoch": 0.635895549834498, "grad_norm": 1.2248711287666572, "learning_rate": 3.091914613921461e-06, "loss": 0.5525, "step": 20748 }, { "epoch": 0.6359261983572392, "grad_norm": 1.4219970741110375, "learning_rate": 3.091455866564803e-06, "loss": 0.6232, "step": 20749 }, { "epoch": 0.6359568468799803, "grad_norm": 1.1870199878668628, "learning_rate": 3.0909971380135184e-06, "loss": 0.6813, "step": 20750 }, { "epoch": 0.6359874954027216, "grad_norm": 1.3427407424886775, "learning_rate": 3.0905384282721262e-06, "loss": 0.5861, "step": 20751 }, { "epoch": 0.6360181439254627, "grad_norm": 1.212904011612268, "learning_rate": 3.0900797373451485e-06, "loss": 0.5923, "step": 20752 }, { "epoch": 0.636048792448204, "grad_norm": 1.4158309760720658, "learning_rate": 3.089621065237104e-06, "loss": 0.6467, "step": 20753 }, { "epoch": 0.6360794409709452, "grad_norm": 1.2700446256571687, "learning_rate": 3.089162411952511e-06, "loss": 0.6421, "step": 20754 }, { "epoch": 0.6361100894936864, "grad_norm": 1.3887590426189094, "learning_rate": 3.0887037774958906e-06, "loss": 0.6564, "step": 20755 }, { "epoch": 0.6361407380164276, "grad_norm": 1.1637898877077593, "learning_rate": 3.088245161871759e-06, "loss": 0.6144, "step": 20756 }, { "epoch": 0.6361713865391688, "grad_norm": 1.2489286573014913, "learning_rate": 3.0877865650846395e-06, "loss": 0.6631, "step": 20757 }, { "epoch": 0.63620203506191, "grad_norm": 1.2050802203894413, "learning_rate": 3.087327987139048e-06, "loss": 0.6179, "step": 20758 }, { "epoch": 0.6362326835846512, "grad_norm": 0.48716094593191, "learning_rate": 3.0868694280395006e-06, "loss": 0.4003, "step": 20759 }, { "epoch": 0.6362633321073924, "grad_norm": 1.2276973216396678, "learning_rate": 3.086410887790522e-06, "loss": 0.5875, "step": 20760 }, { "epoch": 0.6362939806301336, "grad_norm": 1.286684167022444, "learning_rate": 3.0859523663966244e-06, "loss": 0.6031, "step": 20761 }, { "epoch": 0.6363246291528748, "grad_norm": 1.1381119664453554, "learning_rate": 3.0854938638623276e-06, "loss": 0.534, "step": 20762 }, { "epoch": 0.6363552776756161, "grad_norm": 1.2163259556946318, "learning_rate": 3.08503538019215e-06, "loss": 0.6172, "step": 20763 }, { "epoch": 0.6363859261983572, "grad_norm": 0.4396171266931955, "learning_rate": 3.084576915390609e-06, "loss": 0.3683, "step": 20764 }, { "epoch": 0.6364165747210985, "grad_norm": 1.2563923526617418, "learning_rate": 3.084118469462221e-06, "loss": 0.5837, "step": 20765 }, { "epoch": 0.6364472232438396, "grad_norm": 1.4368464880356235, "learning_rate": 3.083660042411505e-06, "loss": 0.6161, "step": 20766 }, { "epoch": 0.6364778717665809, "grad_norm": 1.2643561130370633, "learning_rate": 3.0832016342429754e-06, "loss": 0.6155, "step": 20767 }, { "epoch": 0.636508520289322, "grad_norm": 1.392897157378603, "learning_rate": 3.0827432449611527e-06, "loss": 0.695, "step": 20768 }, { "epoch": 0.6365391688120633, "grad_norm": 1.244068920515673, "learning_rate": 3.082284874570551e-06, "loss": 0.6239, "step": 20769 }, { "epoch": 0.6365698173348044, "grad_norm": 1.3103616963308027, "learning_rate": 3.081826523075685e-06, "loss": 0.6114, "step": 20770 }, { "epoch": 0.6366004658575457, "grad_norm": 1.3083090126689725, "learning_rate": 3.0813681904810748e-06, "loss": 0.598, "step": 20771 }, { "epoch": 0.6366311143802869, "grad_norm": 1.3043641299147337, "learning_rate": 3.080909876791235e-06, "loss": 0.5774, "step": 20772 }, { "epoch": 0.6366617629030281, "grad_norm": 1.1977151677400255, "learning_rate": 3.0804515820106797e-06, "loss": 0.5722, "step": 20773 }, { "epoch": 0.6366924114257693, "grad_norm": 1.1181892714006045, "learning_rate": 3.079993306143927e-06, "loss": 0.5662, "step": 20774 }, { "epoch": 0.6367230599485105, "grad_norm": 1.348618496569823, "learning_rate": 3.0795350491954904e-06, "loss": 0.6487, "step": 20775 }, { "epoch": 0.6367537084712517, "grad_norm": 1.23420851741495, "learning_rate": 3.079076811169888e-06, "loss": 0.5937, "step": 20776 }, { "epoch": 0.6367843569939929, "grad_norm": 0.4769682324634853, "learning_rate": 3.0786185920716337e-06, "loss": 0.3886, "step": 20777 }, { "epoch": 0.6368150055167341, "grad_norm": 1.4090290090391748, "learning_rate": 3.0781603919052394e-06, "loss": 0.6466, "step": 20778 }, { "epoch": 0.6368456540394754, "grad_norm": 1.4998001077720824, "learning_rate": 3.0777022106752254e-06, "loss": 0.6534, "step": 20779 }, { "epoch": 0.6368763025622165, "grad_norm": 1.4395752815051903, "learning_rate": 3.0772440483861022e-06, "loss": 0.6475, "step": 20780 }, { "epoch": 0.6369069510849577, "grad_norm": 1.2857971865732594, "learning_rate": 3.076785905042384e-06, "loss": 0.6233, "step": 20781 }, { "epoch": 0.6369375996076989, "grad_norm": 1.4182742555899999, "learning_rate": 3.076327780648588e-06, "loss": 0.6661, "step": 20782 }, { "epoch": 0.6369682481304401, "grad_norm": 1.3095160150462855, "learning_rate": 3.0758696752092255e-06, "loss": 0.5803, "step": 20783 }, { "epoch": 0.6369988966531813, "grad_norm": 1.3350180757808723, "learning_rate": 3.0754115887288116e-06, "loss": 0.6057, "step": 20784 }, { "epoch": 0.6370295451759225, "grad_norm": 1.2757111770616079, "learning_rate": 3.0749535212118608e-06, "loss": 0.5031, "step": 20785 }, { "epoch": 0.6370601936986637, "grad_norm": 0.44258158239784184, "learning_rate": 3.074495472662884e-06, "loss": 0.4086, "step": 20786 }, { "epoch": 0.6370908422214049, "grad_norm": 1.2476272171700518, "learning_rate": 3.0740374430863966e-06, "loss": 0.6436, "step": 20787 }, { "epoch": 0.6371214907441461, "grad_norm": 1.2150165912294562, "learning_rate": 3.0735794324869127e-06, "loss": 0.6391, "step": 20788 }, { "epoch": 0.6371521392668873, "grad_norm": 1.3069722847390102, "learning_rate": 3.0731214408689403e-06, "loss": 0.6044, "step": 20789 }, { "epoch": 0.6371827877896286, "grad_norm": 1.2866692539148266, "learning_rate": 3.0726634682369984e-06, "loss": 0.4736, "step": 20790 }, { "epoch": 0.6372134363123697, "grad_norm": 1.4934285486047163, "learning_rate": 3.0722055145955953e-06, "loss": 0.6186, "step": 20791 }, { "epoch": 0.637244084835111, "grad_norm": 1.3211615099267155, "learning_rate": 3.0717475799492436e-06, "loss": 0.6981, "step": 20792 }, { "epoch": 0.6372747333578521, "grad_norm": 1.3700539058210643, "learning_rate": 3.0712896643024576e-06, "loss": 0.6652, "step": 20793 }, { "epoch": 0.6373053818805934, "grad_norm": 0.4569486509051947, "learning_rate": 3.0708317676597464e-06, "loss": 0.3981, "step": 20794 }, { "epoch": 0.6373360304033345, "grad_norm": 1.2375423992464774, "learning_rate": 3.070373890025625e-06, "loss": 0.5772, "step": 20795 }, { "epoch": 0.6373666789260758, "grad_norm": 0.46194437579984476, "learning_rate": 3.0699160314046033e-06, "loss": 0.4067, "step": 20796 }, { "epoch": 0.6373973274488169, "grad_norm": 0.45682531055625425, "learning_rate": 3.0694581918011916e-06, "loss": 0.3856, "step": 20797 }, { "epoch": 0.6374279759715582, "grad_norm": 1.1218463040592537, "learning_rate": 3.0690003712199037e-06, "loss": 0.5266, "step": 20798 }, { "epoch": 0.6374586244942994, "grad_norm": 1.2940098963127005, "learning_rate": 3.068542569665249e-06, "loss": 0.5443, "step": 20799 }, { "epoch": 0.6374892730170406, "grad_norm": 1.2968821531887995, "learning_rate": 3.0680847871417373e-06, "loss": 0.7316, "step": 20800 }, { "epoch": 0.6375199215397818, "grad_norm": 0.4685292747326436, "learning_rate": 3.0676270236538823e-06, "loss": 0.3866, "step": 20801 }, { "epoch": 0.637550570062523, "grad_norm": 1.4470754095325296, "learning_rate": 3.0671692792061904e-06, "loss": 0.6212, "step": 20802 }, { "epoch": 0.6375812185852642, "grad_norm": 1.220356312802467, "learning_rate": 3.0667115538031766e-06, "loss": 0.66, "step": 20803 }, { "epoch": 0.6376118671080054, "grad_norm": 1.3261448822229447, "learning_rate": 3.0662538474493473e-06, "loss": 0.5507, "step": 20804 }, { "epoch": 0.6376425156307466, "grad_norm": 1.2740424637974561, "learning_rate": 3.0657961601492133e-06, "loss": 0.5959, "step": 20805 }, { "epoch": 0.6376731641534878, "grad_norm": 1.3487367399009307, "learning_rate": 3.065338491907285e-06, "loss": 0.6005, "step": 20806 }, { "epoch": 0.637703812676229, "grad_norm": 1.1738201489237305, "learning_rate": 3.0648808427280717e-06, "loss": 0.5223, "step": 20807 }, { "epoch": 0.6377344611989703, "grad_norm": 1.3741564028638218, "learning_rate": 3.0644232126160815e-06, "loss": 0.6225, "step": 20808 }, { "epoch": 0.6377651097217114, "grad_norm": 1.3509799203605746, "learning_rate": 3.0639656015758256e-06, "loss": 0.6248, "step": 20809 }, { "epoch": 0.6377957582444527, "grad_norm": 1.276966069538134, "learning_rate": 3.063508009611812e-06, "loss": 0.5959, "step": 20810 }, { "epoch": 0.6378264067671938, "grad_norm": 1.221467862955414, "learning_rate": 3.0630504367285493e-06, "loss": 0.6507, "step": 20811 }, { "epoch": 0.637857055289935, "grad_norm": 1.306563318624752, "learning_rate": 3.0625928829305474e-06, "loss": 0.6415, "step": 20812 }, { "epoch": 0.6378877038126762, "grad_norm": 1.317812971889065, "learning_rate": 3.0621353482223116e-06, "loss": 0.5952, "step": 20813 }, { "epoch": 0.6379183523354174, "grad_norm": 1.1612807206444382, "learning_rate": 3.061677832608353e-06, "loss": 0.6259, "step": 20814 }, { "epoch": 0.6379490008581586, "grad_norm": 1.4454604891489327, "learning_rate": 3.0612203360931792e-06, "loss": 0.5672, "step": 20815 }, { "epoch": 0.6379796493808998, "grad_norm": 1.3087084126464388, "learning_rate": 3.060762858681296e-06, "loss": 0.6165, "step": 20816 }, { "epoch": 0.638010297903641, "grad_norm": 1.456583025692156, "learning_rate": 3.0603054003772137e-06, "loss": 0.7643, "step": 20817 }, { "epoch": 0.6380409464263822, "grad_norm": 1.2601875347050044, "learning_rate": 3.059847961185438e-06, "loss": 0.6573, "step": 20818 }, { "epoch": 0.6380715949491235, "grad_norm": 1.1793176909117125, "learning_rate": 3.059390541110476e-06, "loss": 0.6103, "step": 20819 }, { "epoch": 0.6381022434718646, "grad_norm": 1.2419636199767994, "learning_rate": 3.0589331401568383e-06, "loss": 0.6118, "step": 20820 }, { "epoch": 0.6381328919946059, "grad_norm": 1.2413297139159063, "learning_rate": 3.0584757583290257e-06, "loss": 0.5932, "step": 20821 }, { "epoch": 0.638163540517347, "grad_norm": 0.49790542047041236, "learning_rate": 3.0580183956315513e-06, "loss": 0.4068, "step": 20822 }, { "epoch": 0.6381941890400883, "grad_norm": 1.3069008291074353, "learning_rate": 3.057561052068917e-06, "loss": 0.667, "step": 20823 }, { "epoch": 0.6382248375628294, "grad_norm": 1.314365452854707, "learning_rate": 3.0571037276456293e-06, "loss": 0.5934, "step": 20824 }, { "epoch": 0.6382554860855707, "grad_norm": 1.328885753448232, "learning_rate": 3.0566464223661975e-06, "loss": 0.5285, "step": 20825 }, { "epoch": 0.6382861346083119, "grad_norm": 1.3223938515314044, "learning_rate": 3.056189136235126e-06, "loss": 0.6284, "step": 20826 }, { "epoch": 0.6383167831310531, "grad_norm": 1.5321974747414844, "learning_rate": 3.0557318692569183e-06, "loss": 0.6382, "step": 20827 }, { "epoch": 0.6383474316537943, "grad_norm": 0.4464394899793218, "learning_rate": 3.0552746214360834e-06, "loss": 0.4035, "step": 20828 }, { "epoch": 0.6383780801765355, "grad_norm": 0.436786363560118, "learning_rate": 3.054817392777124e-06, "loss": 0.4012, "step": 20829 }, { "epoch": 0.6384087286992767, "grad_norm": 1.4773664967390008, "learning_rate": 3.054360183284548e-06, "loss": 0.596, "step": 20830 }, { "epoch": 0.6384393772220179, "grad_norm": 0.4562301715971653, "learning_rate": 3.0539029929628596e-06, "loss": 0.4062, "step": 20831 }, { "epoch": 0.6384700257447591, "grad_norm": 1.274259865936931, "learning_rate": 3.05344582181656e-06, "loss": 0.6653, "step": 20832 }, { "epoch": 0.6385006742675003, "grad_norm": 1.3001336764929792, "learning_rate": 3.05298866985016e-06, "loss": 0.6461, "step": 20833 }, { "epoch": 0.6385313227902415, "grad_norm": 1.1431959126742774, "learning_rate": 3.05253153706816e-06, "loss": 0.5449, "step": 20834 }, { "epoch": 0.6385619713129828, "grad_norm": 0.45874186407976025, "learning_rate": 3.052074423475063e-06, "loss": 0.4056, "step": 20835 }, { "epoch": 0.6385926198357239, "grad_norm": 1.2476802093677375, "learning_rate": 3.051617329075377e-06, "loss": 0.6015, "step": 20836 }, { "epoch": 0.6386232683584652, "grad_norm": 1.4058824098652372, "learning_rate": 3.051160253873603e-06, "loss": 0.594, "step": 20837 }, { "epoch": 0.6386539168812063, "grad_norm": 1.191040667649384, "learning_rate": 3.0507031978742463e-06, "loss": 0.5561, "step": 20838 }, { "epoch": 0.6386845654039476, "grad_norm": 0.44638602787115733, "learning_rate": 3.0502461610818095e-06, "loss": 0.4005, "step": 20839 }, { "epoch": 0.6387152139266887, "grad_norm": 1.205484076088436, "learning_rate": 3.0497891435007952e-06, "loss": 0.631, "step": 20840 }, { "epoch": 0.63874586244943, "grad_norm": 1.4961811206343116, "learning_rate": 3.0493321451357093e-06, "loss": 0.609, "step": 20841 }, { "epoch": 0.6387765109721711, "grad_norm": 1.3567770100049128, "learning_rate": 3.0488751659910533e-06, "loss": 0.6791, "step": 20842 }, { "epoch": 0.6388071594949123, "grad_norm": 1.397519408587392, "learning_rate": 3.048418206071326e-06, "loss": 0.6193, "step": 20843 }, { "epoch": 0.6388378080176536, "grad_norm": 1.3138810730876012, "learning_rate": 3.0479612653810376e-06, "loss": 0.6501, "step": 20844 }, { "epoch": 0.6388684565403947, "grad_norm": 0.48535428231932765, "learning_rate": 3.0475043439246845e-06, "loss": 0.3998, "step": 20845 }, { "epoch": 0.638899105063136, "grad_norm": 1.3163712729705963, "learning_rate": 3.0470474417067696e-06, "loss": 0.5662, "step": 20846 }, { "epoch": 0.6389297535858771, "grad_norm": 0.4341587259963299, "learning_rate": 3.0465905587317967e-06, "loss": 0.3658, "step": 20847 }, { "epoch": 0.6389604021086184, "grad_norm": 1.3472647651419172, "learning_rate": 3.0461336950042664e-06, "loss": 0.5763, "step": 20848 }, { "epoch": 0.6389910506313595, "grad_norm": 1.2478337542386657, "learning_rate": 3.0456768505286815e-06, "loss": 0.5658, "step": 20849 }, { "epoch": 0.6390216991541008, "grad_norm": 1.376378239391194, "learning_rate": 3.0452200253095433e-06, "loss": 0.6311, "step": 20850 }, { "epoch": 0.6390523476768419, "grad_norm": 1.4980403071155168, "learning_rate": 3.044763219351351e-06, "loss": 0.6843, "step": 20851 }, { "epoch": 0.6390829961995832, "grad_norm": 1.455716265625986, "learning_rate": 3.044306432658607e-06, "loss": 0.6946, "step": 20852 }, { "epoch": 0.6391136447223243, "grad_norm": 1.1232251265699438, "learning_rate": 3.043849665235814e-06, "loss": 0.5451, "step": 20853 }, { "epoch": 0.6391442932450656, "grad_norm": 1.3512741885814967, "learning_rate": 3.0433929170874676e-06, "loss": 0.6258, "step": 20854 }, { "epoch": 0.6391749417678068, "grad_norm": 1.313031643116121, "learning_rate": 3.0429361882180747e-06, "loss": 0.6447, "step": 20855 }, { "epoch": 0.639205590290548, "grad_norm": 1.2989946382850064, "learning_rate": 3.042479478632129e-06, "loss": 0.6846, "step": 20856 }, { "epoch": 0.6392362388132892, "grad_norm": 1.2522923431965387, "learning_rate": 3.042022788334137e-06, "loss": 0.5468, "step": 20857 }, { "epoch": 0.6392668873360304, "grad_norm": 1.2935270216655905, "learning_rate": 3.0415661173285938e-06, "loss": 0.6569, "step": 20858 }, { "epoch": 0.6392975358587716, "grad_norm": 0.4446327461866032, "learning_rate": 3.04110946562e-06, "loss": 0.403, "step": 20859 }, { "epoch": 0.6393281843815128, "grad_norm": 1.3829282352678272, "learning_rate": 3.040652833212857e-06, "loss": 0.602, "step": 20860 }, { "epoch": 0.639358832904254, "grad_norm": 1.2856513716111138, "learning_rate": 3.040196220111663e-06, "loss": 0.7081, "step": 20861 }, { "epoch": 0.6393894814269953, "grad_norm": 0.43564032577313067, "learning_rate": 3.0397396263209165e-06, "loss": 0.4068, "step": 20862 }, { "epoch": 0.6394201299497364, "grad_norm": 1.2134823207670087, "learning_rate": 3.0392830518451177e-06, "loss": 0.4836, "step": 20863 }, { "epoch": 0.6394507784724777, "grad_norm": 1.3223326404231275, "learning_rate": 3.0388264966887636e-06, "loss": 0.6796, "step": 20864 }, { "epoch": 0.6394814269952188, "grad_norm": 1.3604706119652423, "learning_rate": 3.0383699608563557e-06, "loss": 0.6995, "step": 20865 }, { "epoch": 0.6395120755179601, "grad_norm": 1.2069561614414435, "learning_rate": 3.0379134443523892e-06, "loss": 0.591, "step": 20866 }, { "epoch": 0.6395427240407012, "grad_norm": 0.4735613630642398, "learning_rate": 3.0374569471813632e-06, "loss": 0.385, "step": 20867 }, { "epoch": 0.6395733725634425, "grad_norm": 1.2714571432159412, "learning_rate": 3.0370004693477767e-06, "loss": 0.5447, "step": 20868 }, { "epoch": 0.6396040210861836, "grad_norm": 1.258727507335939, "learning_rate": 3.0365440108561265e-06, "loss": 0.6409, "step": 20869 }, { "epoch": 0.6396346696089249, "grad_norm": 1.146002187268536, "learning_rate": 3.03608757171091e-06, "loss": 0.6411, "step": 20870 }, { "epoch": 0.639665318131666, "grad_norm": 1.2623053506125175, "learning_rate": 3.035631151916626e-06, "loss": 0.6425, "step": 20871 }, { "epoch": 0.6396959666544073, "grad_norm": 1.2657816084941909, "learning_rate": 3.0351747514777716e-06, "loss": 0.5114, "step": 20872 }, { "epoch": 0.6397266151771485, "grad_norm": 1.2176222369269536, "learning_rate": 3.034718370398841e-06, "loss": 0.5797, "step": 20873 }, { "epoch": 0.6397572636998896, "grad_norm": 1.181499881508593, "learning_rate": 3.0342620086843354e-06, "loss": 0.5431, "step": 20874 }, { "epoch": 0.6397879122226309, "grad_norm": 1.4360585534273647, "learning_rate": 3.033805666338746e-06, "loss": 0.6368, "step": 20875 }, { "epoch": 0.639818560745372, "grad_norm": 1.323865452254623, "learning_rate": 3.033349343366576e-06, "loss": 0.6725, "step": 20876 }, { "epoch": 0.6398492092681133, "grad_norm": 1.2580220386474237, "learning_rate": 3.0328930397723167e-06, "loss": 0.4783, "step": 20877 }, { "epoch": 0.6398798577908544, "grad_norm": 1.155346231634685, "learning_rate": 3.032436755560465e-06, "loss": 0.5984, "step": 20878 }, { "epoch": 0.6399105063135957, "grad_norm": 1.4546021191949454, "learning_rate": 3.031980490735518e-06, "loss": 0.5675, "step": 20879 }, { "epoch": 0.6399411548363368, "grad_norm": 0.4875293022409556, "learning_rate": 3.031524245301971e-06, "loss": 0.4139, "step": 20880 }, { "epoch": 0.6399718033590781, "grad_norm": 1.421230226472279, "learning_rate": 3.031068019264318e-06, "loss": 0.6885, "step": 20881 }, { "epoch": 0.6400024518818193, "grad_norm": 1.2030205177056343, "learning_rate": 3.0306118126270567e-06, "loss": 0.5848, "step": 20882 }, { "epoch": 0.6400331004045605, "grad_norm": 1.3874024183456886, "learning_rate": 3.0301556253946806e-06, "loss": 0.6468, "step": 20883 }, { "epoch": 0.6400637489273017, "grad_norm": 1.3226562867235863, "learning_rate": 3.0296994575716854e-06, "loss": 0.6359, "step": 20884 }, { "epoch": 0.6400943974500429, "grad_norm": 1.1504935481145433, "learning_rate": 3.0292433091625672e-06, "loss": 0.6406, "step": 20885 }, { "epoch": 0.6401250459727841, "grad_norm": 1.3377435638121156, "learning_rate": 3.028787180171815e-06, "loss": 0.6382, "step": 20886 }, { "epoch": 0.6401556944955253, "grad_norm": 1.4142234992360767, "learning_rate": 3.028331070603931e-06, "loss": 0.6318, "step": 20887 }, { "epoch": 0.6401863430182665, "grad_norm": 1.2712074139043472, "learning_rate": 3.027874980463404e-06, "loss": 0.6526, "step": 20888 }, { "epoch": 0.6402169915410078, "grad_norm": 1.1756644576041233, "learning_rate": 3.0274189097547287e-06, "loss": 0.5499, "step": 20889 }, { "epoch": 0.6402476400637489, "grad_norm": 1.2211957181581299, "learning_rate": 3.0269628584824002e-06, "loss": 0.5834, "step": 20890 }, { "epoch": 0.6402782885864902, "grad_norm": 1.3760401822871182, "learning_rate": 3.0265068266509107e-06, "loss": 0.7146, "step": 20891 }, { "epoch": 0.6403089371092313, "grad_norm": 1.3591201828478459, "learning_rate": 3.0260508142647548e-06, "loss": 0.6085, "step": 20892 }, { "epoch": 0.6403395856319726, "grad_norm": 0.4406675321172606, "learning_rate": 3.025594821328426e-06, "loss": 0.4034, "step": 20893 }, { "epoch": 0.6403702341547137, "grad_norm": 1.2003533232159689, "learning_rate": 3.0251388478464155e-06, "loss": 0.6458, "step": 20894 }, { "epoch": 0.640400882677455, "grad_norm": 0.45700232352846554, "learning_rate": 3.0246828938232183e-06, "loss": 0.4024, "step": 20895 }, { "epoch": 0.6404315312001961, "grad_norm": 0.42317293630901, "learning_rate": 3.0242269592633267e-06, "loss": 0.3724, "step": 20896 }, { "epoch": 0.6404621797229374, "grad_norm": 1.2764930398886807, "learning_rate": 3.02377104417123e-06, "loss": 0.5473, "step": 20897 }, { "epoch": 0.6404928282456785, "grad_norm": 1.2565351942277756, "learning_rate": 3.0233151485514246e-06, "loss": 0.5977, "step": 20898 }, { "epoch": 0.6405234767684198, "grad_norm": 0.42519242885983044, "learning_rate": 3.0228592724084003e-06, "loss": 0.3938, "step": 20899 }, { "epoch": 0.640554125291161, "grad_norm": 1.4925907771356428, "learning_rate": 3.022403415746649e-06, "loss": 0.6742, "step": 20900 }, { "epoch": 0.6405847738139022, "grad_norm": 1.2423155903198226, "learning_rate": 3.0219475785706633e-06, "loss": 0.6477, "step": 20901 }, { "epoch": 0.6406154223366434, "grad_norm": 0.44125810646207353, "learning_rate": 3.021491760884934e-06, "loss": 0.4113, "step": 20902 }, { "epoch": 0.6406460708593846, "grad_norm": 1.49514234231812, "learning_rate": 3.0210359626939523e-06, "loss": 0.728, "step": 20903 }, { "epoch": 0.6406767193821258, "grad_norm": 0.43787927378967123, "learning_rate": 3.020580184002211e-06, "loss": 0.395, "step": 20904 }, { "epoch": 0.6407073679048669, "grad_norm": 1.1899980535916526, "learning_rate": 3.0201244248141975e-06, "loss": 0.6048, "step": 20905 }, { "epoch": 0.6407380164276082, "grad_norm": 1.276464115752046, "learning_rate": 3.019668685134406e-06, "loss": 0.6392, "step": 20906 }, { "epoch": 0.6407686649503493, "grad_norm": 1.284809238278669, "learning_rate": 3.0192129649673267e-06, "loss": 0.6883, "step": 20907 }, { "epoch": 0.6407993134730906, "grad_norm": 1.3984814804906018, "learning_rate": 3.0187572643174465e-06, "loss": 0.7008, "step": 20908 }, { "epoch": 0.6408299619958318, "grad_norm": 1.4648689249552498, "learning_rate": 3.0183015831892592e-06, "loss": 0.5699, "step": 20909 }, { "epoch": 0.640860610518573, "grad_norm": 1.380333875289637, "learning_rate": 3.0178459215872528e-06, "loss": 0.6766, "step": 20910 }, { "epoch": 0.6408912590413142, "grad_norm": 1.4765382147032888, "learning_rate": 3.017390279515918e-06, "loss": 0.6619, "step": 20911 }, { "epoch": 0.6409219075640554, "grad_norm": 1.0948323726279714, "learning_rate": 3.0169346569797446e-06, "loss": 0.5625, "step": 20912 }, { "epoch": 0.6409525560867966, "grad_norm": 1.1646673192427204, "learning_rate": 3.01647905398322e-06, "loss": 0.6001, "step": 20913 }, { "epoch": 0.6409832046095378, "grad_norm": 1.344862864183854, "learning_rate": 3.0160234705308366e-06, "loss": 0.6755, "step": 20914 }, { "epoch": 0.641013853132279, "grad_norm": 1.3570058764107404, "learning_rate": 3.0155679066270803e-06, "loss": 0.5784, "step": 20915 }, { "epoch": 0.6410445016550202, "grad_norm": 1.1041596411013395, "learning_rate": 3.015112362276441e-06, "loss": 0.5391, "step": 20916 }, { "epoch": 0.6410751501777614, "grad_norm": 1.3105954753032842, "learning_rate": 3.0146568374834095e-06, "loss": 0.6465, "step": 20917 }, { "epoch": 0.6411057987005027, "grad_norm": 1.4941410461600912, "learning_rate": 3.014201332252471e-06, "loss": 0.5736, "step": 20918 }, { "epoch": 0.6411364472232438, "grad_norm": 1.3769515652443842, "learning_rate": 3.013745846588114e-06, "loss": 0.5492, "step": 20919 }, { "epoch": 0.6411670957459851, "grad_norm": 1.5793217857830553, "learning_rate": 3.0132903804948276e-06, "loss": 0.7052, "step": 20920 }, { "epoch": 0.6411977442687262, "grad_norm": 1.2998465759676447, "learning_rate": 3.012834933977099e-06, "loss": 0.6431, "step": 20921 }, { "epoch": 0.6412283927914675, "grad_norm": 1.3925475525654734, "learning_rate": 3.012379507039418e-06, "loss": 0.6621, "step": 20922 }, { "epoch": 0.6412590413142086, "grad_norm": 1.2942224550160377, "learning_rate": 3.0119240996862693e-06, "loss": 0.5723, "step": 20923 }, { "epoch": 0.6412896898369499, "grad_norm": 1.3145157226910793, "learning_rate": 3.0114687119221398e-06, "loss": 0.6915, "step": 20924 }, { "epoch": 0.641320338359691, "grad_norm": 1.3853929148888462, "learning_rate": 3.0110133437515197e-06, "loss": 0.6091, "step": 20925 }, { "epoch": 0.6413509868824323, "grad_norm": 1.362126290700495, "learning_rate": 3.0105579951788944e-06, "loss": 0.6673, "step": 20926 }, { "epoch": 0.6413816354051735, "grad_norm": 1.454029639373173, "learning_rate": 3.0101026662087474e-06, "loss": 0.6139, "step": 20927 }, { "epoch": 0.6414122839279147, "grad_norm": 0.45258940053488056, "learning_rate": 3.0096473568455716e-06, "loss": 0.3869, "step": 20928 }, { "epoch": 0.6414429324506559, "grad_norm": 1.2823867058677763, "learning_rate": 3.009192067093846e-06, "loss": 0.6182, "step": 20929 }, { "epoch": 0.6414735809733971, "grad_norm": 0.46323956544912637, "learning_rate": 3.0087367969580635e-06, "loss": 0.4348, "step": 20930 }, { "epoch": 0.6415042294961383, "grad_norm": 1.3611649597498778, "learning_rate": 3.008281546442706e-06, "loss": 0.6167, "step": 20931 }, { "epoch": 0.6415348780188795, "grad_norm": 0.451175444454336, "learning_rate": 3.007826315552258e-06, "loss": 0.405, "step": 20932 }, { "epoch": 0.6415655265416207, "grad_norm": 1.5030286867030713, "learning_rate": 3.007371104291209e-06, "loss": 0.7894, "step": 20933 }, { "epoch": 0.641596175064362, "grad_norm": 1.4665053276862412, "learning_rate": 3.0069159126640426e-06, "loss": 0.6842, "step": 20934 }, { "epoch": 0.6416268235871031, "grad_norm": 0.4245166564156806, "learning_rate": 3.006460740675242e-06, "loss": 0.4057, "step": 20935 }, { "epoch": 0.6416574721098443, "grad_norm": 1.3192257364559061, "learning_rate": 3.0060055883292947e-06, "loss": 0.6805, "step": 20936 }, { "epoch": 0.6416881206325855, "grad_norm": 1.178177862303814, "learning_rate": 3.0055504556306842e-06, "loss": 0.538, "step": 20937 }, { "epoch": 0.6417187691553267, "grad_norm": 1.270404906650861, "learning_rate": 3.005095342583896e-06, "loss": 0.5483, "step": 20938 }, { "epoch": 0.6417494176780679, "grad_norm": 1.3651432938443855, "learning_rate": 3.0046402491934147e-06, "loss": 0.6104, "step": 20939 }, { "epoch": 0.6417800662008091, "grad_norm": 0.45533385822139993, "learning_rate": 3.004185175463721e-06, "loss": 0.3938, "step": 20940 }, { "epoch": 0.6418107147235503, "grad_norm": 1.3115227182122167, "learning_rate": 3.003730121399304e-06, "loss": 0.6195, "step": 20941 }, { "epoch": 0.6418413632462915, "grad_norm": 1.3753212267205, "learning_rate": 3.0032750870046435e-06, "loss": 0.6062, "step": 20942 }, { "epoch": 0.6418720117690327, "grad_norm": 1.3601237762906602, "learning_rate": 3.0028200722842243e-06, "loss": 0.698, "step": 20943 }, { "epoch": 0.6419026602917739, "grad_norm": 1.3670399696537772, "learning_rate": 3.0023650772425306e-06, "loss": 0.6552, "step": 20944 }, { "epoch": 0.6419333088145152, "grad_norm": 1.5578153217734116, "learning_rate": 3.0019101018840446e-06, "loss": 0.6365, "step": 20945 }, { "epoch": 0.6419639573372563, "grad_norm": 1.1729031076042384, "learning_rate": 3.001455146213249e-06, "loss": 0.5454, "step": 20946 }, { "epoch": 0.6419946058599976, "grad_norm": 1.357497804441688, "learning_rate": 3.001000210234628e-06, "loss": 0.6724, "step": 20947 }, { "epoch": 0.6420252543827387, "grad_norm": 1.4285344715550323, "learning_rate": 3.0005452939526624e-06, "loss": 0.59, "step": 20948 }, { "epoch": 0.64205590290548, "grad_norm": 1.3885745540513543, "learning_rate": 3.0000903973718365e-06, "loss": 0.6528, "step": 20949 }, { "epoch": 0.6420865514282211, "grad_norm": 1.2190790416669235, "learning_rate": 2.9996355204966334e-06, "loss": 0.5436, "step": 20950 }, { "epoch": 0.6421171999509624, "grad_norm": 1.314285708416898, "learning_rate": 2.999180663331529e-06, "loss": 0.6672, "step": 20951 }, { "epoch": 0.6421478484737035, "grad_norm": 1.3889368016245152, "learning_rate": 2.9987258258810135e-06, "loss": 0.7559, "step": 20952 }, { "epoch": 0.6421784969964448, "grad_norm": 1.3234959101623545, "learning_rate": 2.998271008149563e-06, "loss": 0.597, "step": 20953 }, { "epoch": 0.642209145519186, "grad_norm": 1.257333967588212, "learning_rate": 2.9978162101416595e-06, "loss": 0.5643, "step": 20954 }, { "epoch": 0.6422397940419272, "grad_norm": 1.1948971617084112, "learning_rate": 2.997361431861786e-06, "loss": 0.6243, "step": 20955 }, { "epoch": 0.6422704425646684, "grad_norm": 1.3638837418994174, "learning_rate": 2.9969066733144225e-06, "loss": 0.6921, "step": 20956 }, { "epoch": 0.6423010910874096, "grad_norm": 1.1939321493072235, "learning_rate": 2.9964519345040506e-06, "loss": 0.6415, "step": 20957 }, { "epoch": 0.6423317396101508, "grad_norm": 1.442473901477905, "learning_rate": 2.9959972154351504e-06, "loss": 0.6325, "step": 20958 }, { "epoch": 0.642362388132892, "grad_norm": 1.3500205154536662, "learning_rate": 2.995542516112201e-06, "loss": 0.6645, "step": 20959 }, { "epoch": 0.6423930366556332, "grad_norm": 1.1747827670227007, "learning_rate": 2.9950878365396865e-06, "loss": 0.5939, "step": 20960 }, { "epoch": 0.6424236851783744, "grad_norm": 1.219244892949637, "learning_rate": 2.994633176722083e-06, "loss": 0.556, "step": 20961 }, { "epoch": 0.6424543337011156, "grad_norm": 0.4532470361418872, "learning_rate": 2.9941785366638715e-06, "loss": 0.4039, "step": 20962 }, { "epoch": 0.6424849822238569, "grad_norm": 1.2810101499338689, "learning_rate": 2.993723916369533e-06, "loss": 0.5841, "step": 20963 }, { "epoch": 0.642515630746598, "grad_norm": 1.3488665003445848, "learning_rate": 2.9932693158435444e-06, "loss": 0.5802, "step": 20964 }, { "epoch": 0.6425462792693393, "grad_norm": 1.4992760781802426, "learning_rate": 2.9928147350903887e-06, "loss": 0.6206, "step": 20965 }, { "epoch": 0.6425769277920804, "grad_norm": 1.5936491764646619, "learning_rate": 2.9923601741145427e-06, "loss": 0.7052, "step": 20966 }, { "epoch": 0.6426075763148216, "grad_norm": 0.449811959458842, "learning_rate": 2.9919056329204845e-06, "loss": 0.392, "step": 20967 }, { "epoch": 0.6426382248375628, "grad_norm": 1.4967211511538567, "learning_rate": 2.991451111512694e-06, "loss": 0.6723, "step": 20968 }, { "epoch": 0.642668873360304, "grad_norm": 1.2830988327274342, "learning_rate": 2.9909966098956515e-06, "loss": 0.5781, "step": 20969 }, { "epoch": 0.6426995218830452, "grad_norm": 1.3833667889824341, "learning_rate": 2.9905421280738307e-06, "loss": 0.6826, "step": 20970 }, { "epoch": 0.6427301704057864, "grad_norm": 1.3410918374758813, "learning_rate": 2.9900876660517155e-06, "loss": 0.5921, "step": 20971 }, { "epoch": 0.6427608189285277, "grad_norm": 1.4017175553798409, "learning_rate": 2.989633223833779e-06, "loss": 0.6284, "step": 20972 }, { "epoch": 0.6427914674512688, "grad_norm": 1.3471740176508136, "learning_rate": 2.9891788014245e-06, "loss": 0.5882, "step": 20973 }, { "epoch": 0.6428221159740101, "grad_norm": 1.2615981004655745, "learning_rate": 2.9887243988283576e-06, "loss": 0.6512, "step": 20974 }, { "epoch": 0.6428527644967512, "grad_norm": 1.26692992147831, "learning_rate": 2.988270016049828e-06, "loss": 0.5824, "step": 20975 }, { "epoch": 0.6428834130194925, "grad_norm": 1.3803948785589115, "learning_rate": 2.9878156530933887e-06, "loss": 0.6982, "step": 20976 }, { "epoch": 0.6429140615422336, "grad_norm": 1.4074913693134816, "learning_rate": 2.9873613099635166e-06, "loss": 0.5949, "step": 20977 }, { "epoch": 0.6429447100649749, "grad_norm": 1.2806632512582217, "learning_rate": 2.9869069866646882e-06, "loss": 0.6827, "step": 20978 }, { "epoch": 0.642975358587716, "grad_norm": 1.1369804775294632, "learning_rate": 2.9864526832013813e-06, "loss": 0.6166, "step": 20979 }, { "epoch": 0.6430060071104573, "grad_norm": 1.1017661235933027, "learning_rate": 2.985998399578072e-06, "loss": 0.6116, "step": 20980 }, { "epoch": 0.6430366556331985, "grad_norm": 1.5989799438614367, "learning_rate": 2.9855441357992333e-06, "loss": 0.6998, "step": 20981 }, { "epoch": 0.6430673041559397, "grad_norm": 1.3934555768695616, "learning_rate": 2.985089891869346e-06, "loss": 0.5957, "step": 20982 }, { "epoch": 0.6430979526786809, "grad_norm": 1.4014995018083511, "learning_rate": 2.9846356677928805e-06, "loss": 0.678, "step": 20983 }, { "epoch": 0.6431286012014221, "grad_norm": 1.2636095031083607, "learning_rate": 2.9841814635743188e-06, "loss": 0.6488, "step": 20984 }, { "epoch": 0.6431592497241633, "grad_norm": 1.8454391289466476, "learning_rate": 2.9837272792181316e-06, "loss": 0.5836, "step": 20985 }, { "epoch": 0.6431898982469045, "grad_norm": 1.5695314176967625, "learning_rate": 2.983273114728795e-06, "loss": 0.6768, "step": 20986 }, { "epoch": 0.6432205467696457, "grad_norm": 1.3339098082463, "learning_rate": 2.9828189701107848e-06, "loss": 0.6634, "step": 20987 }, { "epoch": 0.643251195292387, "grad_norm": 1.498683600472084, "learning_rate": 2.982364845368576e-06, "loss": 0.6573, "step": 20988 }, { "epoch": 0.6432818438151281, "grad_norm": 1.4580686002290963, "learning_rate": 2.9819107405066407e-06, "loss": 0.5799, "step": 20989 }, { "epoch": 0.6433124923378694, "grad_norm": 1.4693745286751752, "learning_rate": 2.9814566555294567e-06, "loss": 0.673, "step": 20990 }, { "epoch": 0.6433431408606105, "grad_norm": 1.2535748468859265, "learning_rate": 2.9810025904414962e-06, "loss": 0.6251, "step": 20991 }, { "epoch": 0.6433737893833518, "grad_norm": 1.3156437700245853, "learning_rate": 2.980548545247234e-06, "loss": 0.6083, "step": 20992 }, { "epoch": 0.6434044379060929, "grad_norm": 1.3461786366441704, "learning_rate": 2.9800945199511453e-06, "loss": 0.643, "step": 20993 }, { "epoch": 0.6434350864288342, "grad_norm": 1.407052620639328, "learning_rate": 2.979640514557699e-06, "loss": 0.6663, "step": 20994 }, { "epoch": 0.6434657349515753, "grad_norm": 0.47643995817745954, "learning_rate": 2.979186529071374e-06, "loss": 0.3989, "step": 20995 }, { "epoch": 0.6434963834743166, "grad_norm": 1.384833448750656, "learning_rate": 2.978732563496641e-06, "loss": 0.6667, "step": 20996 }, { "epoch": 0.6435270319970577, "grad_norm": 1.3255815846800238, "learning_rate": 2.978278617837972e-06, "loss": 0.6097, "step": 20997 }, { "epoch": 0.6435576805197989, "grad_norm": 1.3379963760100737, "learning_rate": 2.9778246920998418e-06, "loss": 0.4959, "step": 20998 }, { "epoch": 0.6435883290425402, "grad_norm": 1.1699228181499242, "learning_rate": 2.9773707862867217e-06, "loss": 0.5531, "step": 20999 }, { "epoch": 0.6436189775652813, "grad_norm": 1.2200374660744864, "learning_rate": 2.976916900403085e-06, "loss": 0.615, "step": 21000 }, { "epoch": 0.6436496260880226, "grad_norm": 1.2393506071388458, "learning_rate": 2.976463034453404e-06, "loss": 0.6226, "step": 21001 }, { "epoch": 0.6436802746107637, "grad_norm": 1.4112017780779367, "learning_rate": 2.9760091884421493e-06, "loss": 0.6316, "step": 21002 }, { "epoch": 0.643710923133505, "grad_norm": 1.1171045549863465, "learning_rate": 2.9755553623737952e-06, "loss": 0.5489, "step": 21003 }, { "epoch": 0.6437415716562461, "grad_norm": 1.3400074211165416, "learning_rate": 2.975101556252814e-06, "loss": 0.6037, "step": 21004 }, { "epoch": 0.6437722201789874, "grad_norm": 0.4264060178066262, "learning_rate": 2.9746477700836717e-06, "loss": 0.3878, "step": 21005 }, { "epoch": 0.6438028687017285, "grad_norm": 1.234720470998406, "learning_rate": 2.974194003870845e-06, "loss": 0.6882, "step": 21006 }, { "epoch": 0.6438335172244698, "grad_norm": 1.284903774295544, "learning_rate": 2.9737402576188036e-06, "loss": 0.6582, "step": 21007 }, { "epoch": 0.643864165747211, "grad_norm": 1.174969295510644, "learning_rate": 2.9732865313320157e-06, "loss": 0.5735, "step": 21008 }, { "epoch": 0.6438948142699522, "grad_norm": 1.2722869354061508, "learning_rate": 2.9728328250149557e-06, "loss": 0.5645, "step": 21009 }, { "epoch": 0.6439254627926934, "grad_norm": 1.2899160787137263, "learning_rate": 2.9723791386720915e-06, "loss": 0.6728, "step": 21010 }, { "epoch": 0.6439561113154346, "grad_norm": 1.2635722712784097, "learning_rate": 2.9719254723078956e-06, "loss": 0.5662, "step": 21011 }, { "epoch": 0.6439867598381758, "grad_norm": 1.5929218021492977, "learning_rate": 2.9714718259268383e-06, "loss": 0.628, "step": 21012 }, { "epoch": 0.644017408360917, "grad_norm": 1.2800929113444162, "learning_rate": 2.9710181995333843e-06, "loss": 0.6284, "step": 21013 }, { "epoch": 0.6440480568836582, "grad_norm": 1.221980710333308, "learning_rate": 2.9705645931320103e-06, "loss": 0.5445, "step": 21014 }, { "epoch": 0.6440787054063994, "grad_norm": 0.4553706282557772, "learning_rate": 2.970111006727182e-06, "loss": 0.3993, "step": 21015 }, { "epoch": 0.6441093539291406, "grad_norm": 1.5990797529218697, "learning_rate": 2.969657440323368e-06, "loss": 0.6333, "step": 21016 }, { "epoch": 0.6441400024518819, "grad_norm": 1.4402651445096744, "learning_rate": 2.96920389392504e-06, "loss": 0.6651, "step": 21017 }, { "epoch": 0.644170650974623, "grad_norm": 0.48048304981016005, "learning_rate": 2.9687503675366643e-06, "loss": 0.3825, "step": 21018 }, { "epoch": 0.6442012994973643, "grad_norm": 1.2194972816187224, "learning_rate": 2.9682968611627116e-06, "loss": 0.5974, "step": 21019 }, { "epoch": 0.6442319480201054, "grad_norm": 1.4888272771231532, "learning_rate": 2.9678433748076508e-06, "loss": 0.6851, "step": 21020 }, { "epoch": 0.6442625965428467, "grad_norm": 1.2750550720157152, "learning_rate": 2.9673899084759474e-06, "loss": 0.7133, "step": 21021 }, { "epoch": 0.6442932450655878, "grad_norm": 1.3193232711523548, "learning_rate": 2.9669364621720725e-06, "loss": 0.6476, "step": 21022 }, { "epoch": 0.6443238935883291, "grad_norm": 1.287760060418471, "learning_rate": 2.9664830359004937e-06, "loss": 0.6275, "step": 21023 }, { "epoch": 0.6443545421110702, "grad_norm": 1.19032449347384, "learning_rate": 2.9660296296656754e-06, "loss": 0.6374, "step": 21024 }, { "epoch": 0.6443851906338115, "grad_norm": 0.45712828531909194, "learning_rate": 2.9655762434720904e-06, "loss": 0.407, "step": 21025 }, { "epoch": 0.6444158391565527, "grad_norm": 1.2342435626659716, "learning_rate": 2.9651228773242015e-06, "loss": 0.5257, "step": 21026 }, { "epoch": 0.6444464876792939, "grad_norm": 1.5966445926560466, "learning_rate": 2.9646695312264774e-06, "loss": 0.619, "step": 21027 }, { "epoch": 0.6444771362020351, "grad_norm": 1.5250796139970524, "learning_rate": 2.9642162051833858e-06, "loss": 0.6033, "step": 21028 }, { "epoch": 0.6445077847247762, "grad_norm": 1.2352679866773069, "learning_rate": 2.9637628991993917e-06, "loss": 0.6741, "step": 21029 }, { "epoch": 0.6445384332475175, "grad_norm": 1.2765218739135427, "learning_rate": 2.9633096132789636e-06, "loss": 0.5878, "step": 21030 }, { "epoch": 0.6445690817702586, "grad_norm": 0.4353900330358619, "learning_rate": 2.962856347426567e-06, "loss": 0.4041, "step": 21031 }, { "epoch": 0.6445997302929999, "grad_norm": 1.2836620976077318, "learning_rate": 2.962403101646667e-06, "loss": 0.5497, "step": 21032 }, { "epoch": 0.644630378815741, "grad_norm": 0.46412244832134547, "learning_rate": 2.9619498759437325e-06, "loss": 0.411, "step": 21033 }, { "epoch": 0.6446610273384823, "grad_norm": 1.3618365996971993, "learning_rate": 2.9614966703222274e-06, "loss": 0.6929, "step": 21034 }, { "epoch": 0.6446916758612234, "grad_norm": 1.1448111084011983, "learning_rate": 2.9610434847866144e-06, "loss": 0.6204, "step": 21035 }, { "epoch": 0.6447223243839647, "grad_norm": 1.2468852036714864, "learning_rate": 2.9605903193413647e-06, "loss": 0.6415, "step": 21036 }, { "epoch": 0.6447529729067059, "grad_norm": 1.2159955260317747, "learning_rate": 2.9601371739909374e-06, "loss": 0.546, "step": 21037 }, { "epoch": 0.6447836214294471, "grad_norm": 0.4313380475340932, "learning_rate": 2.9596840487398036e-06, "loss": 0.3814, "step": 21038 }, { "epoch": 0.6448142699521883, "grad_norm": 1.4304695340315379, "learning_rate": 2.959230943592424e-06, "loss": 0.6253, "step": 21039 }, { "epoch": 0.6448449184749295, "grad_norm": 1.3661319319753684, "learning_rate": 2.9587778585532623e-06, "loss": 0.6108, "step": 21040 }, { "epoch": 0.6448755669976707, "grad_norm": 1.1780756614879204, "learning_rate": 2.9583247936267863e-06, "loss": 0.5246, "step": 21041 }, { "epoch": 0.6449062155204119, "grad_norm": 1.120481726536984, "learning_rate": 2.957871748817458e-06, "loss": 0.6163, "step": 21042 }, { "epoch": 0.6449368640431531, "grad_norm": 1.3183293665676403, "learning_rate": 2.9574187241297413e-06, "loss": 0.667, "step": 21043 }, { "epoch": 0.6449675125658944, "grad_norm": 1.474847790124274, "learning_rate": 2.9569657195681013e-06, "loss": 0.7808, "step": 21044 }, { "epoch": 0.6449981610886355, "grad_norm": 1.3137782576607813, "learning_rate": 2.9565127351370005e-06, "loss": 0.6835, "step": 21045 }, { "epoch": 0.6450288096113768, "grad_norm": 1.3171235948744677, "learning_rate": 2.956059770840902e-06, "loss": 0.5642, "step": 21046 }, { "epoch": 0.6450594581341179, "grad_norm": 1.272497336983382, "learning_rate": 2.955606826684272e-06, "loss": 0.659, "step": 21047 }, { "epoch": 0.6450901066568592, "grad_norm": 0.46546368190257, "learning_rate": 2.9551539026715675e-06, "loss": 0.4131, "step": 21048 }, { "epoch": 0.6451207551796003, "grad_norm": 1.3858813021677308, "learning_rate": 2.954700998807258e-06, "loss": 0.7174, "step": 21049 }, { "epoch": 0.6451514037023416, "grad_norm": 1.1256247559577068, "learning_rate": 2.9542481150958013e-06, "loss": 0.5839, "step": 21050 }, { "epoch": 0.6451820522250827, "grad_norm": 1.3783313836855826, "learning_rate": 2.9537952515416613e-06, "loss": 0.5988, "step": 21051 }, { "epoch": 0.645212700747824, "grad_norm": 1.2959796149082097, "learning_rate": 2.9533424081493e-06, "loss": 0.5737, "step": 21052 }, { "epoch": 0.6452433492705651, "grad_norm": 1.1840383482458996, "learning_rate": 2.952889584923181e-06, "loss": 0.6333, "step": 21053 }, { "epoch": 0.6452739977933064, "grad_norm": 0.4486491138265973, "learning_rate": 2.952436781867762e-06, "loss": 0.3996, "step": 21054 }, { "epoch": 0.6453046463160476, "grad_norm": 1.2744770368494036, "learning_rate": 2.95198399898751e-06, "loss": 0.7019, "step": 21055 }, { "epoch": 0.6453352948387888, "grad_norm": 1.3130147589884382, "learning_rate": 2.951531236286882e-06, "loss": 0.5568, "step": 21056 }, { "epoch": 0.64536594336153, "grad_norm": 0.43520418901794305, "learning_rate": 2.951078493770343e-06, "loss": 0.4005, "step": 21057 }, { "epoch": 0.6453965918842712, "grad_norm": 0.43054699782687766, "learning_rate": 2.9506257714423504e-06, "loss": 0.3974, "step": 21058 }, { "epoch": 0.6454272404070124, "grad_norm": 1.2462658887712001, "learning_rate": 2.9501730693073656e-06, "loss": 0.6212, "step": 21059 }, { "epoch": 0.6454578889297535, "grad_norm": 1.2538218419930827, "learning_rate": 2.949720387369851e-06, "loss": 0.6858, "step": 21060 }, { "epoch": 0.6454885374524948, "grad_norm": 0.4537092789633136, "learning_rate": 2.9492677256342663e-06, "loss": 0.4061, "step": 21061 }, { "epoch": 0.6455191859752359, "grad_norm": 1.1824469741452504, "learning_rate": 2.94881508410507e-06, "loss": 0.5529, "step": 21062 }, { "epoch": 0.6455498344979772, "grad_norm": 1.4068763227362275, "learning_rate": 2.9483624627867246e-06, "loss": 0.6574, "step": 21063 }, { "epoch": 0.6455804830207184, "grad_norm": 1.1824628540048439, "learning_rate": 2.9479098616836876e-06, "loss": 0.5923, "step": 21064 }, { "epoch": 0.6456111315434596, "grad_norm": 1.4782224242279909, "learning_rate": 2.947457280800421e-06, "loss": 0.7377, "step": 21065 }, { "epoch": 0.6456417800662008, "grad_norm": 0.4275792278480398, "learning_rate": 2.9470047201413843e-06, "loss": 0.379, "step": 21066 }, { "epoch": 0.645672428588942, "grad_norm": 1.5630778364347406, "learning_rate": 2.946552179711032e-06, "loss": 0.6199, "step": 21067 }, { "epoch": 0.6457030771116832, "grad_norm": 1.2338882984517685, "learning_rate": 2.94609965951383e-06, "loss": 0.5664, "step": 21068 }, { "epoch": 0.6457337256344244, "grad_norm": 1.3691572086526176, "learning_rate": 2.9456471595542317e-06, "loss": 0.6263, "step": 21069 }, { "epoch": 0.6457643741571656, "grad_norm": 1.1290870531650001, "learning_rate": 2.945194679836697e-06, "loss": 0.5384, "step": 21070 }, { "epoch": 0.6457950226799068, "grad_norm": 1.3295911290963345, "learning_rate": 2.944742220365686e-06, "loss": 0.6796, "step": 21071 }, { "epoch": 0.645825671202648, "grad_norm": 1.1836056911414121, "learning_rate": 2.9442897811456545e-06, "loss": 0.5529, "step": 21072 }, { "epoch": 0.6458563197253893, "grad_norm": 1.1222551089783164, "learning_rate": 2.943837362181063e-06, "loss": 0.5996, "step": 21073 }, { "epoch": 0.6458869682481304, "grad_norm": 1.4454334587780364, "learning_rate": 2.9433849634763678e-06, "loss": 0.7202, "step": 21074 }, { "epoch": 0.6459176167708717, "grad_norm": 1.332559147075587, "learning_rate": 2.9429325850360257e-06, "loss": 0.5957, "step": 21075 }, { "epoch": 0.6459482652936128, "grad_norm": 1.3120114356365182, "learning_rate": 2.942480226864497e-06, "loss": 0.644, "step": 21076 }, { "epoch": 0.6459789138163541, "grad_norm": 1.2609615047292948, "learning_rate": 2.9420278889662366e-06, "loss": 0.6639, "step": 21077 }, { "epoch": 0.6460095623390952, "grad_norm": 1.2457388314186935, "learning_rate": 2.9415755713457008e-06, "loss": 0.585, "step": 21078 }, { "epoch": 0.6460402108618365, "grad_norm": 1.3425808519103586, "learning_rate": 2.9411232740073493e-06, "loss": 0.6739, "step": 21079 }, { "epoch": 0.6460708593845776, "grad_norm": 1.451295919471777, "learning_rate": 2.9406709969556356e-06, "loss": 0.591, "step": 21080 }, { "epoch": 0.6461015079073189, "grad_norm": 1.4309607829323414, "learning_rate": 2.940218740195018e-06, "loss": 0.6115, "step": 21081 }, { "epoch": 0.6461321564300601, "grad_norm": 1.3508919708957667, "learning_rate": 2.9397665037299518e-06, "loss": 0.5874, "step": 21082 }, { "epoch": 0.6461628049528013, "grad_norm": 0.48102830879192715, "learning_rate": 2.939314287564893e-06, "loss": 0.4069, "step": 21083 }, { "epoch": 0.6461934534755425, "grad_norm": 1.30473859489032, "learning_rate": 2.9388620917042987e-06, "loss": 0.5882, "step": 21084 }, { "epoch": 0.6462241019982837, "grad_norm": 1.3250388569525366, "learning_rate": 2.9384099161526236e-06, "loss": 0.6509, "step": 21085 }, { "epoch": 0.6462547505210249, "grad_norm": 2.7636121463591925, "learning_rate": 2.937957760914322e-06, "loss": 0.6602, "step": 21086 }, { "epoch": 0.6462853990437661, "grad_norm": 1.3861208117215678, "learning_rate": 2.9375056259938507e-06, "loss": 0.6426, "step": 21087 }, { "epoch": 0.6463160475665073, "grad_norm": 1.1683931946753459, "learning_rate": 2.9370535113956654e-06, "loss": 0.6161, "step": 21088 }, { "epoch": 0.6463466960892486, "grad_norm": 1.3812034521962355, "learning_rate": 2.9366014171242173e-06, "loss": 0.6076, "step": 21089 }, { "epoch": 0.6463773446119897, "grad_norm": 1.36020977350563, "learning_rate": 2.936149343183966e-06, "loss": 0.6214, "step": 21090 }, { "epoch": 0.6464079931347309, "grad_norm": 1.177359511656109, "learning_rate": 2.93569728957936e-06, "loss": 0.6368, "step": 21091 }, { "epoch": 0.6464386416574721, "grad_norm": 1.4803189331842637, "learning_rate": 2.9352452563148605e-06, "loss": 0.5378, "step": 21092 }, { "epoch": 0.6464692901802133, "grad_norm": 1.294606787261456, "learning_rate": 2.934793243394916e-06, "loss": 0.5877, "step": 21093 }, { "epoch": 0.6464999387029545, "grad_norm": 1.2738242992936168, "learning_rate": 2.9343412508239817e-06, "loss": 0.5598, "step": 21094 }, { "epoch": 0.6465305872256957, "grad_norm": 1.2916174311000468, "learning_rate": 2.9338892786065123e-06, "loss": 0.567, "step": 21095 }, { "epoch": 0.6465612357484369, "grad_norm": 1.3679901736460767, "learning_rate": 2.93343732674696e-06, "loss": 0.6441, "step": 21096 }, { "epoch": 0.6465918842711781, "grad_norm": 1.298270829990295, "learning_rate": 2.932985395249778e-06, "loss": 0.6289, "step": 21097 }, { "epoch": 0.6466225327939193, "grad_norm": 1.3552660612006144, "learning_rate": 2.932533484119421e-06, "loss": 0.6224, "step": 21098 }, { "epoch": 0.6466531813166605, "grad_norm": 1.1788058573076547, "learning_rate": 2.932081593360341e-06, "loss": 0.5558, "step": 21099 }, { "epoch": 0.6466838298394018, "grad_norm": 1.3326243116431156, "learning_rate": 2.9316297229769873e-06, "loss": 0.6684, "step": 21100 }, { "epoch": 0.6467144783621429, "grad_norm": 1.362382357841342, "learning_rate": 2.9311778729738183e-06, "loss": 0.585, "step": 21101 }, { "epoch": 0.6467451268848842, "grad_norm": 0.4580417067848298, "learning_rate": 2.930726043355281e-06, "loss": 0.4103, "step": 21102 }, { "epoch": 0.6467757754076253, "grad_norm": 1.2626443671089107, "learning_rate": 2.9302742341258306e-06, "loss": 0.6469, "step": 21103 }, { "epoch": 0.6468064239303666, "grad_norm": 1.1854230262633167, "learning_rate": 2.9298224452899177e-06, "loss": 0.6495, "step": 21104 }, { "epoch": 0.6468370724531077, "grad_norm": 1.1653855699803155, "learning_rate": 2.9293706768519925e-06, "loss": 0.6084, "step": 21105 }, { "epoch": 0.646867720975849, "grad_norm": 1.3459741619381882, "learning_rate": 2.9289189288165098e-06, "loss": 0.5846, "step": 21106 }, { "epoch": 0.6468983694985901, "grad_norm": 1.501654570826467, "learning_rate": 2.928467201187918e-06, "loss": 0.6109, "step": 21107 }, { "epoch": 0.6469290180213314, "grad_norm": 0.43920501625183095, "learning_rate": 2.928015493970668e-06, "loss": 0.4022, "step": 21108 }, { "epoch": 0.6469596665440726, "grad_norm": 0.4604728513714546, "learning_rate": 2.9275638071692137e-06, "loss": 0.396, "step": 21109 }, { "epoch": 0.6469903150668138, "grad_norm": 1.0894960338473063, "learning_rate": 2.9271121407879997e-06, "loss": 0.608, "step": 21110 }, { "epoch": 0.647020963589555, "grad_norm": 1.2871594203762826, "learning_rate": 2.9266604948314843e-06, "loss": 0.6961, "step": 21111 }, { "epoch": 0.6470516121122962, "grad_norm": 1.3544415202415636, "learning_rate": 2.9262088693041114e-06, "loss": 0.6465, "step": 21112 }, { "epoch": 0.6470822606350374, "grad_norm": 1.4081006370109268, "learning_rate": 2.9257572642103327e-06, "loss": 0.6784, "step": 21113 }, { "epoch": 0.6471129091577786, "grad_norm": 1.3487044577945553, "learning_rate": 2.9253056795545996e-06, "loss": 0.6471, "step": 21114 }, { "epoch": 0.6471435576805198, "grad_norm": 1.288015838320809, "learning_rate": 2.92485411534136e-06, "loss": 0.5556, "step": 21115 }, { "epoch": 0.647174206203261, "grad_norm": 1.2561010148359513, "learning_rate": 2.9244025715750634e-06, "loss": 0.6633, "step": 21116 }, { "epoch": 0.6472048547260022, "grad_norm": 1.2664738513786327, "learning_rate": 2.9239510482601596e-06, "loss": 0.6625, "step": 21117 }, { "epoch": 0.6472355032487435, "grad_norm": 0.4861109453253805, "learning_rate": 2.9234995454010968e-06, "loss": 0.4269, "step": 21118 }, { "epoch": 0.6472661517714846, "grad_norm": 0.45601933181822385, "learning_rate": 2.923048063002325e-06, "loss": 0.397, "step": 21119 }, { "epoch": 0.6472968002942259, "grad_norm": 1.6139989279102462, "learning_rate": 2.9225966010682933e-06, "loss": 0.6094, "step": 21120 }, { "epoch": 0.647327448816967, "grad_norm": 1.3801993908847596, "learning_rate": 2.9221451596034457e-06, "loss": 0.6883, "step": 21121 }, { "epoch": 0.6473580973397082, "grad_norm": 1.5400196975490716, "learning_rate": 2.921693738612236e-06, "loss": 0.6374, "step": 21122 }, { "epoch": 0.6473887458624494, "grad_norm": 1.2299864358278734, "learning_rate": 2.9212423380991093e-06, "loss": 0.608, "step": 21123 }, { "epoch": 0.6474193943851906, "grad_norm": 1.2632184809462499, "learning_rate": 2.9207909580685123e-06, "loss": 0.6035, "step": 21124 }, { "epoch": 0.6474500429079318, "grad_norm": 1.3757899902222375, "learning_rate": 2.9203395985248954e-06, "loss": 0.554, "step": 21125 }, { "epoch": 0.647480691430673, "grad_norm": 1.43438688215059, "learning_rate": 2.919888259472704e-06, "loss": 0.6612, "step": 21126 }, { "epoch": 0.6475113399534143, "grad_norm": 1.164954023777365, "learning_rate": 2.919436940916386e-06, "loss": 0.6031, "step": 21127 }, { "epoch": 0.6475419884761554, "grad_norm": 1.1800309460452338, "learning_rate": 2.918985642860388e-06, "loss": 0.637, "step": 21128 }, { "epoch": 0.6475726369988967, "grad_norm": 1.2718960836836468, "learning_rate": 2.9185343653091557e-06, "loss": 0.5251, "step": 21129 }, { "epoch": 0.6476032855216378, "grad_norm": 1.3512749125362873, "learning_rate": 2.9180831082671396e-06, "loss": 0.6378, "step": 21130 }, { "epoch": 0.6476339340443791, "grad_norm": 0.4588728844626088, "learning_rate": 2.9176318717387808e-06, "loss": 0.3864, "step": 21131 }, { "epoch": 0.6476645825671202, "grad_norm": 1.192782944326673, "learning_rate": 2.917180655728529e-06, "loss": 0.6405, "step": 21132 }, { "epoch": 0.6476952310898615, "grad_norm": 1.339134649938718, "learning_rate": 2.91672946024083e-06, "loss": 0.5587, "step": 21133 }, { "epoch": 0.6477258796126026, "grad_norm": 1.3804213982615843, "learning_rate": 2.9162782852801268e-06, "loss": 0.5606, "step": 21134 }, { "epoch": 0.6477565281353439, "grad_norm": 1.263201879268743, "learning_rate": 2.9158271308508667e-06, "loss": 0.6782, "step": 21135 }, { "epoch": 0.647787176658085, "grad_norm": 0.47914598097552197, "learning_rate": 2.915375996957498e-06, "loss": 0.3934, "step": 21136 }, { "epoch": 0.6478178251808263, "grad_norm": 1.245657872390607, "learning_rate": 2.91492488360446e-06, "loss": 0.6265, "step": 21137 }, { "epoch": 0.6478484737035675, "grad_norm": 1.2116771490973204, "learning_rate": 2.9144737907962036e-06, "loss": 0.688, "step": 21138 }, { "epoch": 0.6478791222263087, "grad_norm": 1.3799397857092195, "learning_rate": 2.914022718537168e-06, "loss": 0.5937, "step": 21139 }, { "epoch": 0.6479097707490499, "grad_norm": 1.333806760299423, "learning_rate": 2.9135716668318e-06, "loss": 0.7157, "step": 21140 }, { "epoch": 0.6479404192717911, "grad_norm": 0.45161247667727855, "learning_rate": 2.9131206356845463e-06, "loss": 0.4032, "step": 21141 }, { "epoch": 0.6479710677945323, "grad_norm": 0.4405868288939333, "learning_rate": 2.912669625099847e-06, "loss": 0.4139, "step": 21142 }, { "epoch": 0.6480017163172735, "grad_norm": 1.3338109734933683, "learning_rate": 2.912218635082148e-06, "loss": 0.5545, "step": 21143 }, { "epoch": 0.6480323648400147, "grad_norm": 1.1746010263642075, "learning_rate": 2.911767665635895e-06, "loss": 0.625, "step": 21144 }, { "epoch": 0.648063013362756, "grad_norm": 1.3936118061269152, "learning_rate": 2.9113167167655277e-06, "loss": 0.6536, "step": 21145 }, { "epoch": 0.6480936618854971, "grad_norm": 1.2135046511685628, "learning_rate": 2.9108657884754908e-06, "loss": 0.6617, "step": 21146 }, { "epoch": 0.6481243104082384, "grad_norm": 1.5101190616832634, "learning_rate": 2.9104148807702302e-06, "loss": 0.7391, "step": 21147 }, { "epoch": 0.6481549589309795, "grad_norm": 1.2891482450345302, "learning_rate": 2.9099639936541845e-06, "loss": 0.6853, "step": 21148 }, { "epoch": 0.6481856074537208, "grad_norm": 1.2567436766499085, "learning_rate": 2.9095131271318e-06, "loss": 0.4871, "step": 21149 }, { "epoch": 0.6482162559764619, "grad_norm": 1.2415410309322343, "learning_rate": 2.909062281207515e-06, "loss": 0.6267, "step": 21150 }, { "epoch": 0.6482469044992032, "grad_norm": 1.369061069138921, "learning_rate": 2.908611455885775e-06, "loss": 0.6615, "step": 21151 }, { "epoch": 0.6482775530219443, "grad_norm": 1.2099087538258864, "learning_rate": 2.9081606511710236e-06, "loss": 0.6098, "step": 21152 }, { "epoch": 0.6483082015446855, "grad_norm": 1.2854453694664898, "learning_rate": 2.9077098670676983e-06, "loss": 0.6151, "step": 21153 }, { "epoch": 0.6483388500674268, "grad_norm": 1.3697509796013083, "learning_rate": 2.9072591035802435e-06, "loss": 0.5903, "step": 21154 }, { "epoch": 0.6483694985901679, "grad_norm": 1.3673823935283556, "learning_rate": 2.9068083607131015e-06, "loss": 0.5763, "step": 21155 }, { "epoch": 0.6484001471129092, "grad_norm": 1.3102099968873346, "learning_rate": 2.9063576384707103e-06, "loss": 0.5984, "step": 21156 }, { "epoch": 0.6484307956356503, "grad_norm": 1.3175351382788083, "learning_rate": 2.905906936857513e-06, "loss": 0.7001, "step": 21157 }, { "epoch": 0.6484614441583916, "grad_norm": 1.3156087676072308, "learning_rate": 2.9054562558779524e-06, "loss": 0.7234, "step": 21158 }, { "epoch": 0.6484920926811327, "grad_norm": 1.4342933933614572, "learning_rate": 2.905005595536465e-06, "loss": 0.6695, "step": 21159 }, { "epoch": 0.648522741203874, "grad_norm": 1.3800536675163038, "learning_rate": 2.9045549558374965e-06, "loss": 0.5406, "step": 21160 }, { "epoch": 0.6485533897266151, "grad_norm": 1.2071722439457109, "learning_rate": 2.9041043367854815e-06, "loss": 0.6057, "step": 21161 }, { "epoch": 0.6485840382493564, "grad_norm": 1.5279436717019923, "learning_rate": 2.903653738384863e-06, "loss": 0.698, "step": 21162 }, { "epoch": 0.6486146867720975, "grad_norm": 1.3262092702816983, "learning_rate": 2.9032031606400825e-06, "loss": 0.6381, "step": 21163 }, { "epoch": 0.6486453352948388, "grad_norm": 1.6314685038211718, "learning_rate": 2.9027526035555754e-06, "loss": 0.5225, "step": 21164 }, { "epoch": 0.64867598381758, "grad_norm": 1.0993113187693262, "learning_rate": 2.9023020671357837e-06, "loss": 0.6114, "step": 21165 }, { "epoch": 0.6487066323403212, "grad_norm": 1.4081337272558094, "learning_rate": 2.9018515513851487e-06, "loss": 0.6214, "step": 21166 }, { "epoch": 0.6487372808630624, "grad_norm": 1.3453310352340826, "learning_rate": 2.901401056308105e-06, "loss": 0.7158, "step": 21167 }, { "epoch": 0.6487679293858036, "grad_norm": 1.4789235119975237, "learning_rate": 2.9009505819090934e-06, "loss": 0.6719, "step": 21168 }, { "epoch": 0.6487985779085448, "grad_norm": 1.2158532701287224, "learning_rate": 2.9005001281925548e-06, "loss": 0.5538, "step": 21169 }, { "epoch": 0.648829226431286, "grad_norm": 1.303041069334947, "learning_rate": 2.9000496951629244e-06, "loss": 0.7034, "step": 21170 }, { "epoch": 0.6488598749540272, "grad_norm": 1.3694418729911613, "learning_rate": 2.8995992828246428e-06, "loss": 0.6046, "step": 21171 }, { "epoch": 0.6488905234767685, "grad_norm": 1.3554368707675062, "learning_rate": 2.8991488911821455e-06, "loss": 0.6214, "step": 21172 }, { "epoch": 0.6489211719995096, "grad_norm": 1.2811638133627183, "learning_rate": 2.898698520239871e-06, "loss": 0.67, "step": 21173 }, { "epoch": 0.6489518205222509, "grad_norm": 1.2628757366123582, "learning_rate": 2.8982481700022604e-06, "loss": 0.6674, "step": 21174 }, { "epoch": 0.648982469044992, "grad_norm": 1.2302384449348231, "learning_rate": 2.8977978404737458e-06, "loss": 0.6388, "step": 21175 }, { "epoch": 0.6490131175677333, "grad_norm": 1.4009457005989865, "learning_rate": 2.8973475316587667e-06, "loss": 0.6288, "step": 21176 }, { "epoch": 0.6490437660904744, "grad_norm": 1.2367502974875044, "learning_rate": 2.8968972435617624e-06, "loss": 0.647, "step": 21177 }, { "epoch": 0.6490744146132157, "grad_norm": 1.6194777011014483, "learning_rate": 2.896446976187166e-06, "loss": 0.6175, "step": 21178 }, { "epoch": 0.6491050631359568, "grad_norm": 0.48445790025461144, "learning_rate": 2.8959967295394183e-06, "loss": 0.4234, "step": 21179 }, { "epoch": 0.6491357116586981, "grad_norm": 1.2498182593163345, "learning_rate": 2.8955465036229503e-06, "loss": 0.6357, "step": 21180 }, { "epoch": 0.6491663601814393, "grad_norm": 1.0874443479082694, "learning_rate": 2.8950962984422015e-06, "loss": 0.5838, "step": 21181 }, { "epoch": 0.6491970087041805, "grad_norm": 1.2513157374146144, "learning_rate": 2.894646114001609e-06, "loss": 0.4364, "step": 21182 }, { "epoch": 0.6492276572269217, "grad_norm": 1.179693050646231, "learning_rate": 2.8941959503056053e-06, "loss": 0.5766, "step": 21183 }, { "epoch": 0.6492583057496628, "grad_norm": 1.3264916868765202, "learning_rate": 2.8937458073586276e-06, "loss": 0.6235, "step": 21184 }, { "epoch": 0.6492889542724041, "grad_norm": 1.3044538620381363, "learning_rate": 2.893295685165114e-06, "loss": 0.6229, "step": 21185 }, { "epoch": 0.6493196027951452, "grad_norm": 1.2497262479169327, "learning_rate": 2.8928455837294944e-06, "loss": 0.5517, "step": 21186 }, { "epoch": 0.6493502513178865, "grad_norm": 1.4105738616108214, "learning_rate": 2.892395503056207e-06, "loss": 0.6189, "step": 21187 }, { "epoch": 0.6493808998406276, "grad_norm": 0.45515787518089285, "learning_rate": 2.891945443149687e-06, "loss": 0.3844, "step": 21188 }, { "epoch": 0.6494115483633689, "grad_norm": 1.2306163174858886, "learning_rate": 2.891495404014366e-06, "loss": 0.4872, "step": 21189 }, { "epoch": 0.64944219688611, "grad_norm": 1.3458966991470231, "learning_rate": 2.891045385654683e-06, "loss": 0.6088, "step": 21190 }, { "epoch": 0.6494728454088513, "grad_norm": 1.149088970766728, "learning_rate": 2.8905953880750646e-06, "loss": 0.6379, "step": 21191 }, { "epoch": 0.6495034939315925, "grad_norm": 0.46380923077896974, "learning_rate": 2.8901454112799543e-06, "loss": 0.4131, "step": 21192 }, { "epoch": 0.6495341424543337, "grad_norm": 1.2439250555703212, "learning_rate": 2.889695455273781e-06, "loss": 0.5923, "step": 21193 }, { "epoch": 0.6495647909770749, "grad_norm": 1.3269543155577255, "learning_rate": 2.889245520060976e-06, "loss": 0.6361, "step": 21194 }, { "epoch": 0.6495954394998161, "grad_norm": 1.2194362155783427, "learning_rate": 2.8887956056459755e-06, "loss": 0.5806, "step": 21195 }, { "epoch": 0.6496260880225573, "grad_norm": 0.4332190094440311, "learning_rate": 2.8883457120332136e-06, "loss": 0.4072, "step": 21196 }, { "epoch": 0.6496567365452985, "grad_norm": 1.2992995613863276, "learning_rate": 2.8878958392271194e-06, "loss": 0.5465, "step": 21197 }, { "epoch": 0.6496873850680397, "grad_norm": 0.4670437789538465, "learning_rate": 2.8874459872321283e-06, "loss": 0.402, "step": 21198 }, { "epoch": 0.649718033590781, "grad_norm": 1.3126885557071188, "learning_rate": 2.886996156052673e-06, "loss": 0.616, "step": 21199 }, { "epoch": 0.6497486821135221, "grad_norm": 1.2376229519300088, "learning_rate": 2.8865463456931865e-06, "loss": 0.6526, "step": 21200 }, { "epoch": 0.6497793306362634, "grad_norm": 1.3742048176197104, "learning_rate": 2.886096556158099e-06, "loss": 0.6021, "step": 21201 }, { "epoch": 0.6498099791590045, "grad_norm": 1.3378441403652768, "learning_rate": 2.88564678745184e-06, "loss": 0.6055, "step": 21202 }, { "epoch": 0.6498406276817458, "grad_norm": 1.2537078215140183, "learning_rate": 2.8851970395788474e-06, "loss": 0.6174, "step": 21203 }, { "epoch": 0.6498712762044869, "grad_norm": 1.1430735569020125, "learning_rate": 2.8847473125435497e-06, "loss": 0.6252, "step": 21204 }, { "epoch": 0.6499019247272282, "grad_norm": 1.2333683592526312, "learning_rate": 2.884297606350377e-06, "loss": 0.5974, "step": 21205 }, { "epoch": 0.6499325732499693, "grad_norm": 1.229856203717571, "learning_rate": 2.8838479210037603e-06, "loss": 0.6239, "step": 21206 }, { "epoch": 0.6499632217727106, "grad_norm": 1.1998602710493829, "learning_rate": 2.883398256508133e-06, "loss": 0.5476, "step": 21207 }, { "epoch": 0.6499938702954517, "grad_norm": 1.2714996954439497, "learning_rate": 2.8829486128679234e-06, "loss": 0.6551, "step": 21208 }, { "epoch": 0.650024518818193, "grad_norm": 1.2150064107323184, "learning_rate": 2.8824989900875623e-06, "loss": 0.5897, "step": 21209 }, { "epoch": 0.6500551673409342, "grad_norm": 1.1309076977736852, "learning_rate": 2.8820493881714807e-06, "loss": 0.6036, "step": 21210 }, { "epoch": 0.6500858158636754, "grad_norm": 1.0957144562706442, "learning_rate": 2.8815998071241097e-06, "loss": 0.5038, "step": 21211 }, { "epoch": 0.6501164643864166, "grad_norm": 1.4191832032552028, "learning_rate": 2.881150246949878e-06, "loss": 0.6744, "step": 21212 }, { "epoch": 0.6501471129091578, "grad_norm": 1.155638286930555, "learning_rate": 2.880700707653211e-06, "loss": 0.5725, "step": 21213 }, { "epoch": 0.650177761431899, "grad_norm": 1.277654708010356, "learning_rate": 2.8802511892385466e-06, "loss": 0.6606, "step": 21214 }, { "epoch": 0.6502084099546401, "grad_norm": 1.2725389570076755, "learning_rate": 2.87980169171031e-06, "loss": 0.6089, "step": 21215 }, { "epoch": 0.6502390584773814, "grad_norm": 1.3642764545745973, "learning_rate": 2.879352215072927e-06, "loss": 0.5981, "step": 21216 }, { "epoch": 0.6502697070001225, "grad_norm": 0.46351262484017486, "learning_rate": 2.8789027593308295e-06, "loss": 0.4185, "step": 21217 }, { "epoch": 0.6503003555228638, "grad_norm": 1.2070291599386471, "learning_rate": 2.878453324488446e-06, "loss": 0.6257, "step": 21218 }, { "epoch": 0.650331004045605, "grad_norm": 0.4582740790435222, "learning_rate": 2.878003910550206e-06, "loss": 0.4118, "step": 21219 }, { "epoch": 0.6503616525683462, "grad_norm": 1.2356840024655371, "learning_rate": 2.877554517520535e-06, "loss": 0.5658, "step": 21220 }, { "epoch": 0.6503923010910874, "grad_norm": 1.221636785144763, "learning_rate": 2.8771051454038622e-06, "loss": 0.5484, "step": 21221 }, { "epoch": 0.6504229496138286, "grad_norm": 1.3991949104238999, "learning_rate": 2.876655794204618e-06, "loss": 0.6353, "step": 21222 }, { "epoch": 0.6504535981365698, "grad_norm": 1.3073482961192353, "learning_rate": 2.876206463927227e-06, "loss": 0.5757, "step": 21223 }, { "epoch": 0.650484246659311, "grad_norm": 1.2474076482563448, "learning_rate": 2.8757571545761152e-06, "loss": 0.6056, "step": 21224 }, { "epoch": 0.6505148951820522, "grad_norm": 1.2281718426939168, "learning_rate": 2.875307866155712e-06, "loss": 0.6803, "step": 21225 }, { "epoch": 0.6505455437047934, "grad_norm": 1.232376013220165, "learning_rate": 2.8748585986704437e-06, "loss": 0.6218, "step": 21226 }, { "epoch": 0.6505761922275346, "grad_norm": 1.4404072241927401, "learning_rate": 2.8744093521247396e-06, "loss": 0.7188, "step": 21227 }, { "epoch": 0.6506068407502759, "grad_norm": 1.3710829015066208, "learning_rate": 2.8739601265230216e-06, "loss": 0.6893, "step": 21228 }, { "epoch": 0.650637489273017, "grad_norm": 1.2029365762592148, "learning_rate": 2.873510921869719e-06, "loss": 0.6206, "step": 21229 }, { "epoch": 0.6506681377957583, "grad_norm": 1.3780030053907892, "learning_rate": 2.8730617381692583e-06, "loss": 0.5558, "step": 21230 }, { "epoch": 0.6506987863184994, "grad_norm": 1.3483255190152272, "learning_rate": 2.872612575426066e-06, "loss": 0.724, "step": 21231 }, { "epoch": 0.6507294348412407, "grad_norm": 1.2334378784136313, "learning_rate": 2.8721634336445616e-06, "loss": 0.5976, "step": 21232 }, { "epoch": 0.6507600833639818, "grad_norm": 1.415582630659466, "learning_rate": 2.871714312829179e-06, "loss": 0.6555, "step": 21233 }, { "epoch": 0.6507907318867231, "grad_norm": 1.28503321106884, "learning_rate": 2.87126521298434e-06, "loss": 0.6072, "step": 21234 }, { "epoch": 0.6508213804094642, "grad_norm": 1.3267631122018049, "learning_rate": 2.870816134114468e-06, "loss": 0.686, "step": 21235 }, { "epoch": 0.6508520289322055, "grad_norm": 1.377751151297404, "learning_rate": 2.8703670762239886e-06, "loss": 0.5672, "step": 21236 }, { "epoch": 0.6508826774549467, "grad_norm": 1.4459017688083007, "learning_rate": 2.8699180393173275e-06, "loss": 0.662, "step": 21237 }, { "epoch": 0.6509133259776879, "grad_norm": 1.368466794259777, "learning_rate": 2.8694690233989116e-06, "loss": 0.6928, "step": 21238 }, { "epoch": 0.6509439745004291, "grad_norm": 0.448143529865409, "learning_rate": 2.86902002847316e-06, "loss": 0.3996, "step": 21239 }, { "epoch": 0.6509746230231703, "grad_norm": 1.442471494981188, "learning_rate": 2.8685710545444996e-06, "loss": 0.6147, "step": 21240 }, { "epoch": 0.6510052715459115, "grad_norm": 1.3070448489786586, "learning_rate": 2.8681221016173554e-06, "loss": 0.5703, "step": 21241 }, { "epoch": 0.6510359200686527, "grad_norm": 1.2697036215259934, "learning_rate": 2.86767316969615e-06, "loss": 0.6836, "step": 21242 }, { "epoch": 0.6510665685913939, "grad_norm": 1.1592573775212869, "learning_rate": 2.867224258785303e-06, "loss": 0.7964, "step": 21243 }, { "epoch": 0.6510972171141352, "grad_norm": 1.1916564230465563, "learning_rate": 2.8667753688892442e-06, "loss": 0.5979, "step": 21244 }, { "epoch": 0.6511278656368763, "grad_norm": 1.2568181474417868, "learning_rate": 2.866326500012392e-06, "loss": 0.7199, "step": 21245 }, { "epoch": 0.6511585141596175, "grad_norm": 0.43931870905926934, "learning_rate": 2.865877652159172e-06, "loss": 0.3731, "step": 21246 }, { "epoch": 0.6511891626823587, "grad_norm": 1.4449249556383617, "learning_rate": 2.865428825334004e-06, "loss": 0.6457, "step": 21247 }, { "epoch": 0.6512198112050999, "grad_norm": 1.2395001937974914, "learning_rate": 2.864980019541312e-06, "loss": 0.6152, "step": 21248 }, { "epoch": 0.6512504597278411, "grad_norm": 1.2440347719733307, "learning_rate": 2.8645312347855204e-06, "loss": 0.6787, "step": 21249 }, { "epoch": 0.6512811082505823, "grad_norm": 1.5069741633628526, "learning_rate": 2.8640824710710464e-06, "loss": 0.6596, "step": 21250 }, { "epoch": 0.6513117567733235, "grad_norm": 1.2874858951084667, "learning_rate": 2.8636337284023143e-06, "loss": 0.5513, "step": 21251 }, { "epoch": 0.6513424052960647, "grad_norm": 1.3012596527889448, "learning_rate": 2.863185006783748e-06, "loss": 0.5837, "step": 21252 }, { "epoch": 0.651373053818806, "grad_norm": 1.2516163696097815, "learning_rate": 2.8627363062197664e-06, "loss": 0.5789, "step": 21253 }, { "epoch": 0.6514037023415471, "grad_norm": 1.3796202354267408, "learning_rate": 2.862287626714787e-06, "loss": 0.6377, "step": 21254 }, { "epoch": 0.6514343508642884, "grad_norm": 1.3276225747773456, "learning_rate": 2.861838968273238e-06, "loss": 0.6499, "step": 21255 }, { "epoch": 0.6514649993870295, "grad_norm": 1.2650161322823787, "learning_rate": 2.8613903308995356e-06, "loss": 0.6182, "step": 21256 }, { "epoch": 0.6514956479097708, "grad_norm": 1.2425136043970464, "learning_rate": 2.8609417145981034e-06, "loss": 0.6114, "step": 21257 }, { "epoch": 0.6515262964325119, "grad_norm": 1.4105823630435461, "learning_rate": 2.860493119373357e-06, "loss": 0.6169, "step": 21258 }, { "epoch": 0.6515569449552532, "grad_norm": 1.3133164617570479, "learning_rate": 2.86004454522972e-06, "loss": 0.5573, "step": 21259 }, { "epoch": 0.6515875934779943, "grad_norm": 1.2259148411168963, "learning_rate": 2.859595992171613e-06, "loss": 0.5948, "step": 21260 }, { "epoch": 0.6516182420007356, "grad_norm": 1.3736625812322765, "learning_rate": 2.859147460203453e-06, "loss": 0.6041, "step": 21261 }, { "epoch": 0.6516488905234767, "grad_norm": 1.289425711996147, "learning_rate": 2.8586989493296603e-06, "loss": 0.6357, "step": 21262 }, { "epoch": 0.651679539046218, "grad_norm": 1.160991277128396, "learning_rate": 2.858250459554657e-06, "loss": 0.5535, "step": 21263 }, { "epoch": 0.6517101875689592, "grad_norm": 0.44223354440417983, "learning_rate": 2.857801990882858e-06, "loss": 0.3973, "step": 21264 }, { "epoch": 0.6517408360917004, "grad_norm": 1.3646566719620865, "learning_rate": 2.857353543318684e-06, "loss": 0.6934, "step": 21265 }, { "epoch": 0.6517714846144416, "grad_norm": 1.516151088926927, "learning_rate": 2.856905116866556e-06, "loss": 0.6791, "step": 21266 }, { "epoch": 0.6518021331371828, "grad_norm": 1.310173003929697, "learning_rate": 2.856456711530887e-06, "loss": 0.5937, "step": 21267 }, { "epoch": 0.651832781659924, "grad_norm": 1.2857535885708538, "learning_rate": 2.856008327316102e-06, "loss": 0.6487, "step": 21268 }, { "epoch": 0.6518634301826652, "grad_norm": 1.595363040319836, "learning_rate": 2.855559964226613e-06, "loss": 0.5841, "step": 21269 }, { "epoch": 0.6518940787054064, "grad_norm": 1.2836318050752642, "learning_rate": 2.85511162226684e-06, "loss": 0.6193, "step": 21270 }, { "epoch": 0.6519247272281476, "grad_norm": 1.0600137437900037, "learning_rate": 2.8546633014412035e-06, "loss": 0.5624, "step": 21271 }, { "epoch": 0.6519553757508888, "grad_norm": 1.1194260293571299, "learning_rate": 2.8542150017541158e-06, "loss": 0.5667, "step": 21272 }, { "epoch": 0.6519860242736301, "grad_norm": 1.4369535725816929, "learning_rate": 2.8537667232099975e-06, "loss": 0.6084, "step": 21273 }, { "epoch": 0.6520166727963712, "grad_norm": 1.2912642835692796, "learning_rate": 2.8533184658132662e-06, "loss": 0.6337, "step": 21274 }, { "epoch": 0.6520473213191125, "grad_norm": 1.3395598578689778, "learning_rate": 2.852870229568335e-06, "loss": 0.5833, "step": 21275 }, { "epoch": 0.6520779698418536, "grad_norm": 1.1145171071501645, "learning_rate": 2.8524220144796257e-06, "loss": 0.519, "step": 21276 }, { "epoch": 0.6521086183645948, "grad_norm": 1.277178483935334, "learning_rate": 2.851973820551549e-06, "loss": 0.556, "step": 21277 }, { "epoch": 0.652139266887336, "grad_norm": 1.1771369886213152, "learning_rate": 2.8515256477885247e-06, "loss": 0.6065, "step": 21278 }, { "epoch": 0.6521699154100772, "grad_norm": 1.3551818494254393, "learning_rate": 2.8510774961949694e-06, "loss": 0.6328, "step": 21279 }, { "epoch": 0.6522005639328184, "grad_norm": 1.192528766022881, "learning_rate": 2.8506293657752947e-06, "loss": 0.6131, "step": 21280 }, { "epoch": 0.6522312124555596, "grad_norm": 1.1868514715185015, "learning_rate": 2.85018125653392e-06, "loss": 0.6085, "step": 21281 }, { "epoch": 0.6522618609783009, "grad_norm": 1.3561566869740318, "learning_rate": 2.8497331684752605e-06, "loss": 0.6685, "step": 21282 }, { "epoch": 0.652292509501042, "grad_norm": 1.0758801247777867, "learning_rate": 2.849285101603729e-06, "loss": 0.5689, "step": 21283 }, { "epoch": 0.6523231580237833, "grad_norm": 1.2793374434168747, "learning_rate": 2.848837055923741e-06, "loss": 0.5805, "step": 21284 }, { "epoch": 0.6523538065465244, "grad_norm": 1.4128650564065057, "learning_rate": 2.8483890314397145e-06, "loss": 0.6208, "step": 21285 }, { "epoch": 0.6523844550692657, "grad_norm": 0.4916646120378059, "learning_rate": 2.8479410281560595e-06, "loss": 0.4164, "step": 21286 }, { "epoch": 0.6524151035920068, "grad_norm": 1.2463443222852857, "learning_rate": 2.8474930460771933e-06, "loss": 0.6011, "step": 21287 }, { "epoch": 0.6524457521147481, "grad_norm": 1.2021795343360744, "learning_rate": 2.8470450852075273e-06, "loss": 0.5863, "step": 21288 }, { "epoch": 0.6524764006374892, "grad_norm": 1.292023412168047, "learning_rate": 2.8465971455514774e-06, "loss": 0.614, "step": 21289 }, { "epoch": 0.6525070491602305, "grad_norm": 0.4458240345985851, "learning_rate": 2.8461492271134585e-06, "loss": 0.4135, "step": 21290 }, { "epoch": 0.6525376976829717, "grad_norm": 1.3902665303034387, "learning_rate": 2.8457013298978797e-06, "loss": 0.5532, "step": 21291 }, { "epoch": 0.6525683462057129, "grad_norm": 1.163669936170882, "learning_rate": 2.8452534539091574e-06, "loss": 0.5255, "step": 21292 }, { "epoch": 0.6525989947284541, "grad_norm": 1.3194809971938344, "learning_rate": 2.8448055991517065e-06, "loss": 0.6107, "step": 21293 }, { "epoch": 0.6526296432511953, "grad_norm": 1.3494675765545725, "learning_rate": 2.844357765629935e-06, "loss": 0.5322, "step": 21294 }, { "epoch": 0.6526602917739365, "grad_norm": 1.3291232625480038, "learning_rate": 2.843909953348258e-06, "loss": 0.5645, "step": 21295 }, { "epoch": 0.6526909402966777, "grad_norm": 1.361598580129001, "learning_rate": 2.8434621623110904e-06, "loss": 0.621, "step": 21296 }, { "epoch": 0.6527215888194189, "grad_norm": 1.2259054011877022, "learning_rate": 2.8430143925228394e-06, "loss": 0.5726, "step": 21297 }, { "epoch": 0.6527522373421601, "grad_norm": 0.44293994416716476, "learning_rate": 2.8425666439879207e-06, "loss": 0.408, "step": 21298 }, { "epoch": 0.6527828858649013, "grad_norm": 1.2176961163355517, "learning_rate": 2.8421189167107422e-06, "loss": 0.5904, "step": 21299 }, { "epoch": 0.6528135343876426, "grad_norm": 1.318754852406553, "learning_rate": 2.8416712106957213e-06, "loss": 0.6097, "step": 21300 }, { "epoch": 0.6528441829103837, "grad_norm": 1.2842343363760258, "learning_rate": 2.8412235259472663e-06, "loss": 0.6583, "step": 21301 }, { "epoch": 0.652874831433125, "grad_norm": 1.3435420314343196, "learning_rate": 2.840775862469787e-06, "loss": 0.7379, "step": 21302 }, { "epoch": 0.6529054799558661, "grad_norm": 1.2445189518624182, "learning_rate": 2.840328220267695e-06, "loss": 0.5778, "step": 21303 }, { "epoch": 0.6529361284786074, "grad_norm": 1.1253306099799933, "learning_rate": 2.8398805993454037e-06, "loss": 0.6225, "step": 21304 }, { "epoch": 0.6529667770013485, "grad_norm": 1.2526143072179943, "learning_rate": 2.8394329997073193e-06, "loss": 0.6301, "step": 21305 }, { "epoch": 0.6529974255240898, "grad_norm": 1.2251028581563146, "learning_rate": 2.838985421357855e-06, "loss": 0.5739, "step": 21306 }, { "epoch": 0.6530280740468309, "grad_norm": 1.498210386984621, "learning_rate": 2.8385378643014215e-06, "loss": 0.6079, "step": 21307 }, { "epoch": 0.6530587225695721, "grad_norm": 1.2765118781450198, "learning_rate": 2.838090328542426e-06, "loss": 0.6207, "step": 21308 }, { "epoch": 0.6530893710923134, "grad_norm": 1.3855862076923509, "learning_rate": 2.8376428140852812e-06, "loss": 0.7075, "step": 21309 }, { "epoch": 0.6531200196150545, "grad_norm": 1.4803553631670265, "learning_rate": 2.8371953209343918e-06, "loss": 0.6695, "step": 21310 }, { "epoch": 0.6531506681377958, "grad_norm": 1.168288835055867, "learning_rate": 2.8367478490941737e-06, "loss": 0.6397, "step": 21311 }, { "epoch": 0.6531813166605369, "grad_norm": 1.223013584520624, "learning_rate": 2.8363003985690323e-06, "loss": 0.5418, "step": 21312 }, { "epoch": 0.6532119651832782, "grad_norm": 0.4454823122100223, "learning_rate": 2.8358529693633752e-06, "loss": 0.4037, "step": 21313 }, { "epoch": 0.6532426137060193, "grad_norm": 1.3331949409135124, "learning_rate": 2.835405561481612e-06, "loss": 0.613, "step": 21314 }, { "epoch": 0.6532732622287606, "grad_norm": 1.326206600310021, "learning_rate": 2.834958174928154e-06, "loss": 0.6081, "step": 21315 }, { "epoch": 0.6533039107515017, "grad_norm": 1.167047508024924, "learning_rate": 2.834510809707405e-06, "loss": 0.5741, "step": 21316 }, { "epoch": 0.653334559274243, "grad_norm": 1.4144740084338905, "learning_rate": 2.8340634658237747e-06, "loss": 0.7619, "step": 21317 }, { "epoch": 0.6533652077969841, "grad_norm": 0.45956273312608237, "learning_rate": 2.8336161432816716e-06, "loss": 0.3934, "step": 21318 }, { "epoch": 0.6533958563197254, "grad_norm": 1.3170805735981042, "learning_rate": 2.833168842085505e-06, "loss": 0.6249, "step": 21319 }, { "epoch": 0.6534265048424666, "grad_norm": 1.49195815110963, "learning_rate": 2.8327215622396803e-06, "loss": 0.6494, "step": 21320 }, { "epoch": 0.6534571533652078, "grad_norm": 1.3800515054450226, "learning_rate": 2.8322743037486022e-06, "loss": 0.5754, "step": 21321 }, { "epoch": 0.653487801887949, "grad_norm": 1.2956714658338533, "learning_rate": 2.83182706661668e-06, "loss": 0.6399, "step": 21322 }, { "epoch": 0.6535184504106902, "grad_norm": 1.3347289164101934, "learning_rate": 2.8313798508483226e-06, "loss": 0.6396, "step": 21323 }, { "epoch": 0.6535490989334314, "grad_norm": 1.3309843808466821, "learning_rate": 2.8309326564479328e-06, "loss": 0.7074, "step": 21324 }, { "epoch": 0.6535797474561726, "grad_norm": 1.283410895464732, "learning_rate": 2.830485483419918e-06, "loss": 0.5911, "step": 21325 }, { "epoch": 0.6536103959789138, "grad_norm": 1.2396917250680544, "learning_rate": 2.830038331768685e-06, "loss": 0.5723, "step": 21326 }, { "epoch": 0.653641044501655, "grad_norm": 1.3109760488527114, "learning_rate": 2.8295912014986417e-06, "loss": 0.5995, "step": 21327 }, { "epoch": 0.6536716930243962, "grad_norm": 1.5764000343838804, "learning_rate": 2.8291440926141912e-06, "loss": 0.7022, "step": 21328 }, { "epoch": 0.6537023415471375, "grad_norm": 1.2523376735662535, "learning_rate": 2.828697005119736e-06, "loss": 0.5811, "step": 21329 }, { "epoch": 0.6537329900698786, "grad_norm": 1.3861314695480544, "learning_rate": 2.8282499390196883e-06, "loss": 0.6956, "step": 21330 }, { "epoch": 0.6537636385926199, "grad_norm": 1.3630897931155328, "learning_rate": 2.82780289431845e-06, "loss": 0.6705, "step": 21331 }, { "epoch": 0.653794287115361, "grad_norm": 1.2814732820697836, "learning_rate": 2.827355871020423e-06, "loss": 0.6926, "step": 21332 }, { "epoch": 0.6538249356381023, "grad_norm": 1.2510187366758116, "learning_rate": 2.826908869130015e-06, "loss": 0.58, "step": 21333 }, { "epoch": 0.6538555841608434, "grad_norm": 1.119203885295703, "learning_rate": 2.8264618886516315e-06, "loss": 0.5053, "step": 21334 }, { "epoch": 0.6538862326835847, "grad_norm": 1.2907817841791869, "learning_rate": 2.8260149295896734e-06, "loss": 0.6479, "step": 21335 }, { "epoch": 0.6539168812063259, "grad_norm": 1.249924634327277, "learning_rate": 2.825567991948546e-06, "loss": 0.6317, "step": 21336 }, { "epoch": 0.6539475297290671, "grad_norm": 0.4536015676828102, "learning_rate": 2.825121075732654e-06, "loss": 0.4184, "step": 21337 }, { "epoch": 0.6539781782518083, "grad_norm": 1.364330846362064, "learning_rate": 2.8246741809464024e-06, "loss": 0.5577, "step": 21338 }, { "epoch": 0.6540088267745494, "grad_norm": 1.2196732977402098, "learning_rate": 2.824227307594193e-06, "loss": 0.6531, "step": 21339 }, { "epoch": 0.6540394752972907, "grad_norm": 1.2426474601076085, "learning_rate": 2.823780455680424e-06, "loss": 0.644, "step": 21340 }, { "epoch": 0.6540701238200318, "grad_norm": 1.3738479623415987, "learning_rate": 2.8233336252095073e-06, "loss": 0.6113, "step": 21341 }, { "epoch": 0.6541007723427731, "grad_norm": 1.3781914517977243, "learning_rate": 2.8228868161858413e-06, "loss": 0.6256, "step": 21342 }, { "epoch": 0.6541314208655142, "grad_norm": 1.2800875006617316, "learning_rate": 2.8224400286138264e-06, "loss": 0.6141, "step": 21343 }, { "epoch": 0.6541620693882555, "grad_norm": 1.1777205173439533, "learning_rate": 2.8219932624978675e-06, "loss": 0.6444, "step": 21344 }, { "epoch": 0.6541927179109966, "grad_norm": 1.1565482737085717, "learning_rate": 2.8215465178423663e-06, "loss": 0.6147, "step": 21345 }, { "epoch": 0.6542233664337379, "grad_norm": 1.309476926656654, "learning_rate": 2.821099794651726e-06, "loss": 0.5806, "step": 21346 }, { "epoch": 0.6542540149564791, "grad_norm": 0.4602250399591755, "learning_rate": 2.820653092930345e-06, "loss": 0.3958, "step": 21347 }, { "epoch": 0.6542846634792203, "grad_norm": 1.4028054509546235, "learning_rate": 2.820206412682627e-06, "loss": 0.5836, "step": 21348 }, { "epoch": 0.6543153120019615, "grad_norm": 1.5245242514889665, "learning_rate": 2.819759753912975e-06, "loss": 0.6344, "step": 21349 }, { "epoch": 0.6543459605247027, "grad_norm": 1.2158811914799736, "learning_rate": 2.8193131166257875e-06, "loss": 0.6172, "step": 21350 }, { "epoch": 0.6543766090474439, "grad_norm": 1.4160918504233921, "learning_rate": 2.8188665008254622e-06, "loss": 0.7434, "step": 21351 }, { "epoch": 0.6544072575701851, "grad_norm": 1.2688581284473346, "learning_rate": 2.8184199065164076e-06, "loss": 0.6064, "step": 21352 }, { "epoch": 0.6544379060929263, "grad_norm": 0.43680339251809014, "learning_rate": 2.8179733337030167e-06, "loss": 0.4039, "step": 21353 }, { "epoch": 0.6544685546156676, "grad_norm": 1.4912777109139896, "learning_rate": 2.817526782389696e-06, "loss": 0.7153, "step": 21354 }, { "epoch": 0.6544992031384087, "grad_norm": 1.252777255401718, "learning_rate": 2.8170802525808398e-06, "loss": 0.6066, "step": 21355 }, { "epoch": 0.65452985166115, "grad_norm": 1.4115115545635957, "learning_rate": 2.816633744280851e-06, "loss": 0.6172, "step": 21356 }, { "epoch": 0.6545605001838911, "grad_norm": 1.2582536736701584, "learning_rate": 2.8161872574941295e-06, "loss": 0.6054, "step": 21357 }, { "epoch": 0.6545911487066324, "grad_norm": 1.4469528927321902, "learning_rate": 2.8157407922250725e-06, "loss": 0.7089, "step": 21358 }, { "epoch": 0.6546217972293735, "grad_norm": 1.1876387154861119, "learning_rate": 2.8152943484780804e-06, "loss": 0.6524, "step": 21359 }, { "epoch": 0.6546524457521148, "grad_norm": 0.46627350353278335, "learning_rate": 2.8148479262575536e-06, "loss": 0.3963, "step": 21360 }, { "epoch": 0.6546830942748559, "grad_norm": 1.3303384417276258, "learning_rate": 2.81440152556789e-06, "loss": 0.6065, "step": 21361 }, { "epoch": 0.6547137427975972, "grad_norm": 1.3828718190956981, "learning_rate": 2.8139551464134827e-06, "loss": 0.6106, "step": 21362 }, { "epoch": 0.6547443913203383, "grad_norm": 1.3805761028393448, "learning_rate": 2.813508788798739e-06, "loss": 0.6187, "step": 21363 }, { "epoch": 0.6547750398430796, "grad_norm": 0.4415080498309762, "learning_rate": 2.81306245272805e-06, "loss": 0.4031, "step": 21364 }, { "epoch": 0.6548056883658208, "grad_norm": 1.427507801097907, "learning_rate": 2.812616138205819e-06, "loss": 0.6393, "step": 21365 }, { "epoch": 0.654836336888562, "grad_norm": 1.3918719652909224, "learning_rate": 2.812169845236439e-06, "loss": 0.4954, "step": 21366 }, { "epoch": 0.6548669854113032, "grad_norm": 1.236123651761646, "learning_rate": 2.8117235738243087e-06, "loss": 0.6648, "step": 21367 }, { "epoch": 0.6548976339340444, "grad_norm": 1.4349118728674064, "learning_rate": 2.811277323973828e-06, "loss": 0.5856, "step": 21368 }, { "epoch": 0.6549282824567856, "grad_norm": 1.3230492026048157, "learning_rate": 2.8108310956893896e-06, "loss": 0.5883, "step": 21369 }, { "epoch": 0.6549589309795267, "grad_norm": 1.3961853495809766, "learning_rate": 2.810384888975393e-06, "loss": 0.5594, "step": 21370 }, { "epoch": 0.654989579502268, "grad_norm": 1.2413490107720873, "learning_rate": 2.8099387038362357e-06, "loss": 0.6357, "step": 21371 }, { "epoch": 0.6550202280250091, "grad_norm": 1.3934505691968955, "learning_rate": 2.809492540276312e-06, "loss": 0.5786, "step": 21372 }, { "epoch": 0.6550508765477504, "grad_norm": 1.3893587768083053, "learning_rate": 2.809046398300019e-06, "loss": 0.6598, "step": 21373 }, { "epoch": 0.6550815250704916, "grad_norm": 1.3028270910063384, "learning_rate": 2.8086002779117515e-06, "loss": 0.6705, "step": 21374 }, { "epoch": 0.6551121735932328, "grad_norm": 1.3154378212307882, "learning_rate": 2.8081541791159063e-06, "loss": 0.6593, "step": 21375 }, { "epoch": 0.655142822115974, "grad_norm": 1.2150406147966681, "learning_rate": 2.8077081019168804e-06, "loss": 0.5877, "step": 21376 }, { "epoch": 0.6551734706387152, "grad_norm": 1.2051972751659852, "learning_rate": 2.807262046319066e-06, "loss": 0.6002, "step": 21377 }, { "epoch": 0.6552041191614564, "grad_norm": 1.350172351432005, "learning_rate": 2.806816012326859e-06, "loss": 0.5772, "step": 21378 }, { "epoch": 0.6552347676841976, "grad_norm": 1.333595014557314, "learning_rate": 2.806369999944657e-06, "loss": 0.7099, "step": 21379 }, { "epoch": 0.6552654162069388, "grad_norm": 1.2437312220233778, "learning_rate": 2.8059240091768514e-06, "loss": 0.5502, "step": 21380 }, { "epoch": 0.65529606472968, "grad_norm": 0.4794662694051413, "learning_rate": 2.805478040027837e-06, "loss": 0.3916, "step": 21381 }, { "epoch": 0.6553267132524212, "grad_norm": 1.5262926658735307, "learning_rate": 2.8050320925020112e-06, "loss": 0.5415, "step": 21382 }, { "epoch": 0.6553573617751625, "grad_norm": 1.1956345609683035, "learning_rate": 2.8045861666037645e-06, "loss": 0.5658, "step": 21383 }, { "epoch": 0.6553880102979036, "grad_norm": 1.3254149016333254, "learning_rate": 2.8041402623374936e-06, "loss": 0.5774, "step": 21384 }, { "epoch": 0.6554186588206449, "grad_norm": 1.3181397895202118, "learning_rate": 2.8036943797075884e-06, "loss": 0.6157, "step": 21385 }, { "epoch": 0.655449307343386, "grad_norm": 1.4443546442369928, "learning_rate": 2.8032485187184446e-06, "loss": 0.593, "step": 21386 }, { "epoch": 0.6554799558661273, "grad_norm": 1.1928737571061605, "learning_rate": 2.802802679374457e-06, "loss": 0.6172, "step": 21387 }, { "epoch": 0.6555106043888684, "grad_norm": 0.4595764392308428, "learning_rate": 2.8023568616800147e-06, "loss": 0.4165, "step": 21388 }, { "epoch": 0.6555412529116097, "grad_norm": 0.44312373115966136, "learning_rate": 2.8019110656395124e-06, "loss": 0.3755, "step": 21389 }, { "epoch": 0.6555719014343508, "grad_norm": 1.2114777076213568, "learning_rate": 2.8014652912573453e-06, "loss": 0.6045, "step": 21390 }, { "epoch": 0.6556025499570921, "grad_norm": 0.47261235684179576, "learning_rate": 2.8010195385379014e-06, "loss": 0.4064, "step": 21391 }, { "epoch": 0.6556331984798333, "grad_norm": 1.387445172942457, "learning_rate": 2.800573807485574e-06, "loss": 0.6164, "step": 21392 }, { "epoch": 0.6556638470025745, "grad_norm": 1.2165739117556016, "learning_rate": 2.8001280981047574e-06, "loss": 0.5809, "step": 21393 }, { "epoch": 0.6556944955253157, "grad_norm": 0.464339489963411, "learning_rate": 2.7996824103998398e-06, "loss": 0.3874, "step": 21394 }, { "epoch": 0.6557251440480569, "grad_norm": 2.7320818100724433, "learning_rate": 2.7992367443752167e-06, "loss": 0.5562, "step": 21395 }, { "epoch": 0.6557557925707981, "grad_norm": 1.2598775416542882, "learning_rate": 2.7987911000352752e-06, "loss": 0.5109, "step": 21396 }, { "epoch": 0.6557864410935393, "grad_norm": 1.2855035272192057, "learning_rate": 2.7983454773844078e-06, "loss": 0.6111, "step": 21397 }, { "epoch": 0.6558170896162805, "grad_norm": 1.3971609941450847, "learning_rate": 2.797899876427008e-06, "loss": 0.6526, "step": 21398 }, { "epoch": 0.6558477381390218, "grad_norm": 0.4379665992935255, "learning_rate": 2.7974542971674614e-06, "loss": 0.388, "step": 21399 }, { "epoch": 0.6558783866617629, "grad_norm": 1.394123624676303, "learning_rate": 2.797008739610162e-06, "loss": 0.6697, "step": 21400 }, { "epoch": 0.655909035184504, "grad_norm": 1.455608067210434, "learning_rate": 2.7965632037595002e-06, "loss": 0.6534, "step": 21401 }, { "epoch": 0.6559396837072453, "grad_norm": 1.1823780257576624, "learning_rate": 2.7961176896198637e-06, "loss": 0.5989, "step": 21402 }, { "epoch": 0.6559703322299865, "grad_norm": 1.242714868982392, "learning_rate": 2.7956721971956435e-06, "loss": 0.6271, "step": 21403 }, { "epoch": 0.6560009807527277, "grad_norm": 1.1856817040742158, "learning_rate": 2.7952267264912314e-06, "loss": 0.5791, "step": 21404 }, { "epoch": 0.6560316292754689, "grad_norm": 1.346945497799289, "learning_rate": 2.7947812775110117e-06, "loss": 0.7434, "step": 21405 }, { "epoch": 0.6560622777982101, "grad_norm": 1.263897265050809, "learning_rate": 2.7943358502593787e-06, "loss": 0.5751, "step": 21406 }, { "epoch": 0.6560929263209513, "grad_norm": 1.306236776784395, "learning_rate": 2.793890444740715e-06, "loss": 0.6033, "step": 21407 }, { "epoch": 0.6561235748436925, "grad_norm": 1.218627020036845, "learning_rate": 2.793445060959417e-06, "loss": 0.6226, "step": 21408 }, { "epoch": 0.6561542233664337, "grad_norm": 1.5364588476072885, "learning_rate": 2.7929996989198695e-06, "loss": 0.7483, "step": 21409 }, { "epoch": 0.656184871889175, "grad_norm": 1.3014867564372372, "learning_rate": 2.7925543586264588e-06, "loss": 0.6224, "step": 21410 }, { "epoch": 0.6562155204119161, "grad_norm": 1.3959394392346964, "learning_rate": 2.7921090400835747e-06, "loss": 0.6282, "step": 21411 }, { "epoch": 0.6562461689346574, "grad_norm": 1.5403012664273235, "learning_rate": 2.7916637432956066e-06, "loss": 0.6836, "step": 21412 }, { "epoch": 0.6562768174573985, "grad_norm": 1.2792380663067666, "learning_rate": 2.7912184682669396e-06, "loss": 0.579, "step": 21413 }, { "epoch": 0.6563074659801398, "grad_norm": 1.6645148624154797, "learning_rate": 2.7907732150019617e-06, "loss": 0.5245, "step": 21414 }, { "epoch": 0.6563381145028809, "grad_norm": 1.3609411912322096, "learning_rate": 2.790327983505062e-06, "loss": 0.7153, "step": 21415 }, { "epoch": 0.6563687630256222, "grad_norm": 1.2102838422822668, "learning_rate": 2.789882773780625e-06, "loss": 0.6132, "step": 21416 }, { "epoch": 0.6563994115483633, "grad_norm": 1.347188840279512, "learning_rate": 2.78943758583304e-06, "loss": 0.5809, "step": 21417 }, { "epoch": 0.6564300600711046, "grad_norm": 1.308855930470491, "learning_rate": 2.7889924196666908e-06, "loss": 0.622, "step": 21418 }, { "epoch": 0.6564607085938458, "grad_norm": 0.46187468271145526, "learning_rate": 2.788547275285964e-06, "loss": 0.4067, "step": 21419 }, { "epoch": 0.656491357116587, "grad_norm": 0.4552356387688223, "learning_rate": 2.788102152695249e-06, "loss": 0.4018, "step": 21420 }, { "epoch": 0.6565220056393282, "grad_norm": 1.316353333625018, "learning_rate": 2.787657051898928e-06, "loss": 0.6518, "step": 21421 }, { "epoch": 0.6565526541620694, "grad_norm": 1.4167919104927393, "learning_rate": 2.787211972901387e-06, "loss": 0.691, "step": 21422 }, { "epoch": 0.6565833026848106, "grad_norm": 1.259620613774919, "learning_rate": 2.7867669157070155e-06, "loss": 0.6031, "step": 21423 }, { "epoch": 0.6566139512075518, "grad_norm": 1.2209166531811526, "learning_rate": 2.7863218803201938e-06, "loss": 0.6314, "step": 21424 }, { "epoch": 0.656644599730293, "grad_norm": 1.456575427482189, "learning_rate": 2.7858768667453107e-06, "loss": 0.6967, "step": 21425 }, { "epoch": 0.6566752482530342, "grad_norm": 1.2352081445314491, "learning_rate": 2.7854318749867454e-06, "loss": 0.5747, "step": 21426 }, { "epoch": 0.6567058967757754, "grad_norm": 1.501918514215606, "learning_rate": 2.784986905048891e-06, "loss": 0.6255, "step": 21427 }, { "epoch": 0.6567365452985167, "grad_norm": 1.2626284978437397, "learning_rate": 2.7845419569361263e-06, "loss": 0.6167, "step": 21428 }, { "epoch": 0.6567671938212578, "grad_norm": 1.4784150836353152, "learning_rate": 2.784097030652835e-06, "loss": 0.6204, "step": 21429 }, { "epoch": 0.6567978423439991, "grad_norm": 1.3300567136262331, "learning_rate": 2.7836521262034034e-06, "loss": 0.5562, "step": 21430 }, { "epoch": 0.6568284908667402, "grad_norm": 0.4458628490193017, "learning_rate": 2.7832072435922154e-06, "loss": 0.394, "step": 21431 }, { "epoch": 0.6568591393894814, "grad_norm": 1.2092353112519223, "learning_rate": 2.7827623828236523e-06, "loss": 0.5981, "step": 21432 }, { "epoch": 0.6568897879122226, "grad_norm": 1.3375086507964438, "learning_rate": 2.7823175439020984e-06, "loss": 0.6463, "step": 21433 }, { "epoch": 0.6569204364349638, "grad_norm": 1.3119521788072266, "learning_rate": 2.781872726831939e-06, "loss": 0.5827, "step": 21434 }, { "epoch": 0.656951084957705, "grad_norm": 1.4017876621936012, "learning_rate": 2.781427931617554e-06, "loss": 0.6676, "step": 21435 }, { "epoch": 0.6569817334804462, "grad_norm": 1.3891674811881285, "learning_rate": 2.7809831582633284e-06, "loss": 0.6789, "step": 21436 }, { "epoch": 0.6570123820031875, "grad_norm": 1.17778587214195, "learning_rate": 2.7805384067736397e-06, "loss": 0.5945, "step": 21437 }, { "epoch": 0.6570430305259286, "grad_norm": 0.4539724540219999, "learning_rate": 2.780093677152878e-06, "loss": 0.4017, "step": 21438 }, { "epoch": 0.6570736790486699, "grad_norm": 1.2896084475686675, "learning_rate": 2.7796489694054214e-06, "loss": 0.517, "step": 21439 }, { "epoch": 0.657104327571411, "grad_norm": 1.478643511759593, "learning_rate": 2.7792042835356492e-06, "loss": 0.6474, "step": 21440 }, { "epoch": 0.6571349760941523, "grad_norm": 1.2467738273128992, "learning_rate": 2.778759619547946e-06, "loss": 0.5742, "step": 21441 }, { "epoch": 0.6571656246168934, "grad_norm": 1.178181111512142, "learning_rate": 2.7783149774466944e-06, "loss": 0.5595, "step": 21442 }, { "epoch": 0.6571962731396347, "grad_norm": 0.4402867406436125, "learning_rate": 2.7778703572362714e-06, "loss": 0.4144, "step": 21443 }, { "epoch": 0.6572269216623758, "grad_norm": 1.291852409620339, "learning_rate": 2.7774257589210606e-06, "loss": 0.6027, "step": 21444 }, { "epoch": 0.6572575701851171, "grad_norm": 1.1836798681061693, "learning_rate": 2.7769811825054427e-06, "loss": 0.5781, "step": 21445 }, { "epoch": 0.6572882187078583, "grad_norm": 1.3249876855037628, "learning_rate": 2.7765366279938e-06, "loss": 0.6615, "step": 21446 }, { "epoch": 0.6573188672305995, "grad_norm": 1.456041170721291, "learning_rate": 2.7760920953905104e-06, "loss": 0.7045, "step": 21447 }, { "epoch": 0.6573495157533407, "grad_norm": 0.4335887833724295, "learning_rate": 2.7756475846999503e-06, "loss": 0.3924, "step": 21448 }, { "epoch": 0.6573801642760819, "grad_norm": 1.276416413064927, "learning_rate": 2.775203095926508e-06, "loss": 0.6036, "step": 21449 }, { "epoch": 0.6574108127988231, "grad_norm": 1.3193765405289148, "learning_rate": 2.7747586290745586e-06, "loss": 0.5983, "step": 21450 }, { "epoch": 0.6574414613215643, "grad_norm": 1.2450590135503947, "learning_rate": 2.77431418414848e-06, "loss": 0.6955, "step": 21451 }, { "epoch": 0.6574721098443055, "grad_norm": 1.2372215169274376, "learning_rate": 2.7738697611526533e-06, "loss": 0.5715, "step": 21452 }, { "epoch": 0.6575027583670467, "grad_norm": 1.3979354359143652, "learning_rate": 2.773425360091457e-06, "loss": 0.7103, "step": 21453 }, { "epoch": 0.6575334068897879, "grad_norm": 1.1665735718592878, "learning_rate": 2.7729809809692734e-06, "loss": 0.6025, "step": 21454 }, { "epoch": 0.6575640554125292, "grad_norm": 0.46270695900991543, "learning_rate": 2.772536623790475e-06, "loss": 0.4209, "step": 21455 }, { "epoch": 0.6575947039352703, "grad_norm": 1.2075566245551017, "learning_rate": 2.7720922885594433e-06, "loss": 0.6926, "step": 21456 }, { "epoch": 0.6576253524580116, "grad_norm": 1.4150805085794989, "learning_rate": 2.771647975280558e-06, "loss": 0.582, "step": 21457 }, { "epoch": 0.6576560009807527, "grad_norm": 1.4506367943315175, "learning_rate": 2.7712036839581956e-06, "loss": 0.6232, "step": 21458 }, { "epoch": 0.657686649503494, "grad_norm": 1.456219268559671, "learning_rate": 2.77075941459673e-06, "loss": 0.6691, "step": 21459 }, { "epoch": 0.6577172980262351, "grad_norm": 0.47991237184346003, "learning_rate": 2.7703151672005457e-06, "loss": 0.4002, "step": 21460 }, { "epoch": 0.6577479465489764, "grad_norm": 1.3172963619356008, "learning_rate": 2.7698709417740165e-06, "loss": 0.6011, "step": 21461 }, { "epoch": 0.6577785950717175, "grad_norm": 1.1422632938126498, "learning_rate": 2.769426738321518e-06, "loss": 0.6379, "step": 21462 }, { "epoch": 0.6578092435944587, "grad_norm": 1.1802634154677527, "learning_rate": 2.768982556847429e-06, "loss": 0.64, "step": 21463 }, { "epoch": 0.6578398921172, "grad_norm": 1.4094114822695376, "learning_rate": 2.768538397356125e-06, "loss": 0.6578, "step": 21464 }, { "epoch": 0.6578705406399411, "grad_norm": 1.3361339085489696, "learning_rate": 2.768094259851985e-06, "loss": 0.6683, "step": 21465 }, { "epoch": 0.6579011891626824, "grad_norm": 1.4821748980593534, "learning_rate": 2.767650144339381e-06, "loss": 0.6191, "step": 21466 }, { "epoch": 0.6579318376854235, "grad_norm": 1.3091537219516536, "learning_rate": 2.7672060508226923e-06, "loss": 0.5856, "step": 21467 }, { "epoch": 0.6579624862081648, "grad_norm": 1.1772036226976967, "learning_rate": 2.766761979306295e-06, "loss": 0.5768, "step": 21468 }, { "epoch": 0.6579931347309059, "grad_norm": 1.1145110184440956, "learning_rate": 2.7663179297945637e-06, "loss": 0.5998, "step": 21469 }, { "epoch": 0.6580237832536472, "grad_norm": 1.1715865427299486, "learning_rate": 2.765873902291871e-06, "loss": 0.5541, "step": 21470 }, { "epoch": 0.6580544317763883, "grad_norm": 1.3719109156891887, "learning_rate": 2.765429896802595e-06, "loss": 0.5757, "step": 21471 }, { "epoch": 0.6580850802991296, "grad_norm": 1.4212908315970996, "learning_rate": 2.7649859133311092e-06, "loss": 0.6383, "step": 21472 }, { "epoch": 0.6581157288218707, "grad_norm": 0.47560617256757176, "learning_rate": 2.764541951881791e-06, "loss": 0.4105, "step": 21473 }, { "epoch": 0.658146377344612, "grad_norm": 1.5035763758718208, "learning_rate": 2.7640980124590113e-06, "loss": 0.6617, "step": 21474 }, { "epoch": 0.6581770258673532, "grad_norm": 0.4660434275510502, "learning_rate": 2.7636540950671463e-06, "loss": 0.4214, "step": 21475 }, { "epoch": 0.6582076743900944, "grad_norm": 1.270828293480906, "learning_rate": 2.7632101997105708e-06, "loss": 0.5997, "step": 21476 }, { "epoch": 0.6582383229128356, "grad_norm": 1.3232364221494142, "learning_rate": 2.7627663263936582e-06, "loss": 0.6046, "step": 21477 }, { "epoch": 0.6582689714355768, "grad_norm": 1.2218768685273236, "learning_rate": 2.7623224751207773e-06, "loss": 0.6751, "step": 21478 }, { "epoch": 0.658299619958318, "grad_norm": 1.3084230037830007, "learning_rate": 2.7618786458963096e-06, "loss": 0.59, "step": 21479 }, { "epoch": 0.6583302684810592, "grad_norm": 1.372430084929893, "learning_rate": 2.761434838724622e-06, "loss": 0.6266, "step": 21480 }, { "epoch": 0.6583609170038004, "grad_norm": 1.27498258282089, "learning_rate": 2.760991053610092e-06, "loss": 0.6402, "step": 21481 }, { "epoch": 0.6583915655265417, "grad_norm": 1.4906280785578112, "learning_rate": 2.7605472905570875e-06, "loss": 0.647, "step": 21482 }, { "epoch": 0.6584222140492828, "grad_norm": 1.3654132338933478, "learning_rate": 2.7601035495699843e-06, "loss": 0.6545, "step": 21483 }, { "epoch": 0.6584528625720241, "grad_norm": 1.3817336300700838, "learning_rate": 2.7596598306531554e-06, "loss": 0.5955, "step": 21484 }, { "epoch": 0.6584835110947652, "grad_norm": 1.2266591559612126, "learning_rate": 2.75921613381097e-06, "loss": 0.6028, "step": 21485 }, { "epoch": 0.6585141596175065, "grad_norm": 1.4111835364351906, "learning_rate": 2.7587724590478005e-06, "loss": 0.6094, "step": 21486 }, { "epoch": 0.6585448081402476, "grad_norm": 1.3037736579903052, "learning_rate": 2.7583288063680214e-06, "loss": 0.578, "step": 21487 }, { "epoch": 0.6585754566629889, "grad_norm": 1.221589384166613, "learning_rate": 2.757885175776003e-06, "loss": 0.5826, "step": 21488 }, { "epoch": 0.65860610518573, "grad_norm": 1.2249094123756792, "learning_rate": 2.7574415672761113e-06, "loss": 0.619, "step": 21489 }, { "epoch": 0.6586367537084713, "grad_norm": 1.2728202588764015, "learning_rate": 2.7569979808727255e-06, "loss": 0.5468, "step": 21490 }, { "epoch": 0.6586674022312125, "grad_norm": 1.302836310650354, "learning_rate": 2.75655441657021e-06, "loss": 0.535, "step": 21491 }, { "epoch": 0.6586980507539537, "grad_norm": 0.4652635242706178, "learning_rate": 2.75611087437294e-06, "loss": 0.3963, "step": 21492 }, { "epoch": 0.6587286992766949, "grad_norm": 1.6091990033405517, "learning_rate": 2.7556673542852825e-06, "loss": 0.6454, "step": 21493 }, { "epoch": 0.658759347799436, "grad_norm": 1.329919377973116, "learning_rate": 2.7552238563116086e-06, "loss": 0.6831, "step": 21494 }, { "epoch": 0.6587899963221773, "grad_norm": 1.3301372857319342, "learning_rate": 2.75478038045629e-06, "loss": 0.7015, "step": 21495 }, { "epoch": 0.6588206448449184, "grad_norm": 1.2685767776765553, "learning_rate": 2.754336926723693e-06, "loss": 0.6128, "step": 21496 }, { "epoch": 0.6588512933676597, "grad_norm": 1.250150946275294, "learning_rate": 2.7538934951181884e-06, "loss": 0.6917, "step": 21497 }, { "epoch": 0.6588819418904008, "grad_norm": 1.244357463017689, "learning_rate": 2.7534500856441483e-06, "loss": 0.613, "step": 21498 }, { "epoch": 0.6589125904131421, "grad_norm": 1.2661487356188317, "learning_rate": 2.7530066983059365e-06, "loss": 0.7005, "step": 21499 }, { "epoch": 0.6589432389358832, "grad_norm": 1.1180603894806103, "learning_rate": 2.752563333107926e-06, "loss": 0.6374, "step": 21500 }, { "epoch": 0.6589738874586245, "grad_norm": 1.2499596864507834, "learning_rate": 2.7521199900544847e-06, "loss": 0.6341, "step": 21501 }, { "epoch": 0.6590045359813657, "grad_norm": 1.3377790341439801, "learning_rate": 2.7516766691499797e-06, "loss": 0.6044, "step": 21502 }, { "epoch": 0.6590351845041069, "grad_norm": 0.4338074621893261, "learning_rate": 2.7512333703987803e-06, "loss": 0.4243, "step": 21503 }, { "epoch": 0.6590658330268481, "grad_norm": 0.4524788190812765, "learning_rate": 2.750790093805253e-06, "loss": 0.4004, "step": 21504 }, { "epoch": 0.6590964815495893, "grad_norm": 1.1982076559046746, "learning_rate": 2.750346839373766e-06, "loss": 0.5731, "step": 21505 }, { "epoch": 0.6591271300723305, "grad_norm": 1.316673520076996, "learning_rate": 2.7499036071086893e-06, "loss": 0.6284, "step": 21506 }, { "epoch": 0.6591577785950717, "grad_norm": 1.2388878947363795, "learning_rate": 2.749460397014385e-06, "loss": 0.5192, "step": 21507 }, { "epoch": 0.6591884271178129, "grad_norm": 1.1272028885992937, "learning_rate": 2.749017209095225e-06, "loss": 0.5707, "step": 21508 }, { "epoch": 0.6592190756405542, "grad_norm": 1.4358079560222525, "learning_rate": 2.7485740433555753e-06, "loss": 0.5293, "step": 21509 }, { "epoch": 0.6592497241632953, "grad_norm": 1.4092175187273481, "learning_rate": 2.7481308997998e-06, "loss": 0.5866, "step": 21510 }, { "epoch": 0.6592803726860366, "grad_norm": 0.4460941999199262, "learning_rate": 2.7476877784322662e-06, "loss": 0.3971, "step": 21511 }, { "epoch": 0.6593110212087777, "grad_norm": 1.436630220831141, "learning_rate": 2.7472446792573435e-06, "loss": 0.68, "step": 21512 }, { "epoch": 0.659341669731519, "grad_norm": 1.4332886262766722, "learning_rate": 2.746801602279394e-06, "loss": 0.5185, "step": 21513 }, { "epoch": 0.6593723182542601, "grad_norm": 1.1506973730940642, "learning_rate": 2.7463585475027866e-06, "loss": 0.5492, "step": 21514 }, { "epoch": 0.6594029667770014, "grad_norm": 1.4620597415558056, "learning_rate": 2.7459155149318828e-06, "loss": 0.5937, "step": 21515 }, { "epoch": 0.6594336152997425, "grad_norm": 1.47400263140002, "learning_rate": 2.74547250457105e-06, "loss": 0.6238, "step": 21516 }, { "epoch": 0.6594642638224838, "grad_norm": 1.1678173884173997, "learning_rate": 2.7450295164246556e-06, "loss": 0.5677, "step": 21517 }, { "epoch": 0.659494912345225, "grad_norm": 1.3438235648236194, "learning_rate": 2.74458655049706e-06, "loss": 0.5649, "step": 21518 }, { "epoch": 0.6595255608679662, "grad_norm": 1.3296995345700011, "learning_rate": 2.7441436067926307e-06, "loss": 0.6489, "step": 21519 }, { "epoch": 0.6595562093907074, "grad_norm": 1.1488507554844474, "learning_rate": 2.743700685315734e-06, "loss": 0.5863, "step": 21520 }, { "epoch": 0.6595868579134486, "grad_norm": 1.13151341102455, "learning_rate": 2.743257786070729e-06, "loss": 0.5557, "step": 21521 }, { "epoch": 0.6596175064361898, "grad_norm": 0.4646858326131109, "learning_rate": 2.742814909061985e-06, "loss": 0.4242, "step": 21522 }, { "epoch": 0.659648154958931, "grad_norm": 1.2550025055119638, "learning_rate": 2.74237205429386e-06, "loss": 0.6323, "step": 21523 }, { "epoch": 0.6596788034816722, "grad_norm": 1.1544903382657339, "learning_rate": 2.741929221770723e-06, "loss": 0.5594, "step": 21524 }, { "epoch": 0.6597094520044133, "grad_norm": 1.337755524364742, "learning_rate": 2.7414864114969355e-06, "loss": 0.6668, "step": 21525 }, { "epoch": 0.6597401005271546, "grad_norm": 1.3174963911578228, "learning_rate": 2.7410436234768584e-06, "loss": 0.629, "step": 21526 }, { "epoch": 0.6597707490498957, "grad_norm": 1.251481920097303, "learning_rate": 2.740600857714857e-06, "loss": 0.5748, "step": 21527 }, { "epoch": 0.659801397572637, "grad_norm": 1.2493463194813383, "learning_rate": 2.7401581142152945e-06, "loss": 0.6465, "step": 21528 }, { "epoch": 0.6598320460953782, "grad_norm": 1.4233584085911748, "learning_rate": 2.7397153929825317e-06, "loss": 0.6184, "step": 21529 }, { "epoch": 0.6598626946181194, "grad_norm": 1.4071849621229173, "learning_rate": 2.73927269402093e-06, "loss": 0.5916, "step": 21530 }, { "epoch": 0.6598933431408606, "grad_norm": 1.2406661215472874, "learning_rate": 2.7388300173348557e-06, "loss": 0.5595, "step": 21531 }, { "epoch": 0.6599239916636018, "grad_norm": 1.09199346229204, "learning_rate": 2.7383873629286658e-06, "loss": 0.5971, "step": 21532 }, { "epoch": 0.659954640186343, "grad_norm": 1.3875499211895514, "learning_rate": 2.737944730806725e-06, "loss": 0.5569, "step": 21533 }, { "epoch": 0.6599852887090842, "grad_norm": 1.259634978781929, "learning_rate": 2.737502120973391e-06, "loss": 0.6443, "step": 21534 }, { "epoch": 0.6600159372318254, "grad_norm": 1.2945845355138206, "learning_rate": 2.737059533433031e-06, "loss": 0.705, "step": 21535 }, { "epoch": 0.6600465857545667, "grad_norm": 1.4148234017379135, "learning_rate": 2.7366169681900013e-06, "loss": 0.6782, "step": 21536 }, { "epoch": 0.6600772342773078, "grad_norm": 1.1894887726966257, "learning_rate": 2.7361744252486626e-06, "loss": 0.629, "step": 21537 }, { "epoch": 0.6601078828000491, "grad_norm": 1.4762597866984326, "learning_rate": 2.735731904613377e-06, "loss": 0.6294, "step": 21538 }, { "epoch": 0.6601385313227902, "grad_norm": 0.450580769821319, "learning_rate": 2.735289406288505e-06, "loss": 0.4002, "step": 21539 }, { "epoch": 0.6601691798455315, "grad_norm": 0.4477534279481156, "learning_rate": 2.734846930278405e-06, "loss": 0.4026, "step": 21540 }, { "epoch": 0.6601998283682726, "grad_norm": 1.218412670327911, "learning_rate": 2.734404476587438e-06, "loss": 0.5937, "step": 21541 }, { "epoch": 0.6602304768910139, "grad_norm": 0.45816371451321036, "learning_rate": 2.7339620452199646e-06, "loss": 0.3847, "step": 21542 }, { "epoch": 0.660261125413755, "grad_norm": 1.3598578516324629, "learning_rate": 2.7335196361803408e-06, "loss": 0.5742, "step": 21543 }, { "epoch": 0.6602917739364963, "grad_norm": 1.3464012917377959, "learning_rate": 2.7330772494729304e-06, "loss": 0.6751, "step": 21544 }, { "epoch": 0.6603224224592374, "grad_norm": 1.2337424662606544, "learning_rate": 2.732634885102086e-06, "loss": 0.6263, "step": 21545 }, { "epoch": 0.6603530709819787, "grad_norm": 1.297071150346236, "learning_rate": 2.732192543072174e-06, "loss": 0.5544, "step": 21546 }, { "epoch": 0.6603837195047199, "grad_norm": 1.2231231848157798, "learning_rate": 2.7317502233875487e-06, "loss": 0.5928, "step": 21547 }, { "epoch": 0.6604143680274611, "grad_norm": 1.328576739883212, "learning_rate": 2.731307926052568e-06, "loss": 0.6453, "step": 21548 }, { "epoch": 0.6604450165502023, "grad_norm": 1.287042223937104, "learning_rate": 2.730865651071589e-06, "loss": 0.6149, "step": 21549 }, { "epoch": 0.6604756650729435, "grad_norm": 1.205854351414827, "learning_rate": 2.7304233984489746e-06, "loss": 0.5948, "step": 21550 }, { "epoch": 0.6605063135956847, "grad_norm": 1.211586695004074, "learning_rate": 2.7299811681890764e-06, "loss": 0.6052, "step": 21551 }, { "epoch": 0.6605369621184259, "grad_norm": 1.4762156123699628, "learning_rate": 2.729538960296255e-06, "loss": 0.5631, "step": 21552 }, { "epoch": 0.6605676106411671, "grad_norm": 1.204753496114472, "learning_rate": 2.7290967747748676e-06, "loss": 0.6204, "step": 21553 }, { "epoch": 0.6605982591639084, "grad_norm": 0.48162138969674184, "learning_rate": 2.7286546116292722e-06, "loss": 0.4279, "step": 21554 }, { "epoch": 0.6606289076866495, "grad_norm": 1.3790680101067674, "learning_rate": 2.7282124708638242e-06, "loss": 0.6817, "step": 21555 }, { "epoch": 0.6606595562093907, "grad_norm": 1.529114857370493, "learning_rate": 2.7277703524828757e-06, "loss": 0.5908, "step": 21556 }, { "epoch": 0.6606902047321319, "grad_norm": 1.2874418969961297, "learning_rate": 2.7273282564907918e-06, "loss": 0.6845, "step": 21557 }, { "epoch": 0.6607208532548731, "grad_norm": 0.4637640401629738, "learning_rate": 2.7268861828919237e-06, "loss": 0.396, "step": 21558 }, { "epoch": 0.6607515017776143, "grad_norm": 1.1657832374399644, "learning_rate": 2.7264441316906253e-06, "loss": 0.5895, "step": 21559 }, { "epoch": 0.6607821503003555, "grad_norm": 1.261073914042251, "learning_rate": 2.7260021028912553e-06, "loss": 0.6569, "step": 21560 }, { "epoch": 0.6608127988230967, "grad_norm": 1.2070677449205143, "learning_rate": 2.7255600964981683e-06, "loss": 0.5731, "step": 21561 }, { "epoch": 0.6608434473458379, "grad_norm": 1.4123445430083097, "learning_rate": 2.725118112515721e-06, "loss": 0.6467, "step": 21562 }, { "epoch": 0.6608740958685791, "grad_norm": 1.3627165284425482, "learning_rate": 2.7246761509482657e-06, "loss": 0.6397, "step": 21563 }, { "epoch": 0.6609047443913203, "grad_norm": 1.2503239206043397, "learning_rate": 2.7242342118001584e-06, "loss": 0.5406, "step": 21564 }, { "epoch": 0.6609353929140616, "grad_norm": 1.3325369035559216, "learning_rate": 2.7237922950757554e-06, "loss": 0.6172, "step": 21565 }, { "epoch": 0.6609660414368027, "grad_norm": 1.2897415082792965, "learning_rate": 2.7233504007794093e-06, "loss": 0.6537, "step": 21566 }, { "epoch": 0.660996689959544, "grad_norm": 1.3149083179400567, "learning_rate": 2.722908528915472e-06, "loss": 0.6327, "step": 21567 }, { "epoch": 0.6610273384822851, "grad_norm": 0.4563132534609598, "learning_rate": 2.7224666794883002e-06, "loss": 0.3831, "step": 21568 }, { "epoch": 0.6610579870050264, "grad_norm": 1.2849355267032707, "learning_rate": 2.7220248525022485e-06, "loss": 0.5596, "step": 21569 }, { "epoch": 0.6610886355277675, "grad_norm": 1.3198964166660092, "learning_rate": 2.721583047961667e-06, "loss": 0.6031, "step": 21570 }, { "epoch": 0.6611192840505088, "grad_norm": 1.3383885715988173, "learning_rate": 2.72114126587091e-06, "loss": 0.6218, "step": 21571 }, { "epoch": 0.6611499325732499, "grad_norm": 0.9935346180051162, "learning_rate": 2.7206995062343323e-06, "loss": 0.5426, "step": 21572 }, { "epoch": 0.6611805810959912, "grad_norm": 0.44843667861610864, "learning_rate": 2.720257769056287e-06, "loss": 0.4189, "step": 21573 }, { "epoch": 0.6612112296187324, "grad_norm": 1.2703633831716146, "learning_rate": 2.719816054341125e-06, "loss": 0.6136, "step": 21574 }, { "epoch": 0.6612418781414736, "grad_norm": 1.3045664606988716, "learning_rate": 2.719374362093195e-06, "loss": 0.6211, "step": 21575 }, { "epoch": 0.6612725266642148, "grad_norm": 1.2758185452313602, "learning_rate": 2.7189326923168567e-06, "loss": 0.6283, "step": 21576 }, { "epoch": 0.661303175186956, "grad_norm": 1.2021626565789731, "learning_rate": 2.7184910450164586e-06, "loss": 0.6091, "step": 21577 }, { "epoch": 0.6613338237096972, "grad_norm": 1.2942902794008038, "learning_rate": 2.7180494201963505e-06, "loss": 0.6215, "step": 21578 }, { "epoch": 0.6613644722324384, "grad_norm": 1.4641509752919952, "learning_rate": 2.7176078178608844e-06, "loss": 0.6211, "step": 21579 }, { "epoch": 0.6613951207551796, "grad_norm": 1.2107566102872933, "learning_rate": 2.7171662380144124e-06, "loss": 0.6025, "step": 21580 }, { "epoch": 0.6614257692779208, "grad_norm": 1.216866363789544, "learning_rate": 2.716724680661288e-06, "loss": 0.5227, "step": 21581 }, { "epoch": 0.661456417800662, "grad_norm": 1.2085038192762405, "learning_rate": 2.7162831458058573e-06, "loss": 0.6041, "step": 21582 }, { "epoch": 0.6614870663234033, "grad_norm": 1.2720257163286703, "learning_rate": 2.7158416334524728e-06, "loss": 0.6165, "step": 21583 }, { "epoch": 0.6615177148461444, "grad_norm": 1.1470302831646464, "learning_rate": 2.7154001436054876e-06, "loss": 0.589, "step": 21584 }, { "epoch": 0.6615483633688857, "grad_norm": 1.3033784953663126, "learning_rate": 2.714958676269249e-06, "loss": 0.6351, "step": 21585 }, { "epoch": 0.6615790118916268, "grad_norm": 1.2457323578547863, "learning_rate": 2.7145172314481037e-06, "loss": 0.5241, "step": 21586 }, { "epoch": 0.661609660414368, "grad_norm": 1.2366625444550166, "learning_rate": 2.714075809146409e-06, "loss": 0.5529, "step": 21587 }, { "epoch": 0.6616403089371092, "grad_norm": 1.4942400769683257, "learning_rate": 2.7136344093685075e-06, "loss": 0.6936, "step": 21588 }, { "epoch": 0.6616709574598504, "grad_norm": 0.44645481380185115, "learning_rate": 2.713193032118754e-06, "loss": 0.4141, "step": 21589 }, { "epoch": 0.6617016059825916, "grad_norm": 1.3901889084990042, "learning_rate": 2.7127516774014915e-06, "loss": 0.6761, "step": 21590 }, { "epoch": 0.6617322545053328, "grad_norm": 1.2199202907305242, "learning_rate": 2.712310345221073e-06, "loss": 0.5612, "step": 21591 }, { "epoch": 0.6617629030280741, "grad_norm": 1.4079002891838148, "learning_rate": 2.711869035581848e-06, "loss": 0.5934, "step": 21592 }, { "epoch": 0.6617935515508152, "grad_norm": 1.255538889810124, "learning_rate": 2.71142774848816e-06, "loss": 0.6205, "step": 21593 }, { "epoch": 0.6618242000735565, "grad_norm": 1.2575209270774261, "learning_rate": 2.71098648394436e-06, "loss": 0.587, "step": 21594 }, { "epoch": 0.6618548485962976, "grad_norm": 1.2518363786793651, "learning_rate": 2.7105452419547982e-06, "loss": 0.6014, "step": 21595 }, { "epoch": 0.6618854971190389, "grad_norm": 1.2967311649967288, "learning_rate": 2.7101040225238205e-06, "loss": 0.6395, "step": 21596 }, { "epoch": 0.66191614564178, "grad_norm": 1.2794426869115507, "learning_rate": 2.709662825655769e-06, "loss": 0.6537, "step": 21597 }, { "epoch": 0.6619467941645213, "grad_norm": 1.387978684453758, "learning_rate": 2.7092216513549997e-06, "loss": 0.6771, "step": 21598 }, { "epoch": 0.6619774426872624, "grad_norm": 1.2861435312365848, "learning_rate": 2.708780499625854e-06, "loss": 0.5929, "step": 21599 }, { "epoch": 0.6620080912100037, "grad_norm": 1.3648770364430027, "learning_rate": 2.7083393704726824e-06, "loss": 0.6265, "step": 21600 }, { "epoch": 0.6620387397327449, "grad_norm": 1.2953534519763892, "learning_rate": 2.7078982638998265e-06, "loss": 0.5635, "step": 21601 }, { "epoch": 0.6620693882554861, "grad_norm": 1.4203535074484754, "learning_rate": 2.7074571799116354e-06, "loss": 0.6554, "step": 21602 }, { "epoch": 0.6621000367782273, "grad_norm": 1.3543794608782111, "learning_rate": 2.7070161185124582e-06, "loss": 0.5468, "step": 21603 }, { "epoch": 0.6621306853009685, "grad_norm": 1.2855687368489916, "learning_rate": 2.706575079706636e-06, "loss": 0.5314, "step": 21604 }, { "epoch": 0.6621613338237097, "grad_norm": 1.3929546189611268, "learning_rate": 2.7061340634985155e-06, "loss": 0.6248, "step": 21605 }, { "epoch": 0.6621919823464509, "grad_norm": 1.3494016081376554, "learning_rate": 2.7056930698924457e-06, "loss": 0.6144, "step": 21606 }, { "epoch": 0.6622226308691921, "grad_norm": 1.2520657469259697, "learning_rate": 2.7052520988927666e-06, "loss": 0.6516, "step": 21607 }, { "epoch": 0.6622532793919333, "grad_norm": 1.4321892649034635, "learning_rate": 2.7048111505038253e-06, "loss": 0.6308, "step": 21608 }, { "epoch": 0.6622839279146745, "grad_norm": 1.3857676457957528, "learning_rate": 2.7043702247299695e-06, "loss": 0.6099, "step": 21609 }, { "epoch": 0.6623145764374158, "grad_norm": 1.2843880377475267, "learning_rate": 2.703929321575539e-06, "loss": 0.6193, "step": 21610 }, { "epoch": 0.6623452249601569, "grad_norm": 1.3023860929977458, "learning_rate": 2.703488441044883e-06, "loss": 0.6552, "step": 21611 }, { "epoch": 0.6623758734828982, "grad_norm": 1.2471674987219212, "learning_rate": 2.7030475831423406e-06, "loss": 0.5911, "step": 21612 }, { "epoch": 0.6624065220056393, "grad_norm": 1.555164876433541, "learning_rate": 2.702606747872258e-06, "loss": 0.7147, "step": 21613 }, { "epoch": 0.6624371705283806, "grad_norm": 1.1996400187024359, "learning_rate": 2.7021659352389814e-06, "loss": 0.5461, "step": 21614 }, { "epoch": 0.6624678190511217, "grad_norm": 1.3963666731978372, "learning_rate": 2.701725145246849e-06, "loss": 0.6443, "step": 21615 }, { "epoch": 0.662498467573863, "grad_norm": 1.3411472939874072, "learning_rate": 2.7012843779002074e-06, "loss": 0.5922, "step": 21616 }, { "epoch": 0.6625291160966041, "grad_norm": 1.281246838317989, "learning_rate": 2.7008436332034004e-06, "loss": 0.6589, "step": 21617 }, { "epoch": 0.6625597646193453, "grad_norm": 1.406574647646681, "learning_rate": 2.700402911160768e-06, "loss": 0.61, "step": 21618 }, { "epoch": 0.6625904131420866, "grad_norm": 1.3382168788655637, "learning_rate": 2.6999622117766553e-06, "loss": 0.5561, "step": 21619 }, { "epoch": 0.6626210616648277, "grad_norm": 1.1202617687962917, "learning_rate": 2.6995215350554015e-06, "loss": 0.6297, "step": 21620 }, { "epoch": 0.662651710187569, "grad_norm": 0.45846655614118165, "learning_rate": 2.699080881001351e-06, "loss": 0.423, "step": 21621 }, { "epoch": 0.6626823587103101, "grad_norm": 1.4376569496832385, "learning_rate": 2.698640249618848e-06, "loss": 0.5636, "step": 21622 }, { "epoch": 0.6627130072330514, "grad_norm": 1.3579211688424595, "learning_rate": 2.6981996409122285e-06, "loss": 0.6056, "step": 21623 }, { "epoch": 0.6627436557557925, "grad_norm": 1.3977036530540607, "learning_rate": 2.697759054885837e-06, "loss": 0.6971, "step": 21624 }, { "epoch": 0.6627743042785338, "grad_norm": 1.2387637424403626, "learning_rate": 2.6973184915440165e-06, "loss": 0.5497, "step": 21625 }, { "epoch": 0.6628049528012749, "grad_norm": 0.45522007874714787, "learning_rate": 2.6968779508911047e-06, "loss": 0.4044, "step": 21626 }, { "epoch": 0.6628356013240162, "grad_norm": 1.243925894969965, "learning_rate": 2.696437432931443e-06, "loss": 0.5672, "step": 21627 }, { "epoch": 0.6628662498467573, "grad_norm": 1.4321928774895254, "learning_rate": 2.695996937669375e-06, "loss": 0.6028, "step": 21628 }, { "epoch": 0.6628968983694986, "grad_norm": 0.45245218613797883, "learning_rate": 2.6955564651092368e-06, "loss": 0.3869, "step": 21629 }, { "epoch": 0.6629275468922398, "grad_norm": 0.4392231931281614, "learning_rate": 2.6951160152553724e-06, "loss": 0.3965, "step": 21630 }, { "epoch": 0.662958195414981, "grad_norm": 1.42211718749936, "learning_rate": 2.694675588112117e-06, "loss": 0.6547, "step": 21631 }, { "epoch": 0.6629888439377222, "grad_norm": 1.3542618117581493, "learning_rate": 2.6942351836838133e-06, "loss": 0.6436, "step": 21632 }, { "epoch": 0.6630194924604634, "grad_norm": 1.3595714294709944, "learning_rate": 2.6937948019748024e-06, "loss": 0.641, "step": 21633 }, { "epoch": 0.6630501409832046, "grad_norm": 0.46033665501330245, "learning_rate": 2.6933544429894192e-06, "loss": 0.3964, "step": 21634 }, { "epoch": 0.6630807895059458, "grad_norm": 1.260097675042412, "learning_rate": 2.6929141067320052e-06, "loss": 0.5963, "step": 21635 }, { "epoch": 0.663111438028687, "grad_norm": 1.2690302590409346, "learning_rate": 2.6924737932069003e-06, "loss": 0.6229, "step": 21636 }, { "epoch": 0.6631420865514283, "grad_norm": 1.316679429359343, "learning_rate": 2.6920335024184398e-06, "loss": 0.646, "step": 21637 }, { "epoch": 0.6631727350741694, "grad_norm": 1.3122088406668218, "learning_rate": 2.691593234370964e-06, "loss": 0.6165, "step": 21638 }, { "epoch": 0.6632033835969107, "grad_norm": 1.226429832122343, "learning_rate": 2.691152989068812e-06, "loss": 0.6702, "step": 21639 }, { "epoch": 0.6632340321196518, "grad_norm": 1.3080798681595898, "learning_rate": 2.690712766516319e-06, "loss": 0.554, "step": 21640 }, { "epoch": 0.6632646806423931, "grad_norm": 0.46421034779595055, "learning_rate": 2.6902725667178254e-06, "loss": 0.4196, "step": 21641 }, { "epoch": 0.6632953291651342, "grad_norm": 1.4824308863537121, "learning_rate": 2.689832389677666e-06, "loss": 0.6757, "step": 21642 }, { "epoch": 0.6633259776878755, "grad_norm": 1.4161432933754126, "learning_rate": 2.6893922354001777e-06, "loss": 0.6001, "step": 21643 }, { "epoch": 0.6633566262106166, "grad_norm": 1.5380722384616703, "learning_rate": 2.6889521038897022e-06, "loss": 0.6363, "step": 21644 }, { "epoch": 0.6633872747333579, "grad_norm": 1.372109790796926, "learning_rate": 2.68851199515057e-06, "loss": 0.6008, "step": 21645 }, { "epoch": 0.663417923256099, "grad_norm": 1.408804380533745, "learning_rate": 2.6880719091871212e-06, "loss": 0.7179, "step": 21646 }, { "epoch": 0.6634485717788403, "grad_norm": 0.45476415421100636, "learning_rate": 2.687631846003693e-06, "loss": 0.4273, "step": 21647 }, { "epoch": 0.6634792203015815, "grad_norm": 1.331572079232132, "learning_rate": 2.6871918056046186e-06, "loss": 0.5818, "step": 21648 }, { "epoch": 0.6635098688243226, "grad_norm": 1.3736533679030536, "learning_rate": 2.6867517879942345e-06, "loss": 0.6295, "step": 21649 }, { "epoch": 0.6635405173470639, "grad_norm": 1.2750602318318935, "learning_rate": 2.686311793176879e-06, "loss": 0.5956, "step": 21650 }, { "epoch": 0.663571165869805, "grad_norm": 1.212609169896789, "learning_rate": 2.6858718211568834e-06, "loss": 0.6223, "step": 21651 }, { "epoch": 0.6636018143925463, "grad_norm": 1.2904791303804828, "learning_rate": 2.685431871938587e-06, "loss": 0.5802, "step": 21652 }, { "epoch": 0.6636324629152874, "grad_norm": 1.3055769342409071, "learning_rate": 2.6849919455263183e-06, "loss": 0.5809, "step": 21653 }, { "epoch": 0.6636631114380287, "grad_norm": 1.3842712057150959, "learning_rate": 2.684552041924421e-06, "loss": 0.5779, "step": 21654 }, { "epoch": 0.6636937599607698, "grad_norm": 1.3525073172759658, "learning_rate": 2.6841121611372234e-06, "loss": 0.624, "step": 21655 }, { "epoch": 0.6637244084835111, "grad_norm": 0.46535317372940743, "learning_rate": 2.6836723031690604e-06, "loss": 0.3946, "step": 21656 }, { "epoch": 0.6637550570062523, "grad_norm": 1.403674496779334, "learning_rate": 2.6832324680242667e-06, "loss": 0.6363, "step": 21657 }, { "epoch": 0.6637857055289935, "grad_norm": 1.3375677962607717, "learning_rate": 2.682792655707178e-06, "loss": 0.5727, "step": 21658 }, { "epoch": 0.6638163540517347, "grad_norm": 1.2650118430558472, "learning_rate": 2.6823528662221245e-06, "loss": 0.5622, "step": 21659 }, { "epoch": 0.6638470025744759, "grad_norm": 1.386152110171202, "learning_rate": 2.681913099573441e-06, "loss": 0.6623, "step": 21660 }, { "epoch": 0.6638776510972171, "grad_norm": 1.3858110679586848, "learning_rate": 2.6814733557654604e-06, "loss": 0.5667, "step": 21661 }, { "epoch": 0.6639082996199583, "grad_norm": 0.41434693957422225, "learning_rate": 2.6810336348025185e-06, "loss": 0.3749, "step": 21662 }, { "epoch": 0.6639389481426995, "grad_norm": 1.2440737031034232, "learning_rate": 2.6805939366889455e-06, "loss": 0.6716, "step": 21663 }, { "epoch": 0.6639695966654408, "grad_norm": 1.387909011521165, "learning_rate": 2.680154261429072e-06, "loss": 0.5791, "step": 21664 }, { "epoch": 0.6640002451881819, "grad_norm": 1.230959249461221, "learning_rate": 2.679714609027232e-06, "loss": 0.5948, "step": 21665 }, { "epoch": 0.6640308937109232, "grad_norm": 0.46048419699306803, "learning_rate": 2.679274979487759e-06, "loss": 0.3966, "step": 21666 }, { "epoch": 0.6640615422336643, "grad_norm": 0.47598813973171283, "learning_rate": 2.6788353728149826e-06, "loss": 0.4011, "step": 21667 }, { "epoch": 0.6640921907564056, "grad_norm": 1.2014434272734384, "learning_rate": 2.6783957890132344e-06, "loss": 0.6265, "step": 21668 }, { "epoch": 0.6641228392791467, "grad_norm": 1.3634857784111867, "learning_rate": 2.677956228086849e-06, "loss": 0.6509, "step": 21669 }, { "epoch": 0.664153487801888, "grad_norm": 0.46353511149864957, "learning_rate": 2.6775166900401527e-06, "loss": 0.4043, "step": 21670 }, { "epoch": 0.6641841363246291, "grad_norm": 1.328707861010682, "learning_rate": 2.6770771748774806e-06, "loss": 0.6077, "step": 21671 }, { "epoch": 0.6642147848473704, "grad_norm": 0.44666424807700783, "learning_rate": 2.676637682603157e-06, "loss": 0.4219, "step": 21672 }, { "epoch": 0.6642454333701115, "grad_norm": 1.2757674039752736, "learning_rate": 2.6761982132215212e-06, "loss": 0.7163, "step": 21673 }, { "epoch": 0.6642760818928528, "grad_norm": 1.1511117500221497, "learning_rate": 2.6757587667368996e-06, "loss": 0.5481, "step": 21674 }, { "epoch": 0.664306730415594, "grad_norm": 0.44669554190227384, "learning_rate": 2.675319343153619e-06, "loss": 0.403, "step": 21675 }, { "epoch": 0.6643373789383352, "grad_norm": 1.270397008692704, "learning_rate": 2.674879942476012e-06, "loss": 0.6226, "step": 21676 }, { "epoch": 0.6643680274610764, "grad_norm": 1.387702737440939, "learning_rate": 2.674440564708409e-06, "loss": 0.7229, "step": 21677 }, { "epoch": 0.6643986759838176, "grad_norm": 1.4495625922355446, "learning_rate": 2.674001209855137e-06, "loss": 0.6615, "step": 21678 }, { "epoch": 0.6644293245065588, "grad_norm": 1.1846006190349698, "learning_rate": 2.673561877920526e-06, "loss": 0.579, "step": 21679 }, { "epoch": 0.6644599730292999, "grad_norm": 0.4514455859035241, "learning_rate": 2.6731225689089045e-06, "loss": 0.3894, "step": 21680 }, { "epoch": 0.6644906215520412, "grad_norm": 1.278018924585876, "learning_rate": 2.672683282824604e-06, "loss": 0.5688, "step": 21681 }, { "epoch": 0.6645212700747823, "grad_norm": 1.4697614886121244, "learning_rate": 2.6722440196719514e-06, "loss": 0.6776, "step": 21682 }, { "epoch": 0.6645519185975236, "grad_norm": 1.2840253834157507, "learning_rate": 2.6718047794552693e-06, "loss": 0.6006, "step": 21683 }, { "epoch": 0.6645825671202648, "grad_norm": 1.2901852408466972, "learning_rate": 2.6713655621788944e-06, "loss": 0.6566, "step": 21684 }, { "epoch": 0.664613215643006, "grad_norm": 1.3910847095486278, "learning_rate": 2.6709263678471504e-06, "loss": 0.6773, "step": 21685 }, { "epoch": 0.6646438641657472, "grad_norm": 1.3322013282539307, "learning_rate": 2.670487196464363e-06, "loss": 0.6093, "step": 21686 }, { "epoch": 0.6646745126884884, "grad_norm": 0.4484828483886932, "learning_rate": 2.670048048034861e-06, "loss": 0.3696, "step": 21687 }, { "epoch": 0.6647051612112296, "grad_norm": 1.4003813561283545, "learning_rate": 2.6696089225629718e-06, "loss": 0.6273, "step": 21688 }, { "epoch": 0.6647358097339708, "grad_norm": 1.2213435581136787, "learning_rate": 2.6691698200530247e-06, "loss": 0.5386, "step": 21689 }, { "epoch": 0.664766458256712, "grad_norm": 1.2420052945404945, "learning_rate": 2.668730740509341e-06, "loss": 0.6093, "step": 21690 }, { "epoch": 0.6647971067794533, "grad_norm": 1.4146755302480174, "learning_rate": 2.6682916839362504e-06, "loss": 0.5594, "step": 21691 }, { "epoch": 0.6648277553021944, "grad_norm": 1.2198989398513382, "learning_rate": 2.6678526503380795e-06, "loss": 0.5804, "step": 21692 }, { "epoch": 0.6648584038249357, "grad_norm": 1.2972883876793937, "learning_rate": 2.667413639719154e-06, "loss": 0.6311, "step": 21693 }, { "epoch": 0.6648890523476768, "grad_norm": 1.3380621078445243, "learning_rate": 2.666974652083795e-06, "loss": 0.6386, "step": 21694 }, { "epoch": 0.6649197008704181, "grad_norm": 0.4427165494519097, "learning_rate": 2.666535687436335e-06, "loss": 0.4126, "step": 21695 }, { "epoch": 0.6649503493931592, "grad_norm": 0.4480294731703408, "learning_rate": 2.666096745781096e-06, "loss": 0.3872, "step": 21696 }, { "epoch": 0.6649809979159005, "grad_norm": 1.2266914665821564, "learning_rate": 2.665657827122401e-06, "loss": 0.5953, "step": 21697 }, { "epoch": 0.6650116464386416, "grad_norm": 1.6367878894276844, "learning_rate": 2.665218931464577e-06, "loss": 0.656, "step": 21698 }, { "epoch": 0.6650422949613829, "grad_norm": 1.3220469895750586, "learning_rate": 2.6647800588119477e-06, "loss": 0.5977, "step": 21699 }, { "epoch": 0.665072943484124, "grad_norm": 1.2077279854510035, "learning_rate": 2.6643412091688403e-06, "loss": 0.5538, "step": 21700 }, { "epoch": 0.6651035920068653, "grad_norm": 1.2261373327957956, "learning_rate": 2.663902382539575e-06, "loss": 0.6158, "step": 21701 }, { "epoch": 0.6651342405296065, "grad_norm": 1.394115180466945, "learning_rate": 2.6634635789284762e-06, "loss": 0.7044, "step": 21702 }, { "epoch": 0.6651648890523477, "grad_norm": 1.1504692294040344, "learning_rate": 2.6630247983398717e-06, "loss": 0.5886, "step": 21703 }, { "epoch": 0.6651955375750889, "grad_norm": 1.4009727223715063, "learning_rate": 2.6625860407780806e-06, "loss": 0.6844, "step": 21704 }, { "epoch": 0.6652261860978301, "grad_norm": 1.2409053442198559, "learning_rate": 2.6621473062474244e-06, "loss": 0.7105, "step": 21705 }, { "epoch": 0.6652568346205713, "grad_norm": 1.4715494217581946, "learning_rate": 2.6617085947522325e-06, "loss": 0.7462, "step": 21706 }, { "epoch": 0.6652874831433125, "grad_norm": 1.3062742138401329, "learning_rate": 2.6612699062968217e-06, "loss": 0.6469, "step": 21707 }, { "epoch": 0.6653181316660537, "grad_norm": 1.3219528847231783, "learning_rate": 2.66083124088552e-06, "loss": 0.606, "step": 21708 }, { "epoch": 0.665348780188795, "grad_norm": 1.3021532803967686, "learning_rate": 2.660392598522643e-06, "loss": 0.6509, "step": 21709 }, { "epoch": 0.6653794287115361, "grad_norm": 1.16241149628243, "learning_rate": 2.659953979212517e-06, "loss": 0.5444, "step": 21710 }, { "epoch": 0.6654100772342773, "grad_norm": 1.3390591541953414, "learning_rate": 2.6595153829594654e-06, "loss": 0.6198, "step": 21711 }, { "epoch": 0.6654407257570185, "grad_norm": 1.3568505761285292, "learning_rate": 2.659076809767806e-06, "loss": 0.6488, "step": 21712 }, { "epoch": 0.6654713742797597, "grad_norm": 1.2820814141078545, "learning_rate": 2.6586382596418615e-06, "loss": 0.5804, "step": 21713 }, { "epoch": 0.6655020228025009, "grad_norm": 1.2112850006506577, "learning_rate": 2.658199732585955e-06, "loss": 0.5409, "step": 21714 }, { "epoch": 0.6655326713252421, "grad_norm": 1.205421814907232, "learning_rate": 2.657761228604404e-06, "loss": 0.6161, "step": 21715 }, { "epoch": 0.6655633198479833, "grad_norm": 1.2371859922594795, "learning_rate": 2.657322747701532e-06, "loss": 0.5487, "step": 21716 }, { "epoch": 0.6655939683707245, "grad_norm": 1.2141549826233964, "learning_rate": 2.656884289881657e-06, "loss": 0.5198, "step": 21717 }, { "epoch": 0.6656246168934657, "grad_norm": 1.3189067877873915, "learning_rate": 2.656445855149101e-06, "loss": 0.5897, "step": 21718 }, { "epoch": 0.6656552654162069, "grad_norm": 1.2039529432902352, "learning_rate": 2.656007443508185e-06, "loss": 0.5769, "step": 21719 }, { "epoch": 0.6656859139389482, "grad_norm": 1.3269211432791266, "learning_rate": 2.655569054963226e-06, "loss": 0.559, "step": 21720 }, { "epoch": 0.6657165624616893, "grad_norm": 1.766534168294161, "learning_rate": 2.6551306895185447e-06, "loss": 0.6811, "step": 21721 }, { "epoch": 0.6657472109844306, "grad_norm": 1.2650087746314527, "learning_rate": 2.6546923471784623e-06, "loss": 0.6088, "step": 21722 }, { "epoch": 0.6657778595071717, "grad_norm": 1.2485595672448504, "learning_rate": 2.6542540279472974e-06, "loss": 0.6178, "step": 21723 }, { "epoch": 0.665808508029913, "grad_norm": 1.3810376915563614, "learning_rate": 2.653815731829362e-06, "loss": 0.5218, "step": 21724 }, { "epoch": 0.6658391565526541, "grad_norm": 1.2384201426733723, "learning_rate": 2.653377458828986e-06, "loss": 0.6299, "step": 21725 }, { "epoch": 0.6658698050753954, "grad_norm": 1.3287858460202828, "learning_rate": 2.6529392089504798e-06, "loss": 0.5772, "step": 21726 }, { "epoch": 0.6659004535981365, "grad_norm": 1.2296510770774718, "learning_rate": 2.6525009821981663e-06, "loss": 0.5195, "step": 21727 }, { "epoch": 0.6659311021208778, "grad_norm": 1.3077916258704738, "learning_rate": 2.6520627785763588e-06, "loss": 0.6663, "step": 21728 }, { "epoch": 0.665961750643619, "grad_norm": 1.3229283076124294, "learning_rate": 2.6516245980893775e-06, "loss": 0.6069, "step": 21729 }, { "epoch": 0.6659923991663602, "grad_norm": 1.35801486258928, "learning_rate": 2.6511864407415423e-06, "loss": 0.6451, "step": 21730 }, { "epoch": 0.6660230476891014, "grad_norm": 0.4664238371504976, "learning_rate": 2.6507483065371654e-06, "loss": 0.4103, "step": 21731 }, { "epoch": 0.6660536962118426, "grad_norm": 1.5428086621803185, "learning_rate": 2.650310195480567e-06, "loss": 0.6837, "step": 21732 }, { "epoch": 0.6660843447345838, "grad_norm": 1.2882424910688737, "learning_rate": 2.649872107576066e-06, "loss": 0.6541, "step": 21733 }, { "epoch": 0.666114993257325, "grad_norm": 1.300235432455944, "learning_rate": 2.649434042827973e-06, "loss": 0.579, "step": 21734 }, { "epoch": 0.6661456417800662, "grad_norm": 1.1802320830856996, "learning_rate": 2.648996001240609e-06, "loss": 0.5651, "step": 21735 }, { "epoch": 0.6661762903028075, "grad_norm": 1.1168248746453062, "learning_rate": 2.6485579828182908e-06, "loss": 0.5599, "step": 21736 }, { "epoch": 0.6662069388255486, "grad_norm": 1.2792456333435145, "learning_rate": 2.6481199875653296e-06, "loss": 0.5856, "step": 21737 }, { "epoch": 0.6662375873482899, "grad_norm": 0.4619836004127, "learning_rate": 2.6476820154860467e-06, "loss": 0.4042, "step": 21738 }, { "epoch": 0.666268235871031, "grad_norm": 0.4454348973525808, "learning_rate": 2.647244066584753e-06, "loss": 0.3811, "step": 21739 }, { "epoch": 0.6662988843937723, "grad_norm": 1.349296227828735, "learning_rate": 2.6468061408657647e-06, "loss": 0.5832, "step": 21740 }, { "epoch": 0.6663295329165134, "grad_norm": 1.2202601290384352, "learning_rate": 2.6463682383333998e-06, "loss": 0.6066, "step": 21741 }, { "epoch": 0.6663601814392546, "grad_norm": 1.335019246999933, "learning_rate": 2.6459303589919693e-06, "loss": 0.6347, "step": 21742 }, { "epoch": 0.6663908299619958, "grad_norm": 1.3042576324673798, "learning_rate": 2.645492502845789e-06, "loss": 0.5983, "step": 21743 }, { "epoch": 0.666421478484737, "grad_norm": 1.3082580083674917, "learning_rate": 2.645054669899175e-06, "loss": 0.5686, "step": 21744 }, { "epoch": 0.6664521270074782, "grad_norm": 1.4648359604682426, "learning_rate": 2.6446168601564387e-06, "loss": 0.6447, "step": 21745 }, { "epoch": 0.6664827755302194, "grad_norm": 1.232409352316046, "learning_rate": 2.644179073621895e-06, "loss": 0.557, "step": 21746 }, { "epoch": 0.6665134240529607, "grad_norm": 1.3339432453206799, "learning_rate": 2.643741310299859e-06, "loss": 0.5973, "step": 21747 }, { "epoch": 0.6665440725757018, "grad_norm": 1.2417772351915124, "learning_rate": 2.643303570194641e-06, "loss": 0.5515, "step": 21748 }, { "epoch": 0.6665747210984431, "grad_norm": 1.238050894205818, "learning_rate": 2.6428658533105585e-06, "loss": 0.6581, "step": 21749 }, { "epoch": 0.6666053696211842, "grad_norm": 1.4106869362776349, "learning_rate": 2.64242815965192e-06, "loss": 0.6186, "step": 21750 }, { "epoch": 0.6666360181439255, "grad_norm": 1.4998817368136725, "learning_rate": 2.6419904892230386e-06, "loss": 0.6691, "step": 21751 }, { "epoch": 0.6666666666666666, "grad_norm": 1.0755960175193588, "learning_rate": 2.641552842028231e-06, "loss": 0.6091, "step": 21752 }, { "epoch": 0.6666973151894079, "grad_norm": 1.336744929038421, "learning_rate": 2.6411152180718046e-06, "loss": 0.6949, "step": 21753 }, { "epoch": 0.666727963712149, "grad_norm": 1.6695035862480094, "learning_rate": 2.640677617358074e-06, "loss": 0.6453, "step": 21754 }, { "epoch": 0.6667586122348903, "grad_norm": 1.2397191965432741, "learning_rate": 2.6402400398913525e-06, "loss": 0.6412, "step": 21755 }, { "epoch": 0.6667892607576315, "grad_norm": 1.286711794616524, "learning_rate": 2.6398024856759472e-06, "loss": 0.5313, "step": 21756 }, { "epoch": 0.6668199092803727, "grad_norm": 1.1435397197838122, "learning_rate": 2.639364954716172e-06, "loss": 0.5483, "step": 21757 }, { "epoch": 0.6668505578031139, "grad_norm": 0.4338540487238444, "learning_rate": 2.63892744701634e-06, "loss": 0.3962, "step": 21758 }, { "epoch": 0.6668812063258551, "grad_norm": 1.2445375236062588, "learning_rate": 2.638489962580758e-06, "loss": 0.5072, "step": 21759 }, { "epoch": 0.6669118548485963, "grad_norm": 1.395714477008882, "learning_rate": 2.63805250141374e-06, "loss": 0.6772, "step": 21760 }, { "epoch": 0.6669425033713375, "grad_norm": 1.3674281965519766, "learning_rate": 2.6376150635195942e-06, "loss": 0.7255, "step": 21761 }, { "epoch": 0.6669731518940787, "grad_norm": 1.3021998386898768, "learning_rate": 2.6371776489026307e-06, "loss": 0.5633, "step": 21762 }, { "epoch": 0.66700380041682, "grad_norm": 1.4118120895704322, "learning_rate": 2.636740257567163e-06, "loss": 0.6372, "step": 21763 }, { "epoch": 0.6670344489395611, "grad_norm": 1.4687575833520599, "learning_rate": 2.636302889517496e-06, "loss": 0.5991, "step": 21764 }, { "epoch": 0.6670650974623024, "grad_norm": 1.3591926703274573, "learning_rate": 2.6358655447579407e-06, "loss": 0.574, "step": 21765 }, { "epoch": 0.6670957459850435, "grad_norm": 1.4074590317283262, "learning_rate": 2.6354282232928098e-06, "loss": 0.6602, "step": 21766 }, { "epoch": 0.6671263945077848, "grad_norm": 1.4140579891854108, "learning_rate": 2.634990925126407e-06, "loss": 0.6464, "step": 21767 }, { "epoch": 0.6671570430305259, "grad_norm": 1.2243419388573122, "learning_rate": 2.6345536502630464e-06, "loss": 0.6245, "step": 21768 }, { "epoch": 0.6671876915532672, "grad_norm": 1.2819338121785357, "learning_rate": 2.634116398707032e-06, "loss": 0.6292, "step": 21769 }, { "epoch": 0.6672183400760083, "grad_norm": 1.356878182491622, "learning_rate": 2.633679170462674e-06, "loss": 0.7152, "step": 21770 }, { "epoch": 0.6672489885987496, "grad_norm": 1.4152360715017605, "learning_rate": 2.6332419655342823e-06, "loss": 0.6992, "step": 21771 }, { "epoch": 0.6672796371214907, "grad_norm": 1.1713538247869426, "learning_rate": 2.6328047839261608e-06, "loss": 0.6654, "step": 21772 }, { "epoch": 0.6673102856442319, "grad_norm": 1.4714803664889777, "learning_rate": 2.63236762564262e-06, "loss": 0.6948, "step": 21773 }, { "epoch": 0.6673409341669732, "grad_norm": 1.2843886705220227, "learning_rate": 2.6319304906879682e-06, "loss": 0.6312, "step": 21774 }, { "epoch": 0.6673715826897143, "grad_norm": 1.284578659459178, "learning_rate": 2.63149337906651e-06, "loss": 0.5467, "step": 21775 }, { "epoch": 0.6674022312124556, "grad_norm": 1.2229691692075673, "learning_rate": 2.631056290782553e-06, "loss": 0.6299, "step": 21776 }, { "epoch": 0.6674328797351967, "grad_norm": 0.45043111910027117, "learning_rate": 2.6306192258404062e-06, "loss": 0.4137, "step": 21777 }, { "epoch": 0.667463528257938, "grad_norm": 0.46066764397670457, "learning_rate": 2.6301821842443732e-06, "loss": 0.4012, "step": 21778 }, { "epoch": 0.6674941767806791, "grad_norm": 1.356613932367642, "learning_rate": 2.6297451659987626e-06, "loss": 0.629, "step": 21779 }, { "epoch": 0.6675248253034204, "grad_norm": 1.2806949358646758, "learning_rate": 2.629308171107876e-06, "loss": 0.6283, "step": 21780 }, { "epoch": 0.6675554738261615, "grad_norm": 1.1614849698308174, "learning_rate": 2.628871199576026e-06, "loss": 0.646, "step": 21781 }, { "epoch": 0.6675861223489028, "grad_norm": 0.4611041941459431, "learning_rate": 2.6284342514075155e-06, "loss": 0.3905, "step": 21782 }, { "epoch": 0.667616770871644, "grad_norm": 1.316851384656868, "learning_rate": 2.627997326606646e-06, "loss": 0.632, "step": 21783 }, { "epoch": 0.6676474193943852, "grad_norm": 1.1795411014967683, "learning_rate": 2.6275604251777265e-06, "loss": 0.5438, "step": 21784 }, { "epoch": 0.6676780679171264, "grad_norm": 1.3207156803017621, "learning_rate": 2.6271235471250633e-06, "loss": 0.6158, "step": 21785 }, { "epoch": 0.6677087164398676, "grad_norm": 1.3870914818765416, "learning_rate": 2.6266866924529566e-06, "loss": 0.6752, "step": 21786 }, { "epoch": 0.6677393649626088, "grad_norm": 1.3080300215827916, "learning_rate": 2.6262498611657134e-06, "loss": 0.6131, "step": 21787 }, { "epoch": 0.66777001348535, "grad_norm": 1.2440110724208155, "learning_rate": 2.625813053267637e-06, "loss": 0.6212, "step": 21788 }, { "epoch": 0.6678006620080912, "grad_norm": 0.41669875767306563, "learning_rate": 2.625376268763035e-06, "loss": 0.3776, "step": 21789 }, { "epoch": 0.6678313105308324, "grad_norm": 1.4522278508812256, "learning_rate": 2.6249395076562078e-06, "loss": 0.6202, "step": 21790 }, { "epoch": 0.6678619590535736, "grad_norm": 1.2121606533287597, "learning_rate": 2.6245027699514554e-06, "loss": 0.6232, "step": 21791 }, { "epoch": 0.6678926075763149, "grad_norm": 1.3818353439214797, "learning_rate": 2.624066055653089e-06, "loss": 0.6081, "step": 21792 }, { "epoch": 0.667923256099056, "grad_norm": 1.2258867932360384, "learning_rate": 2.6236293647654077e-06, "loss": 0.5839, "step": 21793 }, { "epoch": 0.6679539046217973, "grad_norm": 1.2918140561182652, "learning_rate": 2.623192697292712e-06, "loss": 0.6673, "step": 21794 }, { "epoch": 0.6679845531445384, "grad_norm": 1.297932863818515, "learning_rate": 2.622756053239307e-06, "loss": 0.6269, "step": 21795 }, { "epoch": 0.6680152016672797, "grad_norm": 1.248367670359927, "learning_rate": 2.6223194326094966e-06, "loss": 0.6857, "step": 21796 }, { "epoch": 0.6680458501900208, "grad_norm": 1.2368938448919686, "learning_rate": 2.621882835407579e-06, "loss": 0.5684, "step": 21797 }, { "epoch": 0.6680764987127621, "grad_norm": 1.2780409647203759, "learning_rate": 2.621446261637859e-06, "loss": 0.5656, "step": 21798 }, { "epoch": 0.6681071472355032, "grad_norm": 1.1914726400769369, "learning_rate": 2.6210097113046373e-06, "loss": 0.665, "step": 21799 }, { "epoch": 0.6681377957582445, "grad_norm": 0.45620828499198085, "learning_rate": 2.620573184412217e-06, "loss": 0.4005, "step": 21800 }, { "epoch": 0.6681684442809857, "grad_norm": 1.4152938041089254, "learning_rate": 2.6201366809648986e-06, "loss": 0.5973, "step": 21801 }, { "epoch": 0.6681990928037269, "grad_norm": 1.2838144730837298, "learning_rate": 2.6197002009669804e-06, "loss": 0.5485, "step": 21802 }, { "epoch": 0.6682297413264681, "grad_norm": 1.184278819028924, "learning_rate": 2.6192637444227646e-06, "loss": 0.677, "step": 21803 }, { "epoch": 0.6682603898492092, "grad_norm": 1.250977852033351, "learning_rate": 2.618827311336555e-06, "loss": 0.5758, "step": 21804 }, { "epoch": 0.6682910383719505, "grad_norm": 1.4429001465036222, "learning_rate": 2.6183909017126462e-06, "loss": 0.6386, "step": 21805 }, { "epoch": 0.6683216868946916, "grad_norm": 1.3255418326803587, "learning_rate": 2.617954515555342e-06, "loss": 0.6458, "step": 21806 }, { "epoch": 0.6683523354174329, "grad_norm": 0.47081373924437125, "learning_rate": 2.6175181528689416e-06, "loss": 0.4024, "step": 21807 }, { "epoch": 0.668382983940174, "grad_norm": 1.3574597896434095, "learning_rate": 2.617081813657746e-06, "loss": 0.6211, "step": 21808 }, { "epoch": 0.6684136324629153, "grad_norm": 1.2765044691403111, "learning_rate": 2.6166454979260525e-06, "loss": 0.5217, "step": 21809 }, { "epoch": 0.6684442809856564, "grad_norm": 1.2558268849598848, "learning_rate": 2.6162092056781573e-06, "loss": 0.7149, "step": 21810 }, { "epoch": 0.6684749295083977, "grad_norm": 1.2688654343432504, "learning_rate": 2.615772936918367e-06, "loss": 0.558, "step": 21811 }, { "epoch": 0.6685055780311389, "grad_norm": 1.4479691297708956, "learning_rate": 2.6153366916509757e-06, "loss": 0.6446, "step": 21812 }, { "epoch": 0.6685362265538801, "grad_norm": 1.0977932900411187, "learning_rate": 2.61490046988028e-06, "loss": 0.5055, "step": 21813 }, { "epoch": 0.6685668750766213, "grad_norm": 1.136843326742298, "learning_rate": 2.6144642716105805e-06, "loss": 0.6328, "step": 21814 }, { "epoch": 0.6685975235993625, "grad_norm": 1.1130082966953438, "learning_rate": 2.6140280968461746e-06, "loss": 0.4747, "step": 21815 }, { "epoch": 0.6686281721221037, "grad_norm": 1.3080181868205887, "learning_rate": 2.613591945591362e-06, "loss": 0.5505, "step": 21816 }, { "epoch": 0.6686588206448449, "grad_norm": 1.2389595673232088, "learning_rate": 2.6131558178504375e-06, "loss": 0.6078, "step": 21817 }, { "epoch": 0.6686894691675861, "grad_norm": 1.394657931242221, "learning_rate": 2.6127197136276987e-06, "loss": 0.6061, "step": 21818 }, { "epoch": 0.6687201176903274, "grad_norm": 1.3218835777275448, "learning_rate": 2.612283632927446e-06, "loss": 0.6773, "step": 21819 }, { "epoch": 0.6687507662130685, "grad_norm": 0.45656473873220027, "learning_rate": 2.611847575753973e-06, "loss": 0.4309, "step": 21820 }, { "epoch": 0.6687814147358098, "grad_norm": 1.2523300938193718, "learning_rate": 2.6114115421115727e-06, "loss": 0.6424, "step": 21821 }, { "epoch": 0.6688120632585509, "grad_norm": 1.2905150262920142, "learning_rate": 2.6109755320045505e-06, "loss": 0.6092, "step": 21822 }, { "epoch": 0.6688427117812922, "grad_norm": 1.1442187784455717, "learning_rate": 2.610539545437196e-06, "loss": 0.5567, "step": 21823 }, { "epoch": 0.6688733603040333, "grad_norm": 1.1653479263379791, "learning_rate": 2.6101035824138064e-06, "loss": 0.6558, "step": 21824 }, { "epoch": 0.6689040088267746, "grad_norm": 1.3944094330224095, "learning_rate": 2.6096676429386767e-06, "loss": 0.6127, "step": 21825 }, { "epoch": 0.6689346573495157, "grad_norm": 1.1935444814907255, "learning_rate": 2.6092317270161037e-06, "loss": 0.6046, "step": 21826 }, { "epoch": 0.668965305872257, "grad_norm": 1.240440923667216, "learning_rate": 2.608795834650385e-06, "loss": 0.6297, "step": 21827 }, { "epoch": 0.6689959543949981, "grad_norm": 1.29966366450858, "learning_rate": 2.6083599658458096e-06, "loss": 0.5771, "step": 21828 }, { "epoch": 0.6690266029177394, "grad_norm": 0.45494823501696074, "learning_rate": 2.607924120606676e-06, "loss": 0.3882, "step": 21829 }, { "epoch": 0.6690572514404806, "grad_norm": 1.3723070221911338, "learning_rate": 2.6074882989372798e-06, "loss": 0.59, "step": 21830 }, { "epoch": 0.6690878999632218, "grad_norm": 1.2670734028975918, "learning_rate": 2.6070525008419135e-06, "loss": 0.6625, "step": 21831 }, { "epoch": 0.669118548485963, "grad_norm": 1.3877375120679742, "learning_rate": 2.6066167263248677e-06, "loss": 0.654, "step": 21832 }, { "epoch": 0.6691491970087042, "grad_norm": 1.2328687458470224, "learning_rate": 2.6061809753904426e-06, "loss": 0.6006, "step": 21833 }, { "epoch": 0.6691798455314454, "grad_norm": 1.3952394566383268, "learning_rate": 2.6057452480429278e-06, "loss": 0.6578, "step": 21834 }, { "epoch": 0.6692104940541865, "grad_norm": 1.1813948948468356, "learning_rate": 2.6053095442866196e-06, "loss": 0.5701, "step": 21835 }, { "epoch": 0.6692411425769278, "grad_norm": 0.4562715804668527, "learning_rate": 2.6048738641258063e-06, "loss": 0.4078, "step": 21836 }, { "epoch": 0.669271791099669, "grad_norm": 1.3477424239132778, "learning_rate": 2.6044382075647844e-06, "loss": 0.7281, "step": 21837 }, { "epoch": 0.6693024396224102, "grad_norm": 1.446667365644732, "learning_rate": 2.604002574607847e-06, "loss": 0.6032, "step": 21838 }, { "epoch": 0.6693330881451514, "grad_norm": 1.370515607494999, "learning_rate": 2.6035669652592843e-06, "loss": 0.6991, "step": 21839 }, { "epoch": 0.6693637366678926, "grad_norm": 0.44978463522677836, "learning_rate": 2.6031313795233894e-06, "loss": 0.4096, "step": 21840 }, { "epoch": 0.6693943851906338, "grad_norm": 1.1885332320783222, "learning_rate": 2.6026958174044557e-06, "loss": 0.685, "step": 21841 }, { "epoch": 0.669425033713375, "grad_norm": 1.2269423785674507, "learning_rate": 2.602260278906772e-06, "loss": 0.5534, "step": 21842 }, { "epoch": 0.6694556822361162, "grad_norm": 1.1919881571168658, "learning_rate": 2.6018247640346304e-06, "loss": 0.5687, "step": 21843 }, { "epoch": 0.6694863307588574, "grad_norm": 1.3538696747264838, "learning_rate": 2.601389272792326e-06, "loss": 0.5951, "step": 21844 }, { "epoch": 0.6695169792815986, "grad_norm": 1.3393378738519386, "learning_rate": 2.6009538051841443e-06, "loss": 0.6369, "step": 21845 }, { "epoch": 0.6695476278043399, "grad_norm": 1.478678155633255, "learning_rate": 2.60051836121438e-06, "loss": 0.5491, "step": 21846 }, { "epoch": 0.669578276327081, "grad_norm": 1.5121598766643254, "learning_rate": 2.600082940887321e-06, "loss": 0.6071, "step": 21847 }, { "epoch": 0.6696089248498223, "grad_norm": 1.3105459515392532, "learning_rate": 2.599647544207259e-06, "loss": 0.71, "step": 21848 }, { "epoch": 0.6696395733725634, "grad_norm": 1.2935633069021257, "learning_rate": 2.5992121711784858e-06, "loss": 0.6051, "step": 21849 }, { "epoch": 0.6696702218953047, "grad_norm": 1.1744166393556759, "learning_rate": 2.5987768218052866e-06, "loss": 0.603, "step": 21850 }, { "epoch": 0.6697008704180458, "grad_norm": 1.1855192614886243, "learning_rate": 2.5983414960919547e-06, "loss": 0.7184, "step": 21851 }, { "epoch": 0.6697315189407871, "grad_norm": 1.3649400860151266, "learning_rate": 2.5979061940427798e-06, "loss": 0.6266, "step": 21852 }, { "epoch": 0.6697621674635282, "grad_norm": 1.3484365058363945, "learning_rate": 2.5974709156620483e-06, "loss": 0.625, "step": 21853 }, { "epoch": 0.6697928159862695, "grad_norm": 1.2929347471766228, "learning_rate": 2.5970356609540522e-06, "loss": 0.5972, "step": 21854 }, { "epoch": 0.6698234645090106, "grad_norm": 1.3337306641153486, "learning_rate": 2.596600429923076e-06, "loss": 0.657, "step": 21855 }, { "epoch": 0.6698541130317519, "grad_norm": 1.6774256178898654, "learning_rate": 2.5961652225734126e-06, "loss": 0.6491, "step": 21856 }, { "epoch": 0.6698847615544931, "grad_norm": 0.4290697113382283, "learning_rate": 2.5957300389093486e-06, "loss": 0.3691, "step": 21857 }, { "epoch": 0.6699154100772343, "grad_norm": 0.49442506654206075, "learning_rate": 2.5952948789351708e-06, "loss": 0.3865, "step": 21858 }, { "epoch": 0.6699460585999755, "grad_norm": 1.1415448963759713, "learning_rate": 2.594859742655167e-06, "loss": 0.6263, "step": 21859 }, { "epoch": 0.6699767071227167, "grad_norm": 1.2718823625758324, "learning_rate": 2.5944246300736274e-06, "loss": 0.5746, "step": 21860 }, { "epoch": 0.6700073556454579, "grad_norm": 1.3396557872785668, "learning_rate": 2.5939895411948355e-06, "loss": 0.6032, "step": 21861 }, { "epoch": 0.6700380041681991, "grad_norm": 1.3843453338403677, "learning_rate": 2.5935544760230813e-06, "loss": 0.5376, "step": 21862 }, { "epoch": 0.6700686526909403, "grad_norm": 1.1582749175687028, "learning_rate": 2.5931194345626516e-06, "loss": 0.6344, "step": 21863 }, { "epoch": 0.6700993012136816, "grad_norm": 1.3321056575633887, "learning_rate": 2.59268441681783e-06, "loss": 0.657, "step": 21864 }, { "epoch": 0.6701299497364227, "grad_norm": 1.3612490933385184, "learning_rate": 2.592249422792907e-06, "loss": 0.5288, "step": 21865 }, { "epoch": 0.6701605982591639, "grad_norm": 1.3832501382318434, "learning_rate": 2.591814452492164e-06, "loss": 0.6313, "step": 21866 }, { "epoch": 0.6701912467819051, "grad_norm": 0.44105833653971704, "learning_rate": 2.59137950591989e-06, "loss": 0.4034, "step": 21867 }, { "epoch": 0.6702218953046463, "grad_norm": 1.303758248701586, "learning_rate": 2.590944583080372e-06, "loss": 0.641, "step": 21868 }, { "epoch": 0.6702525438273875, "grad_norm": 1.286005040478853, "learning_rate": 2.5905096839778907e-06, "loss": 0.6017, "step": 21869 }, { "epoch": 0.6702831923501287, "grad_norm": 1.3565935615389706, "learning_rate": 2.590074808616735e-06, "loss": 0.6295, "step": 21870 }, { "epoch": 0.6703138408728699, "grad_norm": 1.3296465671579474, "learning_rate": 2.58963995700119e-06, "loss": 0.6435, "step": 21871 }, { "epoch": 0.6703444893956111, "grad_norm": 0.44076101127684186, "learning_rate": 2.589205129135538e-06, "loss": 0.4038, "step": 21872 }, { "epoch": 0.6703751379183523, "grad_norm": 1.3165189715193963, "learning_rate": 2.5887703250240637e-06, "loss": 0.5854, "step": 21873 }, { "epoch": 0.6704057864410935, "grad_norm": 1.4182509452555423, "learning_rate": 2.5883355446710547e-06, "loss": 0.6069, "step": 21874 }, { "epoch": 0.6704364349638348, "grad_norm": 1.2602232211405393, "learning_rate": 2.587900788080791e-06, "loss": 0.6489, "step": 21875 }, { "epoch": 0.6704670834865759, "grad_norm": 1.0708398036306646, "learning_rate": 2.58746605525756e-06, "loss": 0.5262, "step": 21876 }, { "epoch": 0.6704977320093172, "grad_norm": 0.44218558887660714, "learning_rate": 2.5870313462056405e-06, "loss": 0.4007, "step": 21877 }, { "epoch": 0.6705283805320583, "grad_norm": 1.2053888199465979, "learning_rate": 2.5865966609293193e-06, "loss": 0.5188, "step": 21878 }, { "epoch": 0.6705590290547996, "grad_norm": 1.4163477708024506, "learning_rate": 2.5861619994328802e-06, "loss": 0.6925, "step": 21879 }, { "epoch": 0.6705896775775407, "grad_norm": 1.171820628152063, "learning_rate": 2.5857273617206024e-06, "loss": 0.5282, "step": 21880 }, { "epoch": 0.670620326100282, "grad_norm": 1.1430684926742085, "learning_rate": 2.5852927477967714e-06, "loss": 0.6469, "step": 21881 }, { "epoch": 0.6706509746230231, "grad_norm": 1.4415363931436342, "learning_rate": 2.5848581576656707e-06, "loss": 0.6333, "step": 21882 }, { "epoch": 0.6706816231457644, "grad_norm": 1.3362574172507853, "learning_rate": 2.5844235913315773e-06, "loss": 0.5683, "step": 21883 }, { "epoch": 0.6707122716685056, "grad_norm": 1.2416273656773742, "learning_rate": 2.5839890487987773e-06, "loss": 0.5783, "step": 21884 }, { "epoch": 0.6707429201912468, "grad_norm": 1.427543111611214, "learning_rate": 2.5835545300715537e-06, "loss": 0.6931, "step": 21885 }, { "epoch": 0.670773568713988, "grad_norm": 1.3209154775539296, "learning_rate": 2.583120035154183e-06, "loss": 0.6588, "step": 21886 }, { "epoch": 0.6708042172367292, "grad_norm": 1.2688508975738577, "learning_rate": 2.5826855640509507e-06, "loss": 0.63, "step": 21887 }, { "epoch": 0.6708348657594704, "grad_norm": 1.3922444818536586, "learning_rate": 2.5822511167661328e-06, "loss": 0.6899, "step": 21888 }, { "epoch": 0.6708655142822116, "grad_norm": 1.3406017941498225, "learning_rate": 2.581816693304017e-06, "loss": 0.6399, "step": 21889 }, { "epoch": 0.6708961628049528, "grad_norm": 1.4285598129494754, "learning_rate": 2.58138229366888e-06, "loss": 0.6292, "step": 21890 }, { "epoch": 0.670926811327694, "grad_norm": 1.3444123678291602, "learning_rate": 2.580947917865e-06, "loss": 0.5678, "step": 21891 }, { "epoch": 0.6709574598504352, "grad_norm": 0.46138210347380426, "learning_rate": 2.580513565896659e-06, "loss": 0.4153, "step": 21892 }, { "epoch": 0.6709881083731765, "grad_norm": 0.43226294703152884, "learning_rate": 2.5800792377681386e-06, "loss": 0.3915, "step": 21893 }, { "epoch": 0.6710187568959176, "grad_norm": 1.4507712945216313, "learning_rate": 2.579644933483715e-06, "loss": 0.6815, "step": 21894 }, { "epoch": 0.6710494054186589, "grad_norm": 1.2421471319903803, "learning_rate": 2.5792106530476695e-06, "loss": 0.5769, "step": 21895 }, { "epoch": 0.6710800539414, "grad_norm": 1.207267180324238, "learning_rate": 2.578776396464281e-06, "loss": 0.6424, "step": 21896 }, { "epoch": 0.6711107024641412, "grad_norm": 0.45490999976687746, "learning_rate": 2.5783421637378293e-06, "loss": 0.421, "step": 21897 }, { "epoch": 0.6711413509868824, "grad_norm": 1.341216172945373, "learning_rate": 2.5779079548725923e-06, "loss": 0.5685, "step": 21898 }, { "epoch": 0.6711719995096236, "grad_norm": 1.1945410023291132, "learning_rate": 2.5774737698728458e-06, "loss": 0.5763, "step": 21899 }, { "epoch": 0.6712026480323648, "grad_norm": 1.2600409649140025, "learning_rate": 2.57703960874287e-06, "loss": 0.6415, "step": 21900 }, { "epoch": 0.671233296555106, "grad_norm": 1.3710171738180155, "learning_rate": 2.576605471486945e-06, "loss": 0.6657, "step": 21901 }, { "epoch": 0.6712639450778473, "grad_norm": 1.139871425649863, "learning_rate": 2.5761713581093444e-06, "loss": 0.5192, "step": 21902 }, { "epoch": 0.6712945936005884, "grad_norm": 1.1849211823499048, "learning_rate": 2.5757372686143478e-06, "loss": 0.4824, "step": 21903 }, { "epoch": 0.6713252421233297, "grad_norm": 0.45335208502316865, "learning_rate": 2.5753032030062337e-06, "loss": 0.3937, "step": 21904 }, { "epoch": 0.6713558906460708, "grad_norm": 1.3068305306490742, "learning_rate": 2.5748691612892757e-06, "loss": 0.6206, "step": 21905 }, { "epoch": 0.6713865391688121, "grad_norm": 1.3183377137458965, "learning_rate": 2.5744351434677544e-06, "loss": 0.6067, "step": 21906 }, { "epoch": 0.6714171876915532, "grad_norm": 1.424305724676111, "learning_rate": 2.5740011495459403e-06, "loss": 0.6569, "step": 21907 }, { "epoch": 0.6714478362142945, "grad_norm": 1.5227890994150313, "learning_rate": 2.5735671795281177e-06, "loss": 0.6783, "step": 21908 }, { "epoch": 0.6714784847370356, "grad_norm": 1.2674368146028614, "learning_rate": 2.5731332334185577e-06, "loss": 0.5488, "step": 21909 }, { "epoch": 0.6715091332597769, "grad_norm": 1.3767706318295465, "learning_rate": 2.572699311221536e-06, "loss": 0.6137, "step": 21910 }, { "epoch": 0.671539781782518, "grad_norm": 1.3167990854778002, "learning_rate": 2.5722654129413283e-06, "loss": 0.5804, "step": 21911 }, { "epoch": 0.6715704303052593, "grad_norm": 1.3211128908340917, "learning_rate": 2.571831538582213e-06, "loss": 0.5872, "step": 21912 }, { "epoch": 0.6716010788280005, "grad_norm": 1.1607344135298907, "learning_rate": 2.5713976881484605e-06, "loss": 0.6224, "step": 21913 }, { "epoch": 0.6716317273507417, "grad_norm": 1.3990328697416723, "learning_rate": 2.5709638616443483e-06, "loss": 0.698, "step": 21914 }, { "epoch": 0.6716623758734829, "grad_norm": 1.3656022313049878, "learning_rate": 2.570530059074151e-06, "loss": 0.5984, "step": 21915 }, { "epoch": 0.6716930243962241, "grad_norm": 1.2206898042922762, "learning_rate": 2.570096280442144e-06, "loss": 0.5153, "step": 21916 }, { "epoch": 0.6717236729189653, "grad_norm": 1.3208406235902852, "learning_rate": 2.5696625257526e-06, "loss": 0.6391, "step": 21917 }, { "epoch": 0.6717543214417065, "grad_norm": 1.3787978972224035, "learning_rate": 2.5692287950097894e-06, "loss": 0.568, "step": 21918 }, { "epoch": 0.6717849699644477, "grad_norm": 1.188536221760324, "learning_rate": 2.5687950882179935e-06, "loss": 0.6667, "step": 21919 }, { "epoch": 0.671815618487189, "grad_norm": 1.1693362052946812, "learning_rate": 2.568361405381481e-06, "loss": 0.5977, "step": 21920 }, { "epoch": 0.6718462670099301, "grad_norm": 1.570291137401615, "learning_rate": 2.567927746504524e-06, "loss": 0.7054, "step": 21921 }, { "epoch": 0.6718769155326714, "grad_norm": 1.4252561303312001, "learning_rate": 2.5674941115913975e-06, "loss": 0.5633, "step": 21922 }, { "epoch": 0.6719075640554125, "grad_norm": 1.2649476732919067, "learning_rate": 2.567060500646373e-06, "loss": 0.5775, "step": 21923 }, { "epoch": 0.6719382125781538, "grad_norm": 1.2243204964810286, "learning_rate": 2.5666269136737277e-06, "loss": 0.5117, "step": 21924 }, { "epoch": 0.6719688611008949, "grad_norm": 1.377008319962474, "learning_rate": 2.5661933506777266e-06, "loss": 0.6898, "step": 21925 }, { "epoch": 0.6719995096236362, "grad_norm": 1.3111081520971426, "learning_rate": 2.5657598116626454e-06, "loss": 0.6697, "step": 21926 }, { "epoch": 0.6720301581463773, "grad_norm": 0.47221996920233567, "learning_rate": 2.5653262966327572e-06, "loss": 0.4196, "step": 21927 }, { "epoch": 0.6720608066691185, "grad_norm": 1.3491919651001816, "learning_rate": 2.564892805592333e-06, "loss": 0.627, "step": 21928 }, { "epoch": 0.6720914551918598, "grad_norm": 1.1090390921536364, "learning_rate": 2.5644593385456386e-06, "loss": 0.5726, "step": 21929 }, { "epoch": 0.6721221037146009, "grad_norm": 1.4354303592183226, "learning_rate": 2.5640258954969533e-06, "loss": 0.7028, "step": 21930 }, { "epoch": 0.6721527522373422, "grad_norm": 1.2814104885259965, "learning_rate": 2.5635924764505437e-06, "loss": 0.6017, "step": 21931 }, { "epoch": 0.6721834007600833, "grad_norm": 0.4619581765693108, "learning_rate": 2.5631590814106793e-06, "loss": 0.4281, "step": 21932 }, { "epoch": 0.6722140492828246, "grad_norm": 1.2204404018095945, "learning_rate": 2.5627257103816315e-06, "loss": 0.6184, "step": 21933 }, { "epoch": 0.6722446978055657, "grad_norm": 1.4366858981772406, "learning_rate": 2.5622923633676715e-06, "loss": 0.6019, "step": 21934 }, { "epoch": 0.672275346328307, "grad_norm": 1.1937951748080176, "learning_rate": 2.5618590403730702e-06, "loss": 0.6157, "step": 21935 }, { "epoch": 0.6723059948510481, "grad_norm": 1.4488174682317627, "learning_rate": 2.5614257414020936e-06, "loss": 0.5932, "step": 21936 }, { "epoch": 0.6723366433737894, "grad_norm": 1.6297239752510364, "learning_rate": 2.5609924664590136e-06, "loss": 0.6946, "step": 21937 }, { "epoch": 0.6723672918965306, "grad_norm": 1.4919632640242508, "learning_rate": 2.5605592155481007e-06, "loss": 0.6702, "step": 21938 }, { "epoch": 0.6723979404192718, "grad_norm": 1.3103621345033545, "learning_rate": 2.5601259886736217e-06, "loss": 0.6238, "step": 21939 }, { "epoch": 0.672428588942013, "grad_norm": 1.133610682643526, "learning_rate": 2.559692785839842e-06, "loss": 0.6319, "step": 21940 }, { "epoch": 0.6724592374647542, "grad_norm": 1.4161571495170948, "learning_rate": 2.5592596070510375e-06, "loss": 0.6685, "step": 21941 }, { "epoch": 0.6724898859874954, "grad_norm": 1.333427319081002, "learning_rate": 2.5588264523114703e-06, "loss": 0.6413, "step": 21942 }, { "epoch": 0.6725205345102366, "grad_norm": 1.2471216945919648, "learning_rate": 2.5583933216254133e-06, "loss": 0.6227, "step": 21943 }, { "epoch": 0.6725511830329778, "grad_norm": 1.2774447146006969, "learning_rate": 2.5579602149971282e-06, "loss": 0.6289, "step": 21944 }, { "epoch": 0.672581831555719, "grad_norm": 1.4387864729909552, "learning_rate": 2.5575271324308876e-06, "loss": 0.7172, "step": 21945 }, { "epoch": 0.6726124800784602, "grad_norm": 0.4815518349335488, "learning_rate": 2.557094073930958e-06, "loss": 0.4348, "step": 21946 }, { "epoch": 0.6726431286012015, "grad_norm": 0.46542551591594794, "learning_rate": 2.5566610395016047e-06, "loss": 0.4025, "step": 21947 }, { "epoch": 0.6726737771239426, "grad_norm": 1.7442692214975613, "learning_rate": 2.556228029147094e-06, "loss": 0.7275, "step": 21948 }, { "epoch": 0.6727044256466839, "grad_norm": 1.3503680494693553, "learning_rate": 2.555795042871696e-06, "loss": 0.649, "step": 21949 }, { "epoch": 0.672735074169425, "grad_norm": 1.304147882854423, "learning_rate": 2.555362080679675e-06, "loss": 0.638, "step": 21950 }, { "epoch": 0.6727657226921663, "grad_norm": 0.4819520977220723, "learning_rate": 2.5549291425752954e-06, "loss": 0.404, "step": 21951 }, { "epoch": 0.6727963712149074, "grad_norm": 1.4000353824558767, "learning_rate": 2.5544962285628243e-06, "loss": 0.6938, "step": 21952 }, { "epoch": 0.6728270197376487, "grad_norm": 1.44256219811614, "learning_rate": 2.5540633386465276e-06, "loss": 0.612, "step": 21953 }, { "epoch": 0.6728576682603898, "grad_norm": 1.2374080294264884, "learning_rate": 2.5536304728306725e-06, "loss": 0.6031, "step": 21954 }, { "epoch": 0.6728883167831311, "grad_norm": 1.3110843900445392, "learning_rate": 2.5531976311195205e-06, "loss": 0.5729, "step": 21955 }, { "epoch": 0.6729189653058723, "grad_norm": 1.293166444293192, "learning_rate": 2.5527648135173377e-06, "loss": 0.6183, "step": 21956 }, { "epoch": 0.6729496138286135, "grad_norm": 1.1294503383724332, "learning_rate": 2.552332020028392e-06, "loss": 0.5206, "step": 21957 }, { "epoch": 0.6729802623513547, "grad_norm": 1.3863734927268097, "learning_rate": 2.5518992506569453e-06, "loss": 0.6876, "step": 21958 }, { "epoch": 0.6730109108740958, "grad_norm": 1.345950281308376, "learning_rate": 2.5514665054072572e-06, "loss": 0.6132, "step": 21959 }, { "epoch": 0.6730415593968371, "grad_norm": 0.47237589936568986, "learning_rate": 2.5510337842835997e-06, "loss": 0.4066, "step": 21960 }, { "epoch": 0.6730722079195782, "grad_norm": 1.3360329190884994, "learning_rate": 2.550601087290232e-06, "loss": 0.6091, "step": 21961 }, { "epoch": 0.6731028564423195, "grad_norm": 1.3451006447999543, "learning_rate": 2.55016841443142e-06, "loss": 0.681, "step": 21962 }, { "epoch": 0.6731335049650606, "grad_norm": 0.42755063466797827, "learning_rate": 2.549735765711423e-06, "loss": 0.4025, "step": 21963 }, { "epoch": 0.6731641534878019, "grad_norm": 1.3525218021804295, "learning_rate": 2.549303141134507e-06, "loss": 0.604, "step": 21964 }, { "epoch": 0.673194802010543, "grad_norm": 1.3124521517122243, "learning_rate": 2.5488705407049353e-06, "loss": 0.5814, "step": 21965 }, { "epoch": 0.6732254505332843, "grad_norm": 1.2961052988349901, "learning_rate": 2.5484379644269687e-06, "loss": 0.552, "step": 21966 }, { "epoch": 0.6732560990560255, "grad_norm": 1.4021509375039438, "learning_rate": 2.5480054123048693e-06, "loss": 0.6452, "step": 21967 }, { "epoch": 0.6732867475787667, "grad_norm": 1.3642989657319253, "learning_rate": 2.5475728843429017e-06, "loss": 0.6316, "step": 21968 }, { "epoch": 0.6733173961015079, "grad_norm": 0.46224600465630217, "learning_rate": 2.547140380545324e-06, "loss": 0.4156, "step": 21969 }, { "epoch": 0.6733480446242491, "grad_norm": 0.43352320238611003, "learning_rate": 2.5467079009164e-06, "loss": 0.3998, "step": 21970 }, { "epoch": 0.6733786931469903, "grad_norm": 1.363914781159392, "learning_rate": 2.5462754454603927e-06, "loss": 0.6089, "step": 21971 }, { "epoch": 0.6734093416697315, "grad_norm": 1.4165551054174972, "learning_rate": 2.545843014181559e-06, "loss": 0.6694, "step": 21972 }, { "epoch": 0.6734399901924727, "grad_norm": 1.2218945692578684, "learning_rate": 2.5454106070841644e-06, "loss": 0.6791, "step": 21973 }, { "epoch": 0.673470638715214, "grad_norm": 1.4806923918024975, "learning_rate": 2.544978224172465e-06, "loss": 0.6942, "step": 21974 }, { "epoch": 0.6735012872379551, "grad_norm": 1.3748294296289862, "learning_rate": 2.544545865450724e-06, "loss": 0.6645, "step": 21975 }, { "epoch": 0.6735319357606964, "grad_norm": 1.4566354806276955, "learning_rate": 2.544113530923201e-06, "loss": 0.6103, "step": 21976 }, { "epoch": 0.6735625842834375, "grad_norm": 1.3027385698625848, "learning_rate": 2.543681220594155e-06, "loss": 0.6324, "step": 21977 }, { "epoch": 0.6735932328061788, "grad_norm": 1.2478362177776663, "learning_rate": 2.5432489344678467e-06, "loss": 0.6306, "step": 21978 }, { "epoch": 0.6736238813289199, "grad_norm": 1.3379030561994614, "learning_rate": 2.5428166725485372e-06, "loss": 0.529, "step": 21979 }, { "epoch": 0.6736545298516612, "grad_norm": 1.285956454632935, "learning_rate": 2.5423844348404812e-06, "loss": 0.6843, "step": 21980 }, { "epoch": 0.6736851783744023, "grad_norm": 1.2357816308405267, "learning_rate": 2.54195222134794e-06, "loss": 0.5128, "step": 21981 }, { "epoch": 0.6737158268971436, "grad_norm": 1.2684876856586271, "learning_rate": 2.5415200320751754e-06, "loss": 0.5099, "step": 21982 }, { "epoch": 0.6737464754198847, "grad_norm": 1.2829029909507819, "learning_rate": 2.54108786702644e-06, "loss": 0.6747, "step": 21983 }, { "epoch": 0.673777123942626, "grad_norm": 1.3915250923300952, "learning_rate": 2.5406557262059973e-06, "loss": 0.5672, "step": 21984 }, { "epoch": 0.6738077724653672, "grad_norm": 1.5538992479959466, "learning_rate": 2.540223609618101e-06, "loss": 0.7475, "step": 21985 }, { "epoch": 0.6738384209881084, "grad_norm": 1.3020129614953635, "learning_rate": 2.5397915172670105e-06, "loss": 0.651, "step": 21986 }, { "epoch": 0.6738690695108496, "grad_norm": 1.3455509549818967, "learning_rate": 2.539359449156986e-06, "loss": 0.6348, "step": 21987 }, { "epoch": 0.6738997180335908, "grad_norm": 0.4469840849331393, "learning_rate": 2.5389274052922807e-06, "loss": 0.3978, "step": 21988 }, { "epoch": 0.673930366556332, "grad_norm": 0.4359832976749309, "learning_rate": 2.5384953856771533e-06, "loss": 0.3818, "step": 21989 }, { "epoch": 0.6739610150790731, "grad_norm": 1.3768786494674607, "learning_rate": 2.5380633903158623e-06, "loss": 0.6445, "step": 21990 }, { "epoch": 0.6739916636018144, "grad_norm": 1.582350579776557, "learning_rate": 2.537631419212661e-06, "loss": 0.5864, "step": 21991 }, { "epoch": 0.6740223121245555, "grad_norm": 1.1833297544445651, "learning_rate": 2.5371994723718075e-06, "loss": 0.5139, "step": 21992 }, { "epoch": 0.6740529606472968, "grad_norm": 1.1576176268075715, "learning_rate": 2.536767549797559e-06, "loss": 0.5674, "step": 21993 }, { "epoch": 0.674083609170038, "grad_norm": 1.2390385753796214, "learning_rate": 2.5363356514941684e-06, "loss": 0.5408, "step": 21994 }, { "epoch": 0.6741142576927792, "grad_norm": 1.2047281298720822, "learning_rate": 2.535903777465895e-06, "loss": 0.5464, "step": 21995 }, { "epoch": 0.6741449062155204, "grad_norm": 1.4253926236614263, "learning_rate": 2.5354719277169906e-06, "loss": 0.6457, "step": 21996 }, { "epoch": 0.6741755547382616, "grad_norm": 1.1584120197705994, "learning_rate": 2.5350401022517114e-06, "loss": 0.5775, "step": 21997 }, { "epoch": 0.6742062032610028, "grad_norm": 1.2532382285164982, "learning_rate": 2.534608301074315e-06, "loss": 0.5839, "step": 21998 }, { "epoch": 0.674236851783744, "grad_norm": 1.2933314530509104, "learning_rate": 2.5341765241890516e-06, "loss": 0.6497, "step": 21999 }, { "epoch": 0.6742675003064852, "grad_norm": 1.4589406416564945, "learning_rate": 2.5337447716001773e-06, "loss": 0.6387, "step": 22000 }, { "epoch": 0.6742981488292265, "grad_norm": 0.4306596093406133, "learning_rate": 2.5333130433119495e-06, "loss": 0.3885, "step": 22001 }, { "epoch": 0.6743287973519676, "grad_norm": 1.2557325737731306, "learning_rate": 2.532881339328617e-06, "loss": 0.6236, "step": 22002 }, { "epoch": 0.6743594458747089, "grad_norm": 1.2317016088117418, "learning_rate": 2.5324496596544383e-06, "loss": 0.5767, "step": 22003 }, { "epoch": 0.67439009439745, "grad_norm": 1.2721683515584514, "learning_rate": 2.5320180042936627e-06, "loss": 0.5642, "step": 22004 }, { "epoch": 0.6744207429201913, "grad_norm": 1.4006117758021417, "learning_rate": 2.531586373250544e-06, "loss": 0.6905, "step": 22005 }, { "epoch": 0.6744513914429324, "grad_norm": 1.2980803516964774, "learning_rate": 2.5311547665293397e-06, "loss": 0.6546, "step": 22006 }, { "epoch": 0.6744820399656737, "grad_norm": 1.249724707233273, "learning_rate": 2.5307231841342962e-06, "loss": 0.6011, "step": 22007 }, { "epoch": 0.6745126884884148, "grad_norm": 1.5140348764493063, "learning_rate": 2.5302916260696698e-06, "loss": 0.6107, "step": 22008 }, { "epoch": 0.6745433370111561, "grad_norm": 0.4350739684635267, "learning_rate": 2.5298600923397133e-06, "loss": 0.3874, "step": 22009 }, { "epoch": 0.6745739855338972, "grad_norm": 1.2649177121632194, "learning_rate": 2.529428582948675e-06, "loss": 0.5877, "step": 22010 }, { "epoch": 0.6746046340566385, "grad_norm": 1.3379727426651327, "learning_rate": 2.52899709790081e-06, "loss": 0.6199, "step": 22011 }, { "epoch": 0.6746352825793797, "grad_norm": 1.2813538735137262, "learning_rate": 2.52856563720037e-06, "loss": 0.594, "step": 22012 }, { "epoch": 0.6746659311021209, "grad_norm": 1.1150637375490675, "learning_rate": 2.5281342008516035e-06, "loss": 0.6222, "step": 22013 }, { "epoch": 0.6746965796248621, "grad_norm": 1.3586938837663736, "learning_rate": 2.527702788858765e-06, "loss": 0.6619, "step": 22014 }, { "epoch": 0.6747272281476033, "grad_norm": 1.1809037078740203, "learning_rate": 2.5272714012260996e-06, "loss": 0.635, "step": 22015 }, { "epoch": 0.6747578766703445, "grad_norm": 1.417480096463802, "learning_rate": 2.5268400379578663e-06, "loss": 0.5998, "step": 22016 }, { "epoch": 0.6747885251930857, "grad_norm": 1.138788709641876, "learning_rate": 2.5264086990583097e-06, "loss": 0.5898, "step": 22017 }, { "epoch": 0.6748191737158269, "grad_norm": 1.2927185350330166, "learning_rate": 2.5259773845316798e-06, "loss": 0.6644, "step": 22018 }, { "epoch": 0.6748498222385682, "grad_norm": 1.5600621779483712, "learning_rate": 2.5255460943822273e-06, "loss": 0.5891, "step": 22019 }, { "epoch": 0.6748804707613093, "grad_norm": 1.3977424916690884, "learning_rate": 2.5251148286142045e-06, "loss": 0.6424, "step": 22020 }, { "epoch": 0.6749111192840505, "grad_norm": 1.5174172197029538, "learning_rate": 2.524683587231857e-06, "loss": 0.6349, "step": 22021 }, { "epoch": 0.6749417678067917, "grad_norm": 1.4045135814101843, "learning_rate": 2.524252370239435e-06, "loss": 0.6407, "step": 22022 }, { "epoch": 0.6749724163295329, "grad_norm": 1.3607280980975809, "learning_rate": 2.523821177641188e-06, "loss": 0.5817, "step": 22023 }, { "epoch": 0.6750030648522741, "grad_norm": 1.2540580909490326, "learning_rate": 2.5233900094413668e-06, "loss": 0.587, "step": 22024 }, { "epoch": 0.6750337133750153, "grad_norm": 1.3408618176215894, "learning_rate": 2.522958865644217e-06, "loss": 0.6955, "step": 22025 }, { "epoch": 0.6750643618977565, "grad_norm": 1.3934270755041658, "learning_rate": 2.5225277462539833e-06, "loss": 0.6022, "step": 22026 }, { "epoch": 0.6750950104204977, "grad_norm": 1.4913651284582128, "learning_rate": 2.5220966512749213e-06, "loss": 0.6293, "step": 22027 }, { "epoch": 0.675125658943239, "grad_norm": 0.4539125192319458, "learning_rate": 2.5216655807112756e-06, "loss": 0.3891, "step": 22028 }, { "epoch": 0.6751563074659801, "grad_norm": 1.3190968403379033, "learning_rate": 2.521234534567291e-06, "loss": 0.6818, "step": 22029 }, { "epoch": 0.6751869559887214, "grad_norm": 1.4159990012108827, "learning_rate": 2.5208035128472164e-06, "loss": 0.653, "step": 22030 }, { "epoch": 0.6752176045114625, "grad_norm": 1.297926584840214, "learning_rate": 2.5203725155553012e-06, "loss": 0.5686, "step": 22031 }, { "epoch": 0.6752482530342038, "grad_norm": 1.183325871725703, "learning_rate": 2.519941542695788e-06, "loss": 0.6783, "step": 22032 }, { "epoch": 0.6752789015569449, "grad_norm": 1.1695993414574557, "learning_rate": 2.5195105942729257e-06, "loss": 0.5864, "step": 22033 }, { "epoch": 0.6753095500796862, "grad_norm": 1.3967715151148592, "learning_rate": 2.51907967029096e-06, "loss": 0.6785, "step": 22034 }, { "epoch": 0.6753401986024273, "grad_norm": 1.2875775097120952, "learning_rate": 2.5186487707541384e-06, "loss": 0.548, "step": 22035 }, { "epoch": 0.6753708471251686, "grad_norm": 0.44445159018632335, "learning_rate": 2.5182178956667057e-06, "loss": 0.3864, "step": 22036 }, { "epoch": 0.6754014956479097, "grad_norm": 1.5049762027860503, "learning_rate": 2.517787045032904e-06, "loss": 0.6412, "step": 22037 }, { "epoch": 0.675432144170651, "grad_norm": 1.4233235215510256, "learning_rate": 2.5173562188569843e-06, "loss": 0.6656, "step": 22038 }, { "epoch": 0.6754627926933922, "grad_norm": 1.060437088057069, "learning_rate": 2.5169254171431903e-06, "loss": 0.5812, "step": 22039 }, { "epoch": 0.6754934412161334, "grad_norm": 1.327335652858738, "learning_rate": 2.5164946398957624e-06, "loss": 0.5953, "step": 22040 }, { "epoch": 0.6755240897388746, "grad_norm": 1.2711245874184691, "learning_rate": 2.516063887118949e-06, "loss": 0.5819, "step": 22041 }, { "epoch": 0.6755547382616158, "grad_norm": 1.3812329059199087, "learning_rate": 2.5156331588169937e-06, "loss": 0.7721, "step": 22042 }, { "epoch": 0.675585386784357, "grad_norm": 1.3100811057895214, "learning_rate": 2.5152024549941417e-06, "loss": 0.6366, "step": 22043 }, { "epoch": 0.6756160353070982, "grad_norm": 1.2413505932965092, "learning_rate": 2.5147717756546343e-06, "loss": 0.6144, "step": 22044 }, { "epoch": 0.6756466838298394, "grad_norm": 0.4508515341654198, "learning_rate": 2.5143411208027165e-06, "loss": 0.3795, "step": 22045 }, { "epoch": 0.6756773323525807, "grad_norm": 1.2762476450633522, "learning_rate": 2.513910490442633e-06, "loss": 0.5905, "step": 22046 }, { "epoch": 0.6757079808753218, "grad_norm": 1.2736801813025775, "learning_rate": 2.513479884578626e-06, "loss": 0.5157, "step": 22047 }, { "epoch": 0.6757386293980631, "grad_norm": 1.2128109235480105, "learning_rate": 2.513049303214936e-06, "loss": 0.6287, "step": 22048 }, { "epoch": 0.6757692779208042, "grad_norm": 1.483122267495964, "learning_rate": 2.512618746355807e-06, "loss": 0.6428, "step": 22049 }, { "epoch": 0.6757999264435455, "grad_norm": 1.1925902785512865, "learning_rate": 2.5121882140054834e-06, "loss": 0.5524, "step": 22050 }, { "epoch": 0.6758305749662866, "grad_norm": 1.1415909244817528, "learning_rate": 2.5117577061682063e-06, "loss": 0.5915, "step": 22051 }, { "epoch": 0.6758612234890278, "grad_norm": 0.44078675963426633, "learning_rate": 2.5113272228482157e-06, "loss": 0.3854, "step": 22052 }, { "epoch": 0.675891872011769, "grad_norm": 1.350490729596925, "learning_rate": 2.5108967640497544e-06, "loss": 0.6999, "step": 22053 }, { "epoch": 0.6759225205345102, "grad_norm": 1.4104637763217813, "learning_rate": 2.5104663297770664e-06, "loss": 0.7772, "step": 22054 }, { "epoch": 0.6759531690572514, "grad_norm": 1.329566743773586, "learning_rate": 2.5100359200343903e-06, "loss": 0.673, "step": 22055 }, { "epoch": 0.6759838175799926, "grad_norm": 1.2964780598052006, "learning_rate": 2.509605534825964e-06, "loss": 0.6187, "step": 22056 }, { "epoch": 0.6760144661027339, "grad_norm": 1.4324492367708734, "learning_rate": 2.5091751741560353e-06, "loss": 0.6658, "step": 22057 }, { "epoch": 0.676045114625475, "grad_norm": 1.2558669925473513, "learning_rate": 2.508744838028841e-06, "loss": 0.5453, "step": 22058 }, { "epoch": 0.6760757631482163, "grad_norm": 1.4281323029488502, "learning_rate": 2.5083145264486193e-06, "loss": 0.6303, "step": 22059 }, { "epoch": 0.6761064116709574, "grad_norm": 1.2665074898718336, "learning_rate": 2.507884239419612e-06, "loss": 0.5992, "step": 22060 }, { "epoch": 0.6761370601936987, "grad_norm": 1.2482062246923535, "learning_rate": 2.5074539769460588e-06, "loss": 0.6104, "step": 22061 }, { "epoch": 0.6761677087164398, "grad_norm": 1.3192127021040922, "learning_rate": 2.507023739032201e-06, "loss": 0.5866, "step": 22062 }, { "epoch": 0.6761983572391811, "grad_norm": 1.4475612772130808, "learning_rate": 2.506593525682275e-06, "loss": 0.7208, "step": 22063 }, { "epoch": 0.6762290057619222, "grad_norm": 1.3195662980193594, "learning_rate": 2.50616333690052e-06, "loss": 0.6059, "step": 22064 }, { "epoch": 0.6762596542846635, "grad_norm": 1.356711847345768, "learning_rate": 2.505733172691178e-06, "loss": 0.6134, "step": 22065 }, { "epoch": 0.6762903028074047, "grad_norm": 1.3129073431566336, "learning_rate": 2.5053030330584858e-06, "loss": 0.6608, "step": 22066 }, { "epoch": 0.6763209513301459, "grad_norm": 1.422672372478968, "learning_rate": 2.5048729180066765e-06, "loss": 0.5862, "step": 22067 }, { "epoch": 0.6763515998528871, "grad_norm": 1.2616146069818683, "learning_rate": 2.5044428275399968e-06, "loss": 0.5919, "step": 22068 }, { "epoch": 0.6763822483756283, "grad_norm": 0.4729297315410419, "learning_rate": 2.5040127616626784e-06, "loss": 0.3892, "step": 22069 }, { "epoch": 0.6764128968983695, "grad_norm": 1.5443282898027613, "learning_rate": 2.503582720378964e-06, "loss": 0.6304, "step": 22070 }, { "epoch": 0.6764435454211107, "grad_norm": 5.288515194731718, "learning_rate": 2.503152703693085e-06, "loss": 0.7204, "step": 22071 }, { "epoch": 0.6764741939438519, "grad_norm": 1.2753230383485283, "learning_rate": 2.5027227116092806e-06, "loss": 0.6354, "step": 22072 }, { "epoch": 0.6765048424665931, "grad_norm": 0.45127862776199823, "learning_rate": 2.5022927441317912e-06, "loss": 0.4047, "step": 22073 }, { "epoch": 0.6765354909893343, "grad_norm": 1.2093146911197779, "learning_rate": 2.501862801264848e-06, "loss": 0.5975, "step": 22074 }, { "epoch": 0.6765661395120756, "grad_norm": 1.3015372421129277, "learning_rate": 2.50143288301269e-06, "loss": 0.6863, "step": 22075 }, { "epoch": 0.6765967880348167, "grad_norm": 1.440411144907704, "learning_rate": 2.5010029893795546e-06, "loss": 0.6606, "step": 22076 }, { "epoch": 0.676627436557558, "grad_norm": 0.4739569710999082, "learning_rate": 2.500573120369675e-06, "loss": 0.4082, "step": 22077 }, { "epoch": 0.6766580850802991, "grad_norm": 1.2750087990008225, "learning_rate": 2.5001432759872867e-06, "loss": 0.5771, "step": 22078 }, { "epoch": 0.6766887336030404, "grad_norm": 1.3046272514423858, "learning_rate": 2.4997134562366293e-06, "loss": 0.6657, "step": 22079 }, { "epoch": 0.6767193821257815, "grad_norm": 1.1698478734605096, "learning_rate": 2.499283661121933e-06, "loss": 0.5742, "step": 22080 }, { "epoch": 0.6767500306485228, "grad_norm": 1.1703471298187307, "learning_rate": 2.4988538906474357e-06, "loss": 0.575, "step": 22081 }, { "epoch": 0.6767806791712639, "grad_norm": 0.4295489914220186, "learning_rate": 2.4984241448173703e-06, "loss": 0.3865, "step": 22082 }, { "epoch": 0.6768113276940051, "grad_norm": 0.45954077943255645, "learning_rate": 2.497994423635971e-06, "loss": 0.396, "step": 22083 }, { "epoch": 0.6768419762167464, "grad_norm": 1.2457518046690839, "learning_rate": 2.497564727107475e-06, "loss": 0.5762, "step": 22084 }, { "epoch": 0.6768726247394875, "grad_norm": 1.3781548450206516, "learning_rate": 2.4971350552361117e-06, "loss": 0.685, "step": 22085 }, { "epoch": 0.6769032732622288, "grad_norm": 0.4610717856646723, "learning_rate": 2.4967054080261173e-06, "loss": 0.3785, "step": 22086 }, { "epoch": 0.6769339217849699, "grad_norm": 0.4221871299115458, "learning_rate": 2.496275785481727e-06, "loss": 0.3941, "step": 22087 }, { "epoch": 0.6769645703077112, "grad_norm": 1.6549661646444118, "learning_rate": 2.4958461876071704e-06, "loss": 0.6216, "step": 22088 }, { "epoch": 0.6769952188304523, "grad_norm": 1.3604135689965966, "learning_rate": 2.4954166144066815e-06, "loss": 0.6265, "step": 22089 }, { "epoch": 0.6770258673531936, "grad_norm": 1.3040754593825103, "learning_rate": 2.4949870658844953e-06, "loss": 0.6383, "step": 22090 }, { "epoch": 0.6770565158759347, "grad_norm": 0.4258167528376135, "learning_rate": 2.4945575420448417e-06, "loss": 0.3745, "step": 22091 }, { "epoch": 0.677087164398676, "grad_norm": 1.5788135906351775, "learning_rate": 2.4941280428919543e-06, "loss": 0.6027, "step": 22092 }, { "epoch": 0.6771178129214172, "grad_norm": 1.2754678027462418, "learning_rate": 2.493698568430063e-06, "loss": 0.6496, "step": 22093 }, { "epoch": 0.6771484614441584, "grad_norm": 1.218065045925474, "learning_rate": 2.493269118663401e-06, "loss": 0.6377, "step": 22094 }, { "epoch": 0.6771791099668996, "grad_norm": 1.2049556059390338, "learning_rate": 2.492839693596202e-06, "loss": 0.5339, "step": 22095 }, { "epoch": 0.6772097584896408, "grad_norm": 1.1820635585614987, "learning_rate": 2.4924102932326927e-06, "loss": 0.6663, "step": 22096 }, { "epoch": 0.677240407012382, "grad_norm": 1.3417988914791752, "learning_rate": 2.4919809175771066e-06, "loss": 0.6877, "step": 22097 }, { "epoch": 0.6772710555351232, "grad_norm": 1.3550233939481904, "learning_rate": 2.4915515666336758e-06, "loss": 0.6531, "step": 22098 }, { "epoch": 0.6773017040578644, "grad_norm": 1.3068693176024768, "learning_rate": 2.4911222404066274e-06, "loss": 0.5441, "step": 22099 }, { "epoch": 0.6773323525806056, "grad_norm": 1.3183304536033729, "learning_rate": 2.4906929389001954e-06, "loss": 0.6725, "step": 22100 }, { "epoch": 0.6773630011033468, "grad_norm": 1.2415341286228188, "learning_rate": 2.490263662118606e-06, "loss": 0.5772, "step": 22101 }, { "epoch": 0.6773936496260881, "grad_norm": 1.364435801033927, "learning_rate": 2.489834410066091e-06, "loss": 0.6609, "step": 22102 }, { "epoch": 0.6774242981488292, "grad_norm": 1.3690913616660032, "learning_rate": 2.4894051827468817e-06, "loss": 0.6783, "step": 22103 }, { "epoch": 0.6774549466715705, "grad_norm": 1.3440744446529878, "learning_rate": 2.4889759801652035e-06, "loss": 0.6946, "step": 22104 }, { "epoch": 0.6774855951943116, "grad_norm": 1.3781179679214313, "learning_rate": 2.488546802325288e-06, "loss": 0.6458, "step": 22105 }, { "epoch": 0.6775162437170529, "grad_norm": 1.2245527624952257, "learning_rate": 2.488117649231365e-06, "loss": 0.5369, "step": 22106 }, { "epoch": 0.677546892239794, "grad_norm": 1.2788277658219394, "learning_rate": 2.487688520887659e-06, "loss": 0.6023, "step": 22107 }, { "epoch": 0.6775775407625353, "grad_norm": 1.3565251997823662, "learning_rate": 2.487259417298401e-06, "loss": 0.6745, "step": 22108 }, { "epoch": 0.6776081892852764, "grad_norm": 0.4640493329251016, "learning_rate": 2.4868303384678216e-06, "loss": 0.4118, "step": 22109 }, { "epoch": 0.6776388378080177, "grad_norm": 0.4491601616010447, "learning_rate": 2.4864012844001435e-06, "loss": 0.3853, "step": 22110 }, { "epoch": 0.6776694863307589, "grad_norm": 1.138871660983313, "learning_rate": 2.4859722550995978e-06, "loss": 0.5318, "step": 22111 }, { "epoch": 0.6777001348535001, "grad_norm": 1.3709908786426115, "learning_rate": 2.4855432505704095e-06, "loss": 0.5372, "step": 22112 }, { "epoch": 0.6777307833762413, "grad_norm": 1.4369718653680137, "learning_rate": 2.4851142708168075e-06, "loss": 0.7818, "step": 22113 }, { "epoch": 0.6777614318989824, "grad_norm": 1.2320691167694195, "learning_rate": 2.484685315843019e-06, "loss": 0.5404, "step": 22114 }, { "epoch": 0.6777920804217237, "grad_norm": 1.300141384792499, "learning_rate": 2.484256385653268e-06, "loss": 0.6417, "step": 22115 }, { "epoch": 0.6778227289444648, "grad_norm": 1.2415575186396297, "learning_rate": 2.483827480251783e-06, "loss": 0.5408, "step": 22116 }, { "epoch": 0.6778533774672061, "grad_norm": 1.5159467266420956, "learning_rate": 2.483398599642791e-06, "loss": 0.6063, "step": 22117 }, { "epoch": 0.6778840259899472, "grad_norm": 1.5332992981368563, "learning_rate": 2.482969743830515e-06, "loss": 0.6634, "step": 22118 }, { "epoch": 0.6779146745126885, "grad_norm": 1.2544865202587163, "learning_rate": 2.4825409128191818e-06, "loss": 0.5821, "step": 22119 }, { "epoch": 0.6779453230354296, "grad_norm": 0.4682166062757854, "learning_rate": 2.482112106613019e-06, "loss": 0.413, "step": 22120 }, { "epoch": 0.6779759715581709, "grad_norm": 1.2291997546395175, "learning_rate": 2.4816833252162482e-06, "loss": 0.5882, "step": 22121 }, { "epoch": 0.6780066200809121, "grad_norm": 1.293185639678265, "learning_rate": 2.4812545686330976e-06, "loss": 0.5687, "step": 22122 }, { "epoch": 0.6780372686036533, "grad_norm": 1.2938554501766069, "learning_rate": 2.4808258368677863e-06, "loss": 0.6184, "step": 22123 }, { "epoch": 0.6780679171263945, "grad_norm": 0.4537225751291482, "learning_rate": 2.4803971299245467e-06, "loss": 0.4337, "step": 22124 }, { "epoch": 0.6780985656491357, "grad_norm": 1.4773032927509755, "learning_rate": 2.4799684478075985e-06, "loss": 0.7115, "step": 22125 }, { "epoch": 0.6781292141718769, "grad_norm": 1.31626923150827, "learning_rate": 2.4795397905211644e-06, "loss": 0.574, "step": 22126 }, { "epoch": 0.6781598626946181, "grad_norm": 1.164625909635632, "learning_rate": 2.479111158069469e-06, "loss": 0.6495, "step": 22127 }, { "epoch": 0.6781905112173593, "grad_norm": 1.395047928231093, "learning_rate": 2.478682550456739e-06, "loss": 0.5638, "step": 22128 }, { "epoch": 0.6782211597401006, "grad_norm": 1.301491565859847, "learning_rate": 2.4782539676871932e-06, "loss": 0.6171, "step": 22129 }, { "epoch": 0.6782518082628417, "grad_norm": 1.45032903330607, "learning_rate": 2.477825409765056e-06, "loss": 0.6624, "step": 22130 }, { "epoch": 0.678282456785583, "grad_norm": 1.2962850286663135, "learning_rate": 2.4773968766945516e-06, "loss": 0.5179, "step": 22131 }, { "epoch": 0.6783131053083241, "grad_norm": 1.2886290749708489, "learning_rate": 2.4769683684799003e-06, "loss": 0.6539, "step": 22132 }, { "epoch": 0.6783437538310654, "grad_norm": 1.3959288262292924, "learning_rate": 2.4765398851253262e-06, "loss": 0.662, "step": 22133 }, { "epoch": 0.6783744023538065, "grad_norm": 1.5059118009657064, "learning_rate": 2.4761114266350473e-06, "loss": 0.6119, "step": 22134 }, { "epoch": 0.6784050508765478, "grad_norm": 0.43245094210698637, "learning_rate": 2.4756829930132915e-06, "loss": 0.3999, "step": 22135 }, { "epoch": 0.6784356993992889, "grad_norm": 1.220758386382522, "learning_rate": 2.475254584264277e-06, "loss": 0.5902, "step": 22136 }, { "epoch": 0.6784663479220302, "grad_norm": 1.4709599557519402, "learning_rate": 2.4748262003922234e-06, "loss": 0.5786, "step": 22137 }, { "epoch": 0.6784969964447713, "grad_norm": 1.2921909811064027, "learning_rate": 2.4743978414013535e-06, "loss": 0.6161, "step": 22138 }, { "epoch": 0.6785276449675126, "grad_norm": 1.237402256730061, "learning_rate": 2.4739695072958898e-06, "loss": 0.6656, "step": 22139 }, { "epoch": 0.6785582934902538, "grad_norm": 1.3540994630916705, "learning_rate": 2.4735411980800483e-06, "loss": 0.5821, "step": 22140 }, { "epoch": 0.678588942012995, "grad_norm": 1.3019223765406858, "learning_rate": 2.4731129137580524e-06, "loss": 0.6437, "step": 22141 }, { "epoch": 0.6786195905357362, "grad_norm": 1.2731856205643182, "learning_rate": 2.4726846543341215e-06, "loss": 0.5672, "step": 22142 }, { "epoch": 0.6786502390584774, "grad_norm": 1.3949918349972221, "learning_rate": 2.472256419812477e-06, "loss": 0.5389, "step": 22143 }, { "epoch": 0.6786808875812186, "grad_norm": 1.268130015630277, "learning_rate": 2.471828210197337e-06, "loss": 0.5489, "step": 22144 }, { "epoch": 0.6787115361039597, "grad_norm": 1.3823072996443444, "learning_rate": 2.4714000254929183e-06, "loss": 0.5647, "step": 22145 }, { "epoch": 0.678742184626701, "grad_norm": 1.4190976111141766, "learning_rate": 2.470971865703442e-06, "loss": 0.6156, "step": 22146 }, { "epoch": 0.6787728331494421, "grad_norm": 1.3172786050701146, "learning_rate": 2.4705437308331292e-06, "loss": 0.5776, "step": 22147 }, { "epoch": 0.6788034816721834, "grad_norm": 1.1215425153460246, "learning_rate": 2.4701156208861944e-06, "loss": 0.5873, "step": 22148 }, { "epoch": 0.6788341301949246, "grad_norm": 1.3331121553477818, "learning_rate": 2.469687535866858e-06, "loss": 0.6363, "step": 22149 }, { "epoch": 0.6788647787176658, "grad_norm": 1.406681101623021, "learning_rate": 2.469259475779337e-06, "loss": 0.5658, "step": 22150 }, { "epoch": 0.678895427240407, "grad_norm": 0.44390635892873415, "learning_rate": 2.468831440627852e-06, "loss": 0.3873, "step": 22151 }, { "epoch": 0.6789260757631482, "grad_norm": 1.3944918396764814, "learning_rate": 2.468403430416618e-06, "loss": 0.7413, "step": 22152 }, { "epoch": 0.6789567242858894, "grad_norm": 1.4295853161627325, "learning_rate": 2.46797544514985e-06, "loss": 0.6307, "step": 22153 }, { "epoch": 0.6789873728086306, "grad_norm": 1.3676444645491919, "learning_rate": 2.467547484831771e-06, "loss": 0.6142, "step": 22154 }, { "epoch": 0.6790180213313718, "grad_norm": 1.2837398887063498, "learning_rate": 2.4671195494665946e-06, "loss": 0.6825, "step": 22155 }, { "epoch": 0.679048669854113, "grad_norm": 1.3423137751018877, "learning_rate": 2.4666916390585354e-06, "loss": 0.5564, "step": 22156 }, { "epoch": 0.6790793183768542, "grad_norm": 0.44116693929614154, "learning_rate": 2.4662637536118116e-06, "loss": 0.4035, "step": 22157 }, { "epoch": 0.6791099668995955, "grad_norm": 1.4493001106072818, "learning_rate": 2.4658358931306415e-06, "loss": 0.6008, "step": 22158 }, { "epoch": 0.6791406154223366, "grad_norm": 1.2441078601660716, "learning_rate": 2.465408057619237e-06, "loss": 0.5864, "step": 22159 }, { "epoch": 0.6791712639450779, "grad_norm": 1.3413879610615476, "learning_rate": 2.4649802470818146e-06, "loss": 0.4982, "step": 22160 }, { "epoch": 0.679201912467819, "grad_norm": 1.3238238688823483, "learning_rate": 2.464552461522591e-06, "loss": 0.5749, "step": 22161 }, { "epoch": 0.6792325609905603, "grad_norm": 1.4066320943752024, "learning_rate": 2.4641247009457827e-06, "loss": 0.6965, "step": 22162 }, { "epoch": 0.6792632095133014, "grad_norm": 1.587069827867328, "learning_rate": 2.463696965355602e-06, "loss": 0.6247, "step": 22163 }, { "epoch": 0.6792938580360427, "grad_norm": 1.3852497253088236, "learning_rate": 2.463269254756261e-06, "loss": 0.6698, "step": 22164 }, { "epoch": 0.6793245065587838, "grad_norm": 1.2695402272485876, "learning_rate": 2.4628415691519804e-06, "loss": 0.6382, "step": 22165 }, { "epoch": 0.6793551550815251, "grad_norm": 1.334029997453054, "learning_rate": 2.462413908546971e-06, "loss": 0.654, "step": 22166 }, { "epoch": 0.6793858036042663, "grad_norm": 1.398030058592925, "learning_rate": 2.4619862729454447e-06, "loss": 0.7338, "step": 22167 }, { "epoch": 0.6794164521270075, "grad_norm": 1.3418569074853985, "learning_rate": 2.4615586623516174e-06, "loss": 0.6448, "step": 22168 }, { "epoch": 0.6794471006497487, "grad_norm": 0.46841531733105785, "learning_rate": 2.4611310767697015e-06, "loss": 0.4215, "step": 22169 }, { "epoch": 0.6794777491724899, "grad_norm": 1.322520503492233, "learning_rate": 2.4607035162039135e-06, "loss": 0.6533, "step": 22170 }, { "epoch": 0.6795083976952311, "grad_norm": 1.204645262335385, "learning_rate": 2.460275980658461e-06, "loss": 0.5112, "step": 22171 }, { "epoch": 0.6795390462179723, "grad_norm": 1.4703594855152904, "learning_rate": 2.459848470137559e-06, "loss": 0.6259, "step": 22172 }, { "epoch": 0.6795696947407135, "grad_norm": 0.45102363912392945, "learning_rate": 2.459420984645422e-06, "loss": 0.4085, "step": 22173 }, { "epoch": 0.6796003432634548, "grad_norm": 1.4161528941087127, "learning_rate": 2.45899352418626e-06, "loss": 0.6361, "step": 22174 }, { "epoch": 0.6796309917861959, "grad_norm": 1.3469296656072862, "learning_rate": 2.458566088764281e-06, "loss": 0.5819, "step": 22175 }, { "epoch": 0.679661640308937, "grad_norm": 1.3148207818547755, "learning_rate": 2.458138678383705e-06, "loss": 0.5764, "step": 22176 }, { "epoch": 0.6796922888316783, "grad_norm": 1.529229224515906, "learning_rate": 2.457711293048736e-06, "loss": 0.6548, "step": 22177 }, { "epoch": 0.6797229373544195, "grad_norm": 1.215238583275481, "learning_rate": 2.4572839327635904e-06, "loss": 0.5249, "step": 22178 }, { "epoch": 0.6797535858771607, "grad_norm": 1.2641343560403253, "learning_rate": 2.4568565975324755e-06, "loss": 0.6108, "step": 22179 }, { "epoch": 0.6797842343999019, "grad_norm": 1.3014925403940731, "learning_rate": 2.456429287359603e-06, "loss": 0.5783, "step": 22180 }, { "epoch": 0.6798148829226431, "grad_norm": 1.338418854519605, "learning_rate": 2.456002002249185e-06, "loss": 0.6463, "step": 22181 }, { "epoch": 0.6798455314453843, "grad_norm": 0.45159731285492866, "learning_rate": 2.4555747422054287e-06, "loss": 0.3996, "step": 22182 }, { "epoch": 0.6798761799681255, "grad_norm": 1.3892445914918474, "learning_rate": 2.4551475072325453e-06, "loss": 0.6555, "step": 22183 }, { "epoch": 0.6799068284908667, "grad_norm": 1.3147608053522097, "learning_rate": 2.454720297334747e-06, "loss": 0.5213, "step": 22184 }, { "epoch": 0.679937477013608, "grad_norm": 1.3204354877731355, "learning_rate": 2.45429311251624e-06, "loss": 0.5786, "step": 22185 }, { "epoch": 0.6799681255363491, "grad_norm": 1.2350905853050225, "learning_rate": 2.453865952781231e-06, "loss": 0.5629, "step": 22186 }, { "epoch": 0.6799987740590904, "grad_norm": 1.4150451474750414, "learning_rate": 2.453438818133936e-06, "loss": 0.6823, "step": 22187 }, { "epoch": 0.6800294225818315, "grad_norm": 1.2792262743859977, "learning_rate": 2.4530117085785576e-06, "loss": 0.6122, "step": 22188 }, { "epoch": 0.6800600711045728, "grad_norm": 1.2533202852479381, "learning_rate": 2.452584624119309e-06, "loss": 0.5647, "step": 22189 }, { "epoch": 0.6800907196273139, "grad_norm": 1.1852791359905221, "learning_rate": 2.4521575647603936e-06, "loss": 0.5433, "step": 22190 }, { "epoch": 0.6801213681500552, "grad_norm": 1.3986378623856912, "learning_rate": 2.451730530506022e-06, "loss": 0.6886, "step": 22191 }, { "epoch": 0.6801520166727963, "grad_norm": 1.5238243177598456, "learning_rate": 2.451303521360403e-06, "loss": 0.5805, "step": 22192 }, { "epoch": 0.6801826651955376, "grad_norm": 1.362683246243826, "learning_rate": 2.4508765373277412e-06, "loss": 0.7152, "step": 22193 }, { "epoch": 0.6802133137182788, "grad_norm": 1.3099139257560783, "learning_rate": 2.450449578412244e-06, "loss": 0.5825, "step": 22194 }, { "epoch": 0.68024396224102, "grad_norm": 1.391593505444455, "learning_rate": 2.4500226446181217e-06, "loss": 0.6769, "step": 22195 }, { "epoch": 0.6802746107637612, "grad_norm": 1.347177208714107, "learning_rate": 2.4495957359495774e-06, "loss": 0.5229, "step": 22196 }, { "epoch": 0.6803052592865024, "grad_norm": 1.2452313991263244, "learning_rate": 2.449168852410821e-06, "loss": 0.6212, "step": 22197 }, { "epoch": 0.6803359078092436, "grad_norm": 1.329069164595499, "learning_rate": 2.4487419940060538e-06, "loss": 0.6607, "step": 22198 }, { "epoch": 0.6803665563319848, "grad_norm": 0.47313778485453595, "learning_rate": 2.448315160739485e-06, "loss": 0.4098, "step": 22199 }, { "epoch": 0.680397204854726, "grad_norm": 1.243458694082663, "learning_rate": 2.447888352615321e-06, "loss": 0.606, "step": 22200 }, { "epoch": 0.6804278533774673, "grad_norm": 1.1133176500447939, "learning_rate": 2.447461569637765e-06, "loss": 0.6427, "step": 22201 }, { "epoch": 0.6804585019002084, "grad_norm": 1.353953991866461, "learning_rate": 2.447034811811023e-06, "loss": 0.6809, "step": 22202 }, { "epoch": 0.6804891504229497, "grad_norm": 1.2570658740855503, "learning_rate": 2.446608079139302e-06, "loss": 0.5757, "step": 22203 }, { "epoch": 0.6805197989456908, "grad_norm": 1.2765186719744006, "learning_rate": 2.446181371626803e-06, "loss": 0.5656, "step": 22204 }, { "epoch": 0.6805504474684321, "grad_norm": 1.246798287837615, "learning_rate": 2.445754689277732e-06, "loss": 0.6365, "step": 22205 }, { "epoch": 0.6805810959911732, "grad_norm": 1.239903555250823, "learning_rate": 2.4453280320962964e-06, "loss": 0.6212, "step": 22206 }, { "epoch": 0.6806117445139144, "grad_norm": 0.4411473921631256, "learning_rate": 2.4449014000866948e-06, "loss": 0.4155, "step": 22207 }, { "epoch": 0.6806423930366556, "grad_norm": 1.3724743866555755, "learning_rate": 2.4444747932531354e-06, "loss": 0.6477, "step": 22208 }, { "epoch": 0.6806730415593968, "grad_norm": 1.3242663633943716, "learning_rate": 2.4440482115998182e-06, "loss": 0.6232, "step": 22209 }, { "epoch": 0.680703690082138, "grad_norm": 1.4909396498117145, "learning_rate": 2.443621655130947e-06, "loss": 0.7111, "step": 22210 }, { "epoch": 0.6807343386048792, "grad_norm": 1.3357943224228437, "learning_rate": 2.4431951238507285e-06, "loss": 0.6495, "step": 22211 }, { "epoch": 0.6807649871276205, "grad_norm": 1.465682008167204, "learning_rate": 2.442768617763361e-06, "loss": 0.5384, "step": 22212 }, { "epoch": 0.6807956356503616, "grad_norm": 1.2639690611742107, "learning_rate": 2.4423421368730477e-06, "loss": 0.6496, "step": 22213 }, { "epoch": 0.6808262841731029, "grad_norm": 0.448952935579749, "learning_rate": 2.441915681183994e-06, "loss": 0.4091, "step": 22214 }, { "epoch": 0.680856932695844, "grad_norm": 1.2325439545913814, "learning_rate": 2.441489250700398e-06, "loss": 0.5293, "step": 22215 }, { "epoch": 0.6808875812185853, "grad_norm": 0.4304871984988083, "learning_rate": 2.4410628454264625e-06, "loss": 0.3805, "step": 22216 }, { "epoch": 0.6809182297413264, "grad_norm": 1.360469113716179, "learning_rate": 2.4406364653663917e-06, "loss": 0.6723, "step": 22217 }, { "epoch": 0.6809488782640677, "grad_norm": 0.4533845418698645, "learning_rate": 2.4402101105243824e-06, "loss": 0.4231, "step": 22218 }, { "epoch": 0.6809795267868088, "grad_norm": 1.3667511746271088, "learning_rate": 2.4397837809046405e-06, "loss": 0.6287, "step": 22219 }, { "epoch": 0.6810101753095501, "grad_norm": 1.198252434036235, "learning_rate": 2.4393574765113616e-06, "loss": 0.5662, "step": 22220 }, { "epoch": 0.6810408238322913, "grad_norm": 1.4605683289481763, "learning_rate": 2.438931197348749e-06, "loss": 0.6273, "step": 22221 }, { "epoch": 0.6810714723550325, "grad_norm": 0.4538293208227035, "learning_rate": 2.438504943421004e-06, "loss": 0.3953, "step": 22222 }, { "epoch": 0.6811021208777737, "grad_norm": 1.4338831815167037, "learning_rate": 2.4380787147323236e-06, "loss": 0.6197, "step": 22223 }, { "epoch": 0.6811327694005149, "grad_norm": 0.4445385658991785, "learning_rate": 2.437652511286909e-06, "loss": 0.4073, "step": 22224 }, { "epoch": 0.6811634179232561, "grad_norm": 1.4171029003181344, "learning_rate": 2.4372263330889616e-06, "loss": 0.7226, "step": 22225 }, { "epoch": 0.6811940664459973, "grad_norm": 1.2345659173005978, "learning_rate": 2.436800180142677e-06, "loss": 0.5756, "step": 22226 }, { "epoch": 0.6812247149687385, "grad_norm": 1.3849108946228113, "learning_rate": 2.4363740524522567e-06, "loss": 0.6355, "step": 22227 }, { "epoch": 0.6812553634914797, "grad_norm": 1.2203189074725505, "learning_rate": 2.4359479500218995e-06, "loss": 0.5868, "step": 22228 }, { "epoch": 0.6812860120142209, "grad_norm": 1.293205403561315, "learning_rate": 2.4355218728558022e-06, "loss": 0.5654, "step": 22229 }, { "epoch": 0.6813166605369622, "grad_norm": 1.2934489947093033, "learning_rate": 2.435095820958166e-06, "loss": 0.575, "step": 22230 }, { "epoch": 0.6813473090597033, "grad_norm": 1.251033953002317, "learning_rate": 2.4346697943331826e-06, "loss": 0.578, "step": 22231 }, { "epoch": 0.6813779575824446, "grad_norm": 1.3070199912583036, "learning_rate": 2.434243792985058e-06, "loss": 0.6948, "step": 22232 }, { "epoch": 0.6814086061051857, "grad_norm": 1.2967188149078341, "learning_rate": 2.433817816917986e-06, "loss": 0.6328, "step": 22233 }, { "epoch": 0.681439254627927, "grad_norm": 0.45901437381336685, "learning_rate": 2.4333918661361616e-06, "loss": 0.4105, "step": 22234 }, { "epoch": 0.6814699031506681, "grad_norm": 0.45211098645262515, "learning_rate": 2.432965940643784e-06, "loss": 0.3779, "step": 22235 }, { "epoch": 0.6815005516734094, "grad_norm": 1.3337062191082623, "learning_rate": 2.432540040445052e-06, "loss": 0.6253, "step": 22236 }, { "epoch": 0.6815312001961505, "grad_norm": 1.385722461702511, "learning_rate": 2.4321141655441573e-06, "loss": 0.5718, "step": 22237 }, { "epoch": 0.6815618487188917, "grad_norm": 1.1665481747541058, "learning_rate": 2.4316883159452985e-06, "loss": 0.5121, "step": 22238 }, { "epoch": 0.681592497241633, "grad_norm": 1.4358361101205688, "learning_rate": 2.4312624916526744e-06, "loss": 0.6553, "step": 22239 }, { "epoch": 0.6816231457643741, "grad_norm": 1.3316177187951017, "learning_rate": 2.4308366926704763e-06, "loss": 0.5634, "step": 22240 }, { "epoch": 0.6816537942871154, "grad_norm": 1.2775737524892457, "learning_rate": 2.4304109190029036e-06, "loss": 0.6055, "step": 22241 }, { "epoch": 0.6816844428098565, "grad_norm": 1.1768123587487878, "learning_rate": 2.4299851706541473e-06, "loss": 0.678, "step": 22242 }, { "epoch": 0.6817150913325978, "grad_norm": 0.453150433463955, "learning_rate": 2.4295594476284044e-06, "loss": 0.4007, "step": 22243 }, { "epoch": 0.6817457398553389, "grad_norm": 0.4561634176473314, "learning_rate": 2.429133749929873e-06, "loss": 0.4023, "step": 22244 }, { "epoch": 0.6817763883780802, "grad_norm": 1.3231502862126383, "learning_rate": 2.4287080775627413e-06, "loss": 0.558, "step": 22245 }, { "epoch": 0.6818070369008213, "grad_norm": 1.145147963790183, "learning_rate": 2.4282824305312075e-06, "loss": 0.5664, "step": 22246 }, { "epoch": 0.6818376854235626, "grad_norm": 1.272398685355973, "learning_rate": 2.4278568088394674e-06, "loss": 0.5829, "step": 22247 }, { "epoch": 0.6818683339463038, "grad_norm": 1.5365176175262036, "learning_rate": 2.4274312124917094e-06, "loss": 0.6027, "step": 22248 }, { "epoch": 0.681898982469045, "grad_norm": 1.2076515981809228, "learning_rate": 2.427005641492132e-06, "loss": 0.566, "step": 22249 }, { "epoch": 0.6819296309917862, "grad_norm": 1.3858428566504253, "learning_rate": 2.4265800958449227e-06, "loss": 0.5993, "step": 22250 }, { "epoch": 0.6819602795145274, "grad_norm": 1.5831861771133644, "learning_rate": 2.426154575554282e-06, "loss": 0.6161, "step": 22251 }, { "epoch": 0.6819909280372686, "grad_norm": 1.3322736713658037, "learning_rate": 2.4257290806243983e-06, "loss": 0.586, "step": 22252 }, { "epoch": 0.6820215765600098, "grad_norm": 1.2063581196499311, "learning_rate": 2.4253036110594634e-06, "loss": 0.5124, "step": 22253 }, { "epoch": 0.682052225082751, "grad_norm": 1.2613451810256977, "learning_rate": 2.4248781668636704e-06, "loss": 0.6728, "step": 22254 }, { "epoch": 0.6820828736054922, "grad_norm": 1.3629184885855226, "learning_rate": 2.424452748041214e-06, "loss": 0.6163, "step": 22255 }, { "epoch": 0.6821135221282334, "grad_norm": 1.2532230264649413, "learning_rate": 2.424027354596281e-06, "loss": 0.786, "step": 22256 }, { "epoch": 0.6821441706509747, "grad_norm": 0.44090172847346404, "learning_rate": 2.4236019865330664e-06, "loss": 0.3977, "step": 22257 }, { "epoch": 0.6821748191737158, "grad_norm": 1.3198518292587544, "learning_rate": 2.4231766438557604e-06, "loss": 0.5844, "step": 22258 }, { "epoch": 0.6822054676964571, "grad_norm": 1.2040536252872471, "learning_rate": 2.4227513265685558e-06, "loss": 0.5277, "step": 22259 }, { "epoch": 0.6822361162191982, "grad_norm": 1.4571283589671582, "learning_rate": 2.4223260346756416e-06, "loss": 0.699, "step": 22260 }, { "epoch": 0.6822667647419395, "grad_norm": 1.2657708921348376, "learning_rate": 2.421900768181205e-06, "loss": 0.6045, "step": 22261 }, { "epoch": 0.6822974132646806, "grad_norm": 1.2248283006803546, "learning_rate": 2.421475527089444e-06, "loss": 0.5761, "step": 22262 }, { "epoch": 0.6823280617874219, "grad_norm": 1.1980400336076205, "learning_rate": 2.421050311404544e-06, "loss": 0.6422, "step": 22263 }, { "epoch": 0.682358710310163, "grad_norm": 1.2941851183495812, "learning_rate": 2.4206251211306935e-06, "loss": 0.62, "step": 22264 }, { "epoch": 0.6823893588329043, "grad_norm": 0.4567892685991721, "learning_rate": 2.4201999562720835e-06, "loss": 0.4158, "step": 22265 }, { "epoch": 0.6824200073556455, "grad_norm": 1.182897197079375, "learning_rate": 2.419774816832905e-06, "loss": 0.5249, "step": 22266 }, { "epoch": 0.6824506558783867, "grad_norm": 1.334566288354557, "learning_rate": 2.4193497028173435e-06, "loss": 0.6655, "step": 22267 }, { "epoch": 0.6824813044011279, "grad_norm": 1.2910575156461468, "learning_rate": 2.4189246142295904e-06, "loss": 0.5823, "step": 22268 }, { "epoch": 0.682511952923869, "grad_norm": 1.2080484107363285, "learning_rate": 2.418499551073833e-06, "loss": 0.64, "step": 22269 }, { "epoch": 0.6825426014466103, "grad_norm": 1.2137763532084576, "learning_rate": 2.4180745133542617e-06, "loss": 0.5743, "step": 22270 }, { "epoch": 0.6825732499693514, "grad_norm": 1.3280935161624234, "learning_rate": 2.4176495010750626e-06, "loss": 0.6202, "step": 22271 }, { "epoch": 0.6826038984920927, "grad_norm": 1.4664404036520948, "learning_rate": 2.4172245142404207e-06, "loss": 0.5201, "step": 22272 }, { "epoch": 0.6826345470148338, "grad_norm": 1.222232664803758, "learning_rate": 2.4167995528545296e-06, "loss": 0.5888, "step": 22273 }, { "epoch": 0.6826651955375751, "grad_norm": 1.2846741658360292, "learning_rate": 2.416374616921574e-06, "loss": 0.602, "step": 22274 }, { "epoch": 0.6826958440603162, "grad_norm": 1.2869054630511838, "learning_rate": 2.415949706445738e-06, "loss": 0.6339, "step": 22275 }, { "epoch": 0.6827264925830575, "grad_norm": 1.4699745275399483, "learning_rate": 2.415524821431211e-06, "loss": 0.6125, "step": 22276 }, { "epoch": 0.6827571411057987, "grad_norm": 1.3993287021838714, "learning_rate": 2.415099961882179e-06, "loss": 0.6071, "step": 22277 }, { "epoch": 0.6827877896285399, "grad_norm": 1.3134833668903632, "learning_rate": 2.4146751278028306e-06, "loss": 0.5747, "step": 22278 }, { "epoch": 0.6828184381512811, "grad_norm": 1.3345927580128552, "learning_rate": 2.4142503191973475e-06, "loss": 0.5097, "step": 22279 }, { "epoch": 0.6828490866740223, "grad_norm": 1.465032700228514, "learning_rate": 2.4138255360699183e-06, "loss": 0.5765, "step": 22280 }, { "epoch": 0.6828797351967635, "grad_norm": 1.1804794819939284, "learning_rate": 2.4134007784247287e-06, "loss": 0.5562, "step": 22281 }, { "epoch": 0.6829103837195047, "grad_norm": 1.3365171991770033, "learning_rate": 2.4129760462659634e-06, "loss": 0.6775, "step": 22282 }, { "epoch": 0.6829410322422459, "grad_norm": 1.2252503171639806, "learning_rate": 2.4125513395978034e-06, "loss": 0.6181, "step": 22283 }, { "epoch": 0.6829716807649872, "grad_norm": 1.356164799716164, "learning_rate": 2.4121266584244407e-06, "loss": 0.6183, "step": 22284 }, { "epoch": 0.6830023292877283, "grad_norm": 1.630342502597369, "learning_rate": 2.411702002750056e-06, "loss": 0.6265, "step": 22285 }, { "epoch": 0.6830329778104696, "grad_norm": 0.4529961296577787, "learning_rate": 2.4112773725788324e-06, "loss": 0.3878, "step": 22286 }, { "epoch": 0.6830636263332107, "grad_norm": 1.318867005888944, "learning_rate": 2.4108527679149548e-06, "loss": 0.6021, "step": 22287 }, { "epoch": 0.683094274855952, "grad_norm": 0.4944181156943623, "learning_rate": 2.4104281887626075e-06, "loss": 0.4073, "step": 22288 }, { "epoch": 0.6831249233786931, "grad_norm": 1.4084999330906287, "learning_rate": 2.4100036351259754e-06, "loss": 0.7425, "step": 22289 }, { "epoch": 0.6831555719014344, "grad_norm": 1.474723650395511, "learning_rate": 2.4095791070092385e-06, "loss": 0.6037, "step": 22290 }, { "epoch": 0.6831862204241755, "grad_norm": 0.47556883307442327, "learning_rate": 2.4091546044165816e-06, "loss": 0.4122, "step": 22291 }, { "epoch": 0.6832168689469168, "grad_norm": 1.4330432443058063, "learning_rate": 2.4087301273521883e-06, "loss": 0.6791, "step": 22292 }, { "epoch": 0.683247517469658, "grad_norm": 1.2591078970414558, "learning_rate": 2.408305675820241e-06, "loss": 0.6196, "step": 22293 }, { "epoch": 0.6832781659923992, "grad_norm": 1.3237360491826735, "learning_rate": 2.407881249824919e-06, "loss": 0.6577, "step": 22294 }, { "epoch": 0.6833088145151404, "grad_norm": 1.2717236403571233, "learning_rate": 2.407456849370406e-06, "loss": 0.6693, "step": 22295 }, { "epoch": 0.6833394630378816, "grad_norm": 0.4618425417268577, "learning_rate": 2.407032474460884e-06, "loss": 0.3979, "step": 22296 }, { "epoch": 0.6833701115606228, "grad_norm": 1.4892878184470566, "learning_rate": 2.406608125100536e-06, "loss": 0.6519, "step": 22297 }, { "epoch": 0.683400760083364, "grad_norm": 1.446977671893896, "learning_rate": 2.4061838012935405e-06, "loss": 0.6541, "step": 22298 }, { "epoch": 0.6834314086061052, "grad_norm": 1.4362626367746643, "learning_rate": 2.40575950304408e-06, "loss": 0.7242, "step": 22299 }, { "epoch": 0.6834620571288463, "grad_norm": 1.4822025028394505, "learning_rate": 2.405335230356336e-06, "loss": 0.5878, "step": 22300 }, { "epoch": 0.6834927056515876, "grad_norm": 1.3061679519530063, "learning_rate": 2.404910983234488e-06, "loss": 0.6303, "step": 22301 }, { "epoch": 0.6835233541743287, "grad_norm": 1.401658795267007, "learning_rate": 2.404486761682712e-06, "loss": 0.592, "step": 22302 }, { "epoch": 0.68355400269707, "grad_norm": 1.348894102178923, "learning_rate": 2.4040625657051965e-06, "loss": 0.572, "step": 22303 }, { "epoch": 0.6835846512198112, "grad_norm": 1.442027254009297, "learning_rate": 2.403638395306114e-06, "loss": 0.5868, "step": 22304 }, { "epoch": 0.6836152997425524, "grad_norm": 1.2901729253176206, "learning_rate": 2.4032142504896494e-06, "loss": 0.6617, "step": 22305 }, { "epoch": 0.6836459482652936, "grad_norm": 1.2407886018019387, "learning_rate": 2.4027901312599773e-06, "loss": 0.5595, "step": 22306 }, { "epoch": 0.6836765967880348, "grad_norm": 1.248199143547177, "learning_rate": 2.4023660376212783e-06, "loss": 0.5762, "step": 22307 }, { "epoch": 0.683707245310776, "grad_norm": 1.3602336182172956, "learning_rate": 2.401941969577733e-06, "loss": 0.5448, "step": 22308 }, { "epoch": 0.6837378938335172, "grad_norm": 1.123691772718014, "learning_rate": 2.4015179271335167e-06, "loss": 0.5336, "step": 22309 }, { "epoch": 0.6837685423562584, "grad_norm": 0.4695863610357435, "learning_rate": 2.4010939102928086e-06, "loss": 0.4023, "step": 22310 }, { "epoch": 0.6837991908789997, "grad_norm": 1.504565119678936, "learning_rate": 2.4006699190597895e-06, "loss": 0.693, "step": 22311 }, { "epoch": 0.6838298394017408, "grad_norm": 1.188306095607123, "learning_rate": 2.400245953438635e-06, "loss": 0.6965, "step": 22312 }, { "epoch": 0.6838604879244821, "grad_norm": 0.4620256949842428, "learning_rate": 2.399822013433518e-06, "loss": 0.3963, "step": 22313 }, { "epoch": 0.6838911364472232, "grad_norm": 1.1465212674691856, "learning_rate": 2.3993980990486238e-06, "loss": 0.568, "step": 22314 }, { "epoch": 0.6839217849699645, "grad_norm": 1.446176900586057, "learning_rate": 2.3989742102881234e-06, "loss": 0.6975, "step": 22315 }, { "epoch": 0.6839524334927056, "grad_norm": 1.3869979569768027, "learning_rate": 2.398550347156198e-06, "loss": 0.4802, "step": 22316 }, { "epoch": 0.6839830820154469, "grad_norm": 1.3445728066052445, "learning_rate": 2.39812650965702e-06, "loss": 0.5513, "step": 22317 }, { "epoch": 0.684013730538188, "grad_norm": 1.310046703939262, "learning_rate": 2.3977026977947666e-06, "loss": 0.6147, "step": 22318 }, { "epoch": 0.6840443790609293, "grad_norm": 0.4438035523897195, "learning_rate": 2.397278911573617e-06, "loss": 0.4167, "step": 22319 }, { "epoch": 0.6840750275836704, "grad_norm": 1.1884285353315012, "learning_rate": 2.3968551509977413e-06, "loss": 0.513, "step": 22320 }, { "epoch": 0.6841056761064117, "grad_norm": 1.3593829167926095, "learning_rate": 2.396431416071318e-06, "loss": 0.638, "step": 22321 }, { "epoch": 0.6841363246291529, "grad_norm": 1.4149233385466546, "learning_rate": 2.396007706798525e-06, "loss": 0.5601, "step": 22322 }, { "epoch": 0.6841669731518941, "grad_norm": 1.5587368474498922, "learning_rate": 2.3955840231835314e-06, "loss": 0.6821, "step": 22323 }, { "epoch": 0.6841976216746353, "grad_norm": 1.2152112088452596, "learning_rate": 2.395160365230515e-06, "loss": 0.5073, "step": 22324 }, { "epoch": 0.6842282701973765, "grad_norm": 1.3231534202397153, "learning_rate": 2.3947367329436523e-06, "loss": 0.5599, "step": 22325 }, { "epoch": 0.6842589187201177, "grad_norm": 0.476870462992695, "learning_rate": 2.394313126327113e-06, "loss": 0.3871, "step": 22326 }, { "epoch": 0.6842895672428589, "grad_norm": 1.086740424693541, "learning_rate": 2.3938895453850753e-06, "loss": 0.541, "step": 22327 }, { "epoch": 0.6843202157656001, "grad_norm": 0.4429394142076442, "learning_rate": 2.393465990121708e-06, "loss": 0.3937, "step": 22328 }, { "epoch": 0.6843508642883414, "grad_norm": 0.46031243008895883, "learning_rate": 2.3930424605411885e-06, "loss": 0.3807, "step": 22329 }, { "epoch": 0.6843815128110825, "grad_norm": 1.2040491465329344, "learning_rate": 2.392618956647689e-06, "loss": 0.5941, "step": 22330 }, { "epoch": 0.6844121613338237, "grad_norm": 1.2644850745967267, "learning_rate": 2.3921954784453814e-06, "loss": 0.5569, "step": 22331 }, { "epoch": 0.6844428098565649, "grad_norm": 1.3090590217615001, "learning_rate": 2.3917720259384386e-06, "loss": 0.601, "step": 22332 }, { "epoch": 0.6844734583793061, "grad_norm": 1.369670386219072, "learning_rate": 2.3913485991310352e-06, "loss": 0.6617, "step": 22333 }, { "epoch": 0.6845041069020473, "grad_norm": 1.2538661120540673, "learning_rate": 2.3909251980273397e-06, "loss": 0.5849, "step": 22334 }, { "epoch": 0.6845347554247885, "grad_norm": 1.2136835545580105, "learning_rate": 2.3905018226315256e-06, "loss": 0.5481, "step": 22335 }, { "epoch": 0.6845654039475297, "grad_norm": 1.3080921259217437, "learning_rate": 2.3900784729477672e-06, "loss": 0.5594, "step": 22336 }, { "epoch": 0.6845960524702709, "grad_norm": 1.473259704664427, "learning_rate": 2.3896551489802307e-06, "loss": 0.6579, "step": 22337 }, { "epoch": 0.6846267009930121, "grad_norm": 1.3000264967660917, "learning_rate": 2.389231850733092e-06, "loss": 0.6722, "step": 22338 }, { "epoch": 0.6846573495157533, "grad_norm": 1.53058391580427, "learning_rate": 2.388808578210518e-06, "loss": 0.6354, "step": 22339 }, { "epoch": 0.6846879980384946, "grad_norm": 0.47228517053940405, "learning_rate": 2.3883853314166815e-06, "loss": 0.4169, "step": 22340 }, { "epoch": 0.6847186465612357, "grad_norm": 1.2464351240379836, "learning_rate": 2.3879621103557545e-06, "loss": 0.5978, "step": 22341 }, { "epoch": 0.684749295083977, "grad_norm": 0.455983369917684, "learning_rate": 2.387538915031903e-06, "loss": 0.4046, "step": 22342 }, { "epoch": 0.6847799436067181, "grad_norm": 1.3835526349146139, "learning_rate": 2.3871157454492987e-06, "loss": 0.6158, "step": 22343 }, { "epoch": 0.6848105921294594, "grad_norm": 0.4511211530241448, "learning_rate": 2.386692601612114e-06, "loss": 0.3921, "step": 22344 }, { "epoch": 0.6848412406522005, "grad_norm": 1.1286621135906347, "learning_rate": 2.386269483524513e-06, "loss": 0.5399, "step": 22345 }, { "epoch": 0.6848718891749418, "grad_norm": 0.45738725174241057, "learning_rate": 2.3858463911906704e-06, "loss": 0.4181, "step": 22346 }, { "epoch": 0.684902537697683, "grad_norm": 1.1771102351791913, "learning_rate": 2.3854233246147494e-06, "loss": 0.5492, "step": 22347 }, { "epoch": 0.6849331862204242, "grad_norm": 1.240241262698766, "learning_rate": 2.3850002838009216e-06, "loss": 0.6494, "step": 22348 }, { "epoch": 0.6849638347431654, "grad_norm": 1.2503245841928714, "learning_rate": 2.3845772687533576e-06, "loss": 0.5016, "step": 22349 }, { "epoch": 0.6849944832659066, "grad_norm": 1.2060684052148338, "learning_rate": 2.384154279476221e-06, "loss": 0.5783, "step": 22350 }, { "epoch": 0.6850251317886478, "grad_norm": 1.4033226223332151, "learning_rate": 2.383731315973681e-06, "loss": 0.5948, "step": 22351 }, { "epoch": 0.685055780311389, "grad_norm": 1.237244615768236, "learning_rate": 2.383308378249907e-06, "loss": 0.6029, "step": 22352 }, { "epoch": 0.6850864288341302, "grad_norm": 0.44460938102292735, "learning_rate": 2.3828854663090646e-06, "loss": 0.3894, "step": 22353 }, { "epoch": 0.6851170773568714, "grad_norm": 1.7136823302427007, "learning_rate": 2.3824625801553203e-06, "loss": 0.5486, "step": 22354 }, { "epoch": 0.6851477258796126, "grad_norm": 1.3162388444809083, "learning_rate": 2.382039719792844e-06, "loss": 0.6358, "step": 22355 }, { "epoch": 0.6851783744023539, "grad_norm": 1.169461265434956, "learning_rate": 2.3816168852257986e-06, "loss": 0.6625, "step": 22356 }, { "epoch": 0.685209022925095, "grad_norm": 1.6180737448815903, "learning_rate": 2.381194076458354e-06, "loss": 0.6843, "step": 22357 }, { "epoch": 0.6852396714478363, "grad_norm": 1.1773050512256633, "learning_rate": 2.3807712934946703e-06, "loss": 0.5852, "step": 22358 }, { "epoch": 0.6852703199705774, "grad_norm": 1.386325685197079, "learning_rate": 2.3803485363389205e-06, "loss": 0.6559, "step": 22359 }, { "epoch": 0.6853009684933187, "grad_norm": 1.3326693803727985, "learning_rate": 2.3799258049952674e-06, "loss": 0.5356, "step": 22360 }, { "epoch": 0.6853316170160598, "grad_norm": 1.3118059428159918, "learning_rate": 2.3795030994678736e-06, "loss": 0.6624, "step": 22361 }, { "epoch": 0.685362265538801, "grad_norm": 1.4053811537961403, "learning_rate": 2.3790804197609062e-06, "loss": 0.611, "step": 22362 }, { "epoch": 0.6853929140615422, "grad_norm": 1.4079300886002768, "learning_rate": 2.378657765878532e-06, "loss": 0.492, "step": 22363 }, { "epoch": 0.6854235625842834, "grad_norm": 0.4573459205985918, "learning_rate": 2.378235137824912e-06, "loss": 0.4063, "step": 22364 }, { "epoch": 0.6854542111070246, "grad_norm": 1.4600410501992707, "learning_rate": 2.3778125356042112e-06, "loss": 0.6116, "step": 22365 }, { "epoch": 0.6854848596297658, "grad_norm": 1.242028867596286, "learning_rate": 2.3773899592205966e-06, "loss": 0.6078, "step": 22366 }, { "epoch": 0.6855155081525071, "grad_norm": 1.44684220637632, "learning_rate": 2.3769674086782284e-06, "loss": 0.6672, "step": 22367 }, { "epoch": 0.6855461566752482, "grad_norm": 1.4057125489452265, "learning_rate": 2.3765448839812727e-06, "loss": 0.6341, "step": 22368 }, { "epoch": 0.6855768051979895, "grad_norm": 1.3499140690485172, "learning_rate": 2.376122385133888e-06, "loss": 0.5876, "step": 22369 }, { "epoch": 0.6856074537207306, "grad_norm": 0.4356878036619357, "learning_rate": 2.3756999121402446e-06, "loss": 0.3899, "step": 22370 }, { "epoch": 0.6856381022434719, "grad_norm": 0.46079084928834385, "learning_rate": 2.3752774650045014e-06, "loss": 0.3987, "step": 22371 }, { "epoch": 0.685668750766213, "grad_norm": 1.3535346137648836, "learning_rate": 2.3748550437308187e-06, "loss": 0.5777, "step": 22372 }, { "epoch": 0.6856993992889543, "grad_norm": 1.1775828319959745, "learning_rate": 2.3744326483233615e-06, "loss": 0.5672, "step": 22373 }, { "epoch": 0.6857300478116954, "grad_norm": 0.46322787345910255, "learning_rate": 2.3740102787862925e-06, "loss": 0.4175, "step": 22374 }, { "epoch": 0.6857606963344367, "grad_norm": 1.2902788396965967, "learning_rate": 2.3735879351237706e-06, "loss": 0.6509, "step": 22375 }, { "epoch": 0.6857913448571779, "grad_norm": 0.4649694924943162, "learning_rate": 2.3731656173399585e-06, "loss": 0.3841, "step": 22376 }, { "epoch": 0.6858219933799191, "grad_norm": 1.3184492078998626, "learning_rate": 2.372743325439018e-06, "loss": 0.5351, "step": 22377 }, { "epoch": 0.6858526419026603, "grad_norm": 1.3479758505194805, "learning_rate": 2.372321059425111e-06, "loss": 0.6682, "step": 22378 }, { "epoch": 0.6858832904254015, "grad_norm": 0.44027002682475774, "learning_rate": 2.3718988193023977e-06, "loss": 0.4127, "step": 22379 }, { "epoch": 0.6859139389481427, "grad_norm": 1.194029219354386, "learning_rate": 2.3714766050750355e-06, "loss": 0.6434, "step": 22380 }, { "epoch": 0.6859445874708839, "grad_norm": 1.3855708834092728, "learning_rate": 2.3710544167471867e-06, "loss": 0.6339, "step": 22381 }, { "epoch": 0.6859752359936251, "grad_norm": 1.3287281693693167, "learning_rate": 2.3706322543230136e-06, "loss": 0.5542, "step": 22382 }, { "epoch": 0.6860058845163663, "grad_norm": 1.2820292291689543, "learning_rate": 2.3702101178066718e-06, "loss": 0.5823, "step": 22383 }, { "epoch": 0.6860365330391075, "grad_norm": 1.215413461399439, "learning_rate": 2.3697880072023223e-06, "loss": 0.5709, "step": 22384 }, { "epoch": 0.6860671815618488, "grad_norm": 1.2911896333364083, "learning_rate": 2.369365922514125e-06, "loss": 0.6093, "step": 22385 }, { "epoch": 0.6860978300845899, "grad_norm": 1.2392684445279118, "learning_rate": 2.3689438637462393e-06, "loss": 0.6562, "step": 22386 }, { "epoch": 0.6861284786073312, "grad_norm": 1.855316756151988, "learning_rate": 2.368521830902822e-06, "loss": 0.7028, "step": 22387 }, { "epoch": 0.6861591271300723, "grad_norm": 1.4219010010929096, "learning_rate": 2.3680998239880315e-06, "loss": 0.5582, "step": 22388 }, { "epoch": 0.6861897756528136, "grad_norm": 0.4524885967162231, "learning_rate": 2.367677843006029e-06, "loss": 0.3945, "step": 22389 }, { "epoch": 0.6862204241755547, "grad_norm": 1.2975454583301884, "learning_rate": 2.3672558879609707e-06, "loss": 0.6317, "step": 22390 }, { "epoch": 0.686251072698296, "grad_norm": 1.4268496704981308, "learning_rate": 2.3668339588570115e-06, "loss": 0.6526, "step": 22391 }, { "epoch": 0.6862817212210371, "grad_norm": 1.4035740579170732, "learning_rate": 2.366412055698311e-06, "loss": 0.6453, "step": 22392 }, { "epoch": 0.6863123697437783, "grad_norm": 0.4393746701867713, "learning_rate": 2.365990178489028e-06, "loss": 0.3883, "step": 22393 }, { "epoch": 0.6863430182665196, "grad_norm": 1.4095274389114725, "learning_rate": 2.3655683272333163e-06, "loss": 0.6808, "step": 22394 }, { "epoch": 0.6863736667892607, "grad_norm": 1.450663817446142, "learning_rate": 2.365146501935334e-06, "loss": 0.5898, "step": 22395 }, { "epoch": 0.686404315312002, "grad_norm": 1.2396911798386718, "learning_rate": 2.364724702599237e-06, "loss": 0.5622, "step": 22396 }, { "epoch": 0.6864349638347431, "grad_norm": 1.3814488806776077, "learning_rate": 2.364302929229184e-06, "loss": 0.6625, "step": 22397 }, { "epoch": 0.6864656123574844, "grad_norm": 1.3333115027403375, "learning_rate": 2.3638811818293284e-06, "loss": 0.5842, "step": 22398 }, { "epoch": 0.6864962608802255, "grad_norm": 0.4379456401990809, "learning_rate": 2.363459460403822e-06, "loss": 0.3883, "step": 22399 }, { "epoch": 0.6865269094029668, "grad_norm": 1.3705123903578416, "learning_rate": 2.363037764956828e-06, "loss": 0.6149, "step": 22400 }, { "epoch": 0.6865575579257079, "grad_norm": 1.1766112003210827, "learning_rate": 2.362616095492498e-06, "loss": 0.5915, "step": 22401 }, { "epoch": 0.6865882064484492, "grad_norm": 1.283486986139185, "learning_rate": 2.3621944520149842e-06, "loss": 0.5278, "step": 22402 }, { "epoch": 0.6866188549711904, "grad_norm": 1.361453580529735, "learning_rate": 2.3617728345284434e-06, "loss": 0.6308, "step": 22403 }, { "epoch": 0.6866495034939316, "grad_norm": 1.4475532594725748, "learning_rate": 2.36135124303703e-06, "loss": 0.6813, "step": 22404 }, { "epoch": 0.6866801520166728, "grad_norm": 1.33529805547163, "learning_rate": 2.3609296775448998e-06, "loss": 0.6095, "step": 22405 }, { "epoch": 0.686710800539414, "grad_norm": 0.4678632094173526, "learning_rate": 2.360508138056203e-06, "loss": 0.3986, "step": 22406 }, { "epoch": 0.6867414490621552, "grad_norm": 1.410786757977538, "learning_rate": 2.360086624575094e-06, "loss": 0.6975, "step": 22407 }, { "epoch": 0.6867720975848964, "grad_norm": 1.2553831550533867, "learning_rate": 2.3596651371057293e-06, "loss": 0.638, "step": 22408 }, { "epoch": 0.6868027461076376, "grad_norm": 1.389711888748073, "learning_rate": 2.35924367565226e-06, "loss": 0.6376, "step": 22409 }, { "epoch": 0.6868333946303788, "grad_norm": 1.2340818666399584, "learning_rate": 2.3588222402188343e-06, "loss": 0.626, "step": 22410 }, { "epoch": 0.68686404315312, "grad_norm": 1.454852373836377, "learning_rate": 2.3584008308096127e-06, "loss": 0.5699, "step": 22411 }, { "epoch": 0.6868946916758613, "grad_norm": 1.3695865233451137, "learning_rate": 2.3579794474287416e-06, "loss": 0.65, "step": 22412 }, { "epoch": 0.6869253401986024, "grad_norm": 1.279935847026917, "learning_rate": 2.357558090080377e-06, "loss": 0.6239, "step": 22413 }, { "epoch": 0.6869559887213437, "grad_norm": 1.222705734690461, "learning_rate": 2.3571367587686667e-06, "loss": 0.6262, "step": 22414 }, { "epoch": 0.6869866372440848, "grad_norm": 1.2488370927769945, "learning_rate": 2.3567154534977643e-06, "loss": 0.4883, "step": 22415 }, { "epoch": 0.6870172857668261, "grad_norm": 1.467036799991903, "learning_rate": 2.3562941742718227e-06, "loss": 0.5992, "step": 22416 }, { "epoch": 0.6870479342895672, "grad_norm": 1.3965785803867266, "learning_rate": 2.355872921094989e-06, "loss": 0.5741, "step": 22417 }, { "epoch": 0.6870785828123085, "grad_norm": 1.3644605869533029, "learning_rate": 2.3554516939714156e-06, "loss": 0.6668, "step": 22418 }, { "epoch": 0.6871092313350496, "grad_norm": 1.5399835458507656, "learning_rate": 2.355030492905256e-06, "loss": 0.6338, "step": 22419 }, { "epoch": 0.6871398798577909, "grad_norm": 1.3852751894269826, "learning_rate": 2.354609317900657e-06, "loss": 0.5813, "step": 22420 }, { "epoch": 0.687170528380532, "grad_norm": 1.298928593431189, "learning_rate": 2.354188168961766e-06, "loss": 0.6018, "step": 22421 }, { "epoch": 0.6872011769032733, "grad_norm": 1.3253757904540013, "learning_rate": 2.353767046092739e-06, "loss": 0.6147, "step": 22422 }, { "epoch": 0.6872318254260145, "grad_norm": 1.1900465353905634, "learning_rate": 2.3533459492977208e-06, "loss": 0.6399, "step": 22423 }, { "epoch": 0.6872624739487556, "grad_norm": 1.249077806052032, "learning_rate": 2.352924878580864e-06, "loss": 0.6176, "step": 22424 }, { "epoch": 0.6872931224714969, "grad_norm": 1.3127950238391017, "learning_rate": 2.3525038339463143e-06, "loss": 0.6211, "step": 22425 }, { "epoch": 0.687323770994238, "grad_norm": 0.454437838694067, "learning_rate": 2.352082815398221e-06, "loss": 0.4069, "step": 22426 }, { "epoch": 0.6873544195169793, "grad_norm": 1.2789659068811492, "learning_rate": 2.3516618229407356e-06, "loss": 0.621, "step": 22427 }, { "epoch": 0.6873850680397204, "grad_norm": 1.3440093339801658, "learning_rate": 2.3512408565780013e-06, "loss": 0.6614, "step": 22428 }, { "epoch": 0.6874157165624617, "grad_norm": 1.359712254966218, "learning_rate": 2.3508199163141694e-06, "loss": 0.6255, "step": 22429 }, { "epoch": 0.6874463650852028, "grad_norm": 1.4617713284672655, "learning_rate": 2.350399002153388e-06, "loss": 0.6404, "step": 22430 }, { "epoch": 0.6874770136079441, "grad_norm": 0.4530609114585965, "learning_rate": 2.3499781140998016e-06, "loss": 0.4007, "step": 22431 }, { "epoch": 0.6875076621306853, "grad_norm": 1.2419926919388131, "learning_rate": 2.3495572521575603e-06, "loss": 0.499, "step": 22432 }, { "epoch": 0.6875383106534265, "grad_norm": 1.302610944641019, "learning_rate": 2.3491364163308083e-06, "loss": 0.5847, "step": 22433 }, { "epoch": 0.6875689591761677, "grad_norm": 1.2743897297752205, "learning_rate": 2.3487156066236934e-06, "loss": 0.6279, "step": 22434 }, { "epoch": 0.6875996076989089, "grad_norm": 1.4848813624792927, "learning_rate": 2.3482948230403637e-06, "loss": 0.6139, "step": 22435 }, { "epoch": 0.6876302562216501, "grad_norm": 1.555182576828999, "learning_rate": 2.3478740655849612e-06, "loss": 0.7247, "step": 22436 }, { "epoch": 0.6876609047443913, "grad_norm": 1.2988547921417715, "learning_rate": 2.3474533342616344e-06, "loss": 0.5505, "step": 22437 }, { "epoch": 0.6876915532671325, "grad_norm": 0.4447320684622038, "learning_rate": 2.3470326290745302e-06, "loss": 0.3801, "step": 22438 }, { "epoch": 0.6877222017898738, "grad_norm": 1.156614533384041, "learning_rate": 2.346611950027791e-06, "loss": 0.7079, "step": 22439 }, { "epoch": 0.6877528503126149, "grad_norm": 1.359498265957686, "learning_rate": 2.3461912971255635e-06, "loss": 0.6056, "step": 22440 }, { "epoch": 0.6877834988353562, "grad_norm": 1.3605704355424393, "learning_rate": 2.345770670371993e-06, "loss": 0.6135, "step": 22441 }, { "epoch": 0.6878141473580973, "grad_norm": 1.4373668359117926, "learning_rate": 2.345350069771222e-06, "loss": 0.7366, "step": 22442 }, { "epoch": 0.6878447958808386, "grad_norm": 0.46423850659182164, "learning_rate": 2.344929495327398e-06, "loss": 0.3729, "step": 22443 }, { "epoch": 0.6878754444035797, "grad_norm": 0.4379447280521209, "learning_rate": 2.3445089470446604e-06, "loss": 0.3812, "step": 22444 }, { "epoch": 0.687906092926321, "grad_norm": 1.4983372746234798, "learning_rate": 2.344088424927156e-06, "loss": 0.6238, "step": 22445 }, { "epoch": 0.6879367414490621, "grad_norm": 1.2192334626616792, "learning_rate": 2.3436679289790297e-06, "loss": 0.5562, "step": 22446 }, { "epoch": 0.6879673899718034, "grad_norm": 1.2871578862631523, "learning_rate": 2.3432474592044214e-06, "loss": 0.5776, "step": 22447 }, { "epoch": 0.6879980384945446, "grad_norm": 1.217604210273663, "learning_rate": 2.342827015607475e-06, "loss": 0.5944, "step": 22448 }, { "epoch": 0.6880286870172858, "grad_norm": 1.5362497032210785, "learning_rate": 2.342406598192336e-06, "loss": 0.5923, "step": 22449 }, { "epoch": 0.688059335540027, "grad_norm": 1.3350772477950215, "learning_rate": 2.3419862069631433e-06, "loss": 0.5897, "step": 22450 }, { "epoch": 0.6880899840627682, "grad_norm": 0.4593120569561736, "learning_rate": 2.34156584192404e-06, "loss": 0.3873, "step": 22451 }, { "epoch": 0.6881206325855094, "grad_norm": 1.3870769074934695, "learning_rate": 2.341145503079171e-06, "loss": 0.6299, "step": 22452 }, { "epoch": 0.6881512811082506, "grad_norm": 1.353728179485826, "learning_rate": 2.3407251904326733e-06, "loss": 0.5648, "step": 22453 }, { "epoch": 0.6881819296309918, "grad_norm": 1.3072786048370029, "learning_rate": 2.3403049039886932e-06, "loss": 0.6122, "step": 22454 }, { "epoch": 0.688212578153733, "grad_norm": 1.3635124548448798, "learning_rate": 2.339884643751367e-06, "loss": 0.5244, "step": 22455 }, { "epoch": 0.6882432266764742, "grad_norm": 1.2085325139872207, "learning_rate": 2.339464409724838e-06, "loss": 0.5861, "step": 22456 }, { "epoch": 0.6882738751992153, "grad_norm": 1.4307083846741728, "learning_rate": 2.339044201913249e-06, "loss": 0.6488, "step": 22457 }, { "epoch": 0.6883045237219566, "grad_norm": 1.2175773636799907, "learning_rate": 2.3386240203207365e-06, "loss": 0.5787, "step": 22458 }, { "epoch": 0.6883351722446978, "grad_norm": 1.2681784882406275, "learning_rate": 2.338203864951443e-06, "loss": 0.6006, "step": 22459 }, { "epoch": 0.688365820767439, "grad_norm": 1.3101681645520535, "learning_rate": 2.337783735809509e-06, "loss": 0.6, "step": 22460 }, { "epoch": 0.6883964692901802, "grad_norm": 0.4578803788815117, "learning_rate": 2.3373636328990713e-06, "loss": 0.3999, "step": 22461 }, { "epoch": 0.6884271178129214, "grad_norm": 1.304028281948709, "learning_rate": 2.336943556224271e-06, "loss": 0.5591, "step": 22462 }, { "epoch": 0.6884577663356626, "grad_norm": 1.4229603129076773, "learning_rate": 2.336523505789249e-06, "loss": 0.5476, "step": 22463 }, { "epoch": 0.6884884148584038, "grad_norm": 1.4435796402965322, "learning_rate": 2.3361034815981406e-06, "loss": 0.7083, "step": 22464 }, { "epoch": 0.688519063381145, "grad_norm": 1.3632615536620116, "learning_rate": 2.335683483655088e-06, "loss": 0.6633, "step": 22465 }, { "epoch": 0.6885497119038863, "grad_norm": 1.424512427842714, "learning_rate": 2.3352635119642252e-06, "loss": 0.5439, "step": 22466 }, { "epoch": 0.6885803604266274, "grad_norm": 1.339974154357436, "learning_rate": 2.3348435665296937e-06, "loss": 0.7379, "step": 22467 }, { "epoch": 0.6886110089493687, "grad_norm": 1.179819195873898, "learning_rate": 2.334423647355632e-06, "loss": 0.6604, "step": 22468 }, { "epoch": 0.6886416574721098, "grad_norm": 1.2749339119065903, "learning_rate": 2.3340037544461745e-06, "loss": 0.5695, "step": 22469 }, { "epoch": 0.6886723059948511, "grad_norm": 1.2959780809338983, "learning_rate": 2.3335838878054602e-06, "loss": 0.5557, "step": 22470 }, { "epoch": 0.6887029545175922, "grad_norm": 0.47769411871336853, "learning_rate": 2.3331640474376277e-06, "loss": 0.4067, "step": 22471 }, { "epoch": 0.6887336030403335, "grad_norm": 1.4124391666164802, "learning_rate": 2.3327442333468104e-06, "loss": 0.5789, "step": 22472 }, { "epoch": 0.6887642515630746, "grad_norm": 0.5048272727430306, "learning_rate": 2.3323244455371465e-06, "loss": 0.4027, "step": 22473 }, { "epoch": 0.6887949000858159, "grad_norm": 1.3932786823028316, "learning_rate": 2.3319046840127742e-06, "loss": 0.6137, "step": 22474 }, { "epoch": 0.688825548608557, "grad_norm": 0.43898654838612505, "learning_rate": 2.3314849487778258e-06, "loss": 0.3711, "step": 22475 }, { "epoch": 0.6888561971312983, "grad_norm": 1.410551673698776, "learning_rate": 2.3310652398364415e-06, "loss": 0.6664, "step": 22476 }, { "epoch": 0.6888868456540395, "grad_norm": 1.2580360431232114, "learning_rate": 2.330645557192752e-06, "loss": 0.6581, "step": 22477 }, { "epoch": 0.6889174941767807, "grad_norm": 0.4406104167301904, "learning_rate": 2.3302259008508942e-06, "loss": 0.3931, "step": 22478 }, { "epoch": 0.6889481426995219, "grad_norm": 1.3654305340877804, "learning_rate": 2.329806270815006e-06, "loss": 0.6333, "step": 22479 }, { "epoch": 0.6889787912222631, "grad_norm": 1.2546608279079763, "learning_rate": 2.3293866670892185e-06, "loss": 0.5009, "step": 22480 }, { "epoch": 0.6890094397450043, "grad_norm": 1.1746433070839981, "learning_rate": 2.3289670896776666e-06, "loss": 0.6609, "step": 22481 }, { "epoch": 0.6890400882677455, "grad_norm": 1.2181283404558825, "learning_rate": 2.3285475385844876e-06, "loss": 0.6162, "step": 22482 }, { "epoch": 0.6890707367904867, "grad_norm": 1.3882159141690973, "learning_rate": 2.328128013813811e-06, "loss": 0.6248, "step": 22483 }, { "epoch": 0.689101385313228, "grad_norm": 0.46788128229290016, "learning_rate": 2.3277085153697755e-06, "loss": 0.3908, "step": 22484 }, { "epoch": 0.6891320338359691, "grad_norm": 1.2690576056729057, "learning_rate": 2.3272890432565077e-06, "loss": 0.5887, "step": 22485 }, { "epoch": 0.6891626823587104, "grad_norm": 1.4553992083691085, "learning_rate": 2.326869597478148e-06, "loss": 0.7431, "step": 22486 }, { "epoch": 0.6891933308814515, "grad_norm": 0.45094735061442404, "learning_rate": 2.3264501780388267e-06, "loss": 0.3878, "step": 22487 }, { "epoch": 0.6892239794041927, "grad_norm": 0.4539599057882512, "learning_rate": 2.3260307849426733e-06, "loss": 0.3938, "step": 22488 }, { "epoch": 0.6892546279269339, "grad_norm": 1.3242309786670539, "learning_rate": 2.325611418193823e-06, "loss": 0.6587, "step": 22489 }, { "epoch": 0.6892852764496751, "grad_norm": 1.4723647774281012, "learning_rate": 2.3251920777964098e-06, "loss": 0.6107, "step": 22490 }, { "epoch": 0.6893159249724163, "grad_norm": 1.354558447767526, "learning_rate": 2.3247727637545612e-06, "loss": 0.6214, "step": 22491 }, { "epoch": 0.6893465734951575, "grad_norm": 0.47534290093579684, "learning_rate": 2.324353476072412e-06, "loss": 0.4033, "step": 22492 }, { "epoch": 0.6893772220178987, "grad_norm": 1.3301684865078987, "learning_rate": 2.3239342147540932e-06, "loss": 0.6543, "step": 22493 }, { "epoch": 0.6894078705406399, "grad_norm": 0.46023859990007127, "learning_rate": 2.3235149798037344e-06, "loss": 0.4038, "step": 22494 }, { "epoch": 0.6894385190633812, "grad_norm": 0.46501203248710765, "learning_rate": 2.3230957712254686e-06, "loss": 0.3972, "step": 22495 }, { "epoch": 0.6894691675861223, "grad_norm": 1.2892663023310067, "learning_rate": 2.3226765890234216e-06, "loss": 0.6083, "step": 22496 }, { "epoch": 0.6894998161088636, "grad_norm": 1.2293620162797452, "learning_rate": 2.3222574332017305e-06, "loss": 0.6763, "step": 22497 }, { "epoch": 0.6895304646316047, "grad_norm": 1.3410722974776639, "learning_rate": 2.3218383037645227e-06, "loss": 0.7029, "step": 22498 }, { "epoch": 0.689561113154346, "grad_norm": 1.3302187035902528, "learning_rate": 2.3214192007159246e-06, "loss": 0.6431, "step": 22499 }, { "epoch": 0.6895917616770871, "grad_norm": 1.0777634772928464, "learning_rate": 2.3210001240600694e-06, "loss": 0.5538, "step": 22500 }, { "epoch": 0.6896224101998284, "grad_norm": 1.2643124280568248, "learning_rate": 2.3205810738010866e-06, "loss": 0.5917, "step": 22501 }, { "epoch": 0.6896530587225695, "grad_norm": 1.365859322044124, "learning_rate": 2.3201620499431027e-06, "loss": 0.6405, "step": 22502 }, { "epoch": 0.6896837072453108, "grad_norm": 1.3332473458736858, "learning_rate": 2.3197430524902477e-06, "loss": 0.594, "step": 22503 }, { "epoch": 0.689714355768052, "grad_norm": 0.4381273637284151, "learning_rate": 2.3193240814466493e-06, "loss": 0.3819, "step": 22504 }, { "epoch": 0.6897450042907932, "grad_norm": 1.3799468462770705, "learning_rate": 2.3189051368164393e-06, "loss": 0.5881, "step": 22505 }, { "epoch": 0.6897756528135344, "grad_norm": 1.5254242565160194, "learning_rate": 2.318486218603743e-06, "loss": 0.6726, "step": 22506 }, { "epoch": 0.6898063013362756, "grad_norm": 1.2308746917851383, "learning_rate": 2.3180673268126842e-06, "loss": 0.6794, "step": 22507 }, { "epoch": 0.6898369498590168, "grad_norm": 1.4100924699594652, "learning_rate": 2.317648461447398e-06, "loss": 0.5686, "step": 22508 }, { "epoch": 0.689867598381758, "grad_norm": 1.1319794059578736, "learning_rate": 2.317229622512008e-06, "loss": 0.5698, "step": 22509 }, { "epoch": 0.6898982469044992, "grad_norm": 1.2762649613541466, "learning_rate": 2.3168108100106383e-06, "loss": 0.5572, "step": 22510 }, { "epoch": 0.6899288954272405, "grad_norm": 1.276611652176958, "learning_rate": 2.316392023947419e-06, "loss": 0.6063, "step": 22511 }, { "epoch": 0.6899595439499816, "grad_norm": 1.1927884181595154, "learning_rate": 2.3159732643264752e-06, "loss": 0.5415, "step": 22512 }, { "epoch": 0.6899901924727229, "grad_norm": 1.470115947144744, "learning_rate": 2.3155545311519364e-06, "loss": 0.7119, "step": 22513 }, { "epoch": 0.690020840995464, "grad_norm": 1.5699353213550138, "learning_rate": 2.3151358244279227e-06, "loss": 0.6675, "step": 22514 }, { "epoch": 0.6900514895182053, "grad_norm": 1.5718746596671662, "learning_rate": 2.3147171441585633e-06, "loss": 0.6478, "step": 22515 }, { "epoch": 0.6900821380409464, "grad_norm": 1.2434396447645562, "learning_rate": 2.3142984903479847e-06, "loss": 0.5412, "step": 22516 }, { "epoch": 0.6901127865636877, "grad_norm": 1.2167437954620361, "learning_rate": 2.31387986300031e-06, "loss": 0.5679, "step": 22517 }, { "epoch": 0.6901434350864288, "grad_norm": 1.376962671010326, "learning_rate": 2.3134612621196606e-06, "loss": 0.6017, "step": 22518 }, { "epoch": 0.69017408360917, "grad_norm": 1.3405516736153023, "learning_rate": 2.3130426877101686e-06, "loss": 0.6239, "step": 22519 }, { "epoch": 0.6902047321319112, "grad_norm": 1.2720035856888514, "learning_rate": 2.3126241397759547e-06, "loss": 0.5458, "step": 22520 }, { "epoch": 0.6902353806546524, "grad_norm": 1.3849057247088763, "learning_rate": 2.3122056183211406e-06, "loss": 0.6443, "step": 22521 }, { "epoch": 0.6902660291773937, "grad_norm": 1.2509457828203436, "learning_rate": 2.311787123349852e-06, "loss": 0.5559, "step": 22522 }, { "epoch": 0.6902966777001348, "grad_norm": 1.472364676629742, "learning_rate": 2.3113686548662128e-06, "loss": 0.55, "step": 22523 }, { "epoch": 0.6903273262228761, "grad_norm": 1.2519556894791064, "learning_rate": 2.3109502128743483e-06, "loss": 0.6545, "step": 22524 }, { "epoch": 0.6903579747456172, "grad_norm": 1.493166978272333, "learning_rate": 2.3105317973783774e-06, "loss": 0.7404, "step": 22525 }, { "epoch": 0.6903886232683585, "grad_norm": 1.2394151124495416, "learning_rate": 2.310113408382425e-06, "loss": 0.6023, "step": 22526 }, { "epoch": 0.6904192717910996, "grad_norm": 1.3281741522417327, "learning_rate": 2.309695045890615e-06, "loss": 0.6497, "step": 22527 }, { "epoch": 0.6904499203138409, "grad_norm": 1.4145121163026324, "learning_rate": 2.3092767099070683e-06, "loss": 0.6388, "step": 22528 }, { "epoch": 0.690480568836582, "grad_norm": 1.3918483157181738, "learning_rate": 2.308858400435905e-06, "loss": 0.6016, "step": 22529 }, { "epoch": 0.6905112173593233, "grad_norm": 1.2212782784888916, "learning_rate": 2.3084401174812476e-06, "loss": 0.5763, "step": 22530 }, { "epoch": 0.6905418658820645, "grad_norm": 1.155166576056645, "learning_rate": 2.308021861047219e-06, "loss": 0.4873, "step": 22531 }, { "epoch": 0.6905725144048057, "grad_norm": 1.4380059227854796, "learning_rate": 2.3076036311379413e-06, "loss": 0.6035, "step": 22532 }, { "epoch": 0.6906031629275469, "grad_norm": 1.2965748176941665, "learning_rate": 2.3071854277575324e-06, "loss": 0.5938, "step": 22533 }, { "epoch": 0.6906338114502881, "grad_norm": 0.44546728414457726, "learning_rate": 2.306767250910114e-06, "loss": 0.4035, "step": 22534 }, { "epoch": 0.6906644599730293, "grad_norm": 0.43878375524412133, "learning_rate": 2.3063491005998095e-06, "loss": 0.3964, "step": 22535 }, { "epoch": 0.6906951084957705, "grad_norm": 1.1478153554121029, "learning_rate": 2.3059309768307364e-06, "loss": 0.5232, "step": 22536 }, { "epoch": 0.6907257570185117, "grad_norm": 1.2088686378438231, "learning_rate": 2.3055128796070105e-06, "loss": 0.5401, "step": 22537 }, { "epoch": 0.690756405541253, "grad_norm": 1.3218772994181405, "learning_rate": 2.3050948089327594e-06, "loss": 0.6318, "step": 22538 }, { "epoch": 0.6907870540639941, "grad_norm": 1.268521895067199, "learning_rate": 2.304676764812097e-06, "loss": 0.5684, "step": 22539 }, { "epoch": 0.6908177025867354, "grad_norm": 1.2449968202895754, "learning_rate": 2.3042587472491463e-06, "loss": 0.5704, "step": 22540 }, { "epoch": 0.6908483511094765, "grad_norm": 1.1734409949081464, "learning_rate": 2.3038407562480213e-06, "loss": 0.6542, "step": 22541 }, { "epoch": 0.6908789996322178, "grad_norm": 1.3756630003369383, "learning_rate": 2.3034227918128438e-06, "loss": 0.5357, "step": 22542 }, { "epoch": 0.6909096481549589, "grad_norm": 1.3192824956955882, "learning_rate": 2.303004853947733e-06, "loss": 0.5562, "step": 22543 }, { "epoch": 0.6909402966777002, "grad_norm": 1.2481071780108344, "learning_rate": 2.302586942656803e-06, "loss": 0.569, "step": 22544 }, { "epoch": 0.6909709452004413, "grad_norm": 1.2856376800940075, "learning_rate": 2.3021690579441754e-06, "loss": 0.5898, "step": 22545 }, { "epoch": 0.6910015937231826, "grad_norm": 1.1660726007707263, "learning_rate": 2.3017511998139667e-06, "loss": 0.5042, "step": 22546 }, { "epoch": 0.6910322422459237, "grad_norm": 0.46584806795895456, "learning_rate": 2.301333368270295e-06, "loss": 0.3965, "step": 22547 }, { "epoch": 0.691062890768665, "grad_norm": 1.2935301588456407, "learning_rate": 2.300915563317272e-06, "loss": 0.6416, "step": 22548 }, { "epoch": 0.6910935392914062, "grad_norm": 0.47872014962139325, "learning_rate": 2.300497784959022e-06, "loss": 0.4062, "step": 22549 }, { "epoch": 0.6911241878141473, "grad_norm": 1.2448224026329935, "learning_rate": 2.3000800331996564e-06, "loss": 0.6444, "step": 22550 }, { "epoch": 0.6911548363368886, "grad_norm": 1.0983317092843716, "learning_rate": 2.299662308043295e-06, "loss": 0.5448, "step": 22551 }, { "epoch": 0.6911854848596297, "grad_norm": 1.2602877794040688, "learning_rate": 2.2992446094940496e-06, "loss": 0.5891, "step": 22552 }, { "epoch": 0.691216133382371, "grad_norm": 1.347459282438594, "learning_rate": 2.2988269375560383e-06, "loss": 0.5932, "step": 22553 }, { "epoch": 0.6912467819051121, "grad_norm": 0.4456777248485846, "learning_rate": 2.298409292233378e-06, "loss": 0.4148, "step": 22554 }, { "epoch": 0.6912774304278534, "grad_norm": 1.2997356067643546, "learning_rate": 2.2979916735301804e-06, "loss": 0.6528, "step": 22555 }, { "epoch": 0.6913080789505945, "grad_norm": 1.2470961689921491, "learning_rate": 2.297574081450563e-06, "loss": 0.5286, "step": 22556 }, { "epoch": 0.6913387274733358, "grad_norm": 1.2955746652305544, "learning_rate": 2.29715651599864e-06, "loss": 0.6075, "step": 22557 }, { "epoch": 0.691369375996077, "grad_norm": 1.3971559749035423, "learning_rate": 2.2967389771785243e-06, "loss": 0.5934, "step": 22558 }, { "epoch": 0.6914000245188182, "grad_norm": 1.396436503681811, "learning_rate": 2.296321464994331e-06, "loss": 0.7364, "step": 22559 }, { "epoch": 0.6914306730415594, "grad_norm": 1.3780181830474187, "learning_rate": 2.295903979450176e-06, "loss": 0.6398, "step": 22560 }, { "epoch": 0.6914613215643006, "grad_norm": 1.2700515702987243, "learning_rate": 2.295486520550169e-06, "loss": 0.6594, "step": 22561 }, { "epoch": 0.6914919700870418, "grad_norm": 0.474251158845243, "learning_rate": 2.2950690882984274e-06, "loss": 0.3855, "step": 22562 }, { "epoch": 0.691522618609783, "grad_norm": 1.241900869650494, "learning_rate": 2.29465168269906e-06, "loss": 0.5102, "step": 22563 }, { "epoch": 0.6915532671325242, "grad_norm": 1.301655915207002, "learning_rate": 2.294234303756182e-06, "loss": 0.6263, "step": 22564 }, { "epoch": 0.6915839156552654, "grad_norm": 1.2835858000637606, "learning_rate": 2.293816951473908e-06, "loss": 0.6395, "step": 22565 }, { "epoch": 0.6916145641780066, "grad_norm": 1.2438415809919718, "learning_rate": 2.293399625856345e-06, "loss": 0.6001, "step": 22566 }, { "epoch": 0.6916452127007479, "grad_norm": 1.1769423329948334, "learning_rate": 2.2929823269076085e-06, "loss": 0.5741, "step": 22567 }, { "epoch": 0.691675861223489, "grad_norm": 0.48947376182736, "learning_rate": 2.292565054631812e-06, "loss": 0.4093, "step": 22568 }, { "epoch": 0.6917065097462303, "grad_norm": 0.4512438775095872, "learning_rate": 2.2921478090330624e-06, "loss": 0.4049, "step": 22569 }, { "epoch": 0.6917371582689714, "grad_norm": 1.2284204171759339, "learning_rate": 2.2917305901154737e-06, "loss": 0.5971, "step": 22570 }, { "epoch": 0.6917678067917127, "grad_norm": 1.2597226488247442, "learning_rate": 2.2913133978831582e-06, "loss": 0.637, "step": 22571 }, { "epoch": 0.6917984553144538, "grad_norm": 1.4275425106633224, "learning_rate": 2.290896232340223e-06, "loss": 0.6721, "step": 22572 }, { "epoch": 0.6918291038371951, "grad_norm": 0.4595983334784053, "learning_rate": 2.2904790934907817e-06, "loss": 0.402, "step": 22573 }, { "epoch": 0.6918597523599362, "grad_norm": 1.1503014226678754, "learning_rate": 2.290061981338942e-06, "loss": 0.6257, "step": 22574 }, { "epoch": 0.6918904008826775, "grad_norm": 1.4469520630319013, "learning_rate": 2.2896448958888145e-06, "loss": 0.6361, "step": 22575 }, { "epoch": 0.6919210494054187, "grad_norm": 1.3221836641859939, "learning_rate": 2.2892278371445107e-06, "loss": 0.6121, "step": 22576 }, { "epoch": 0.6919516979281599, "grad_norm": 0.46384677201440677, "learning_rate": 2.2888108051101377e-06, "loss": 0.4066, "step": 22577 }, { "epoch": 0.6919823464509011, "grad_norm": 1.208168767101356, "learning_rate": 2.2883937997898053e-06, "loss": 0.6106, "step": 22578 }, { "epoch": 0.6920129949736423, "grad_norm": 1.2931317389321042, "learning_rate": 2.287976821187624e-06, "loss": 0.6256, "step": 22579 }, { "epoch": 0.6920436434963835, "grad_norm": 1.3040031229725522, "learning_rate": 2.2875598693076995e-06, "loss": 0.6628, "step": 22580 }, { "epoch": 0.6920742920191246, "grad_norm": 0.45638498520970605, "learning_rate": 2.287142944154144e-06, "loss": 0.3903, "step": 22581 }, { "epoch": 0.6921049405418659, "grad_norm": 1.2414842397821544, "learning_rate": 2.28672604573106e-06, "loss": 0.5991, "step": 22582 }, { "epoch": 0.692135589064607, "grad_norm": 1.2522868226781754, "learning_rate": 2.2863091740425597e-06, "loss": 0.5487, "step": 22583 }, { "epoch": 0.6921662375873483, "grad_norm": 1.2491816827685749, "learning_rate": 2.285892329092751e-06, "loss": 0.5653, "step": 22584 }, { "epoch": 0.6921968861100894, "grad_norm": 1.4328727298053525, "learning_rate": 2.2854755108857376e-06, "loss": 0.6708, "step": 22585 }, { "epoch": 0.6922275346328307, "grad_norm": 1.3525718438606196, "learning_rate": 2.2850587194256284e-06, "loss": 0.5694, "step": 22586 }, { "epoch": 0.6922581831555719, "grad_norm": 1.2102888436518189, "learning_rate": 2.2846419547165323e-06, "loss": 0.6698, "step": 22587 }, { "epoch": 0.6922888316783131, "grad_norm": 1.4755825946140944, "learning_rate": 2.2842252167625517e-06, "loss": 0.5851, "step": 22588 }, { "epoch": 0.6923194802010543, "grad_norm": 1.4326470973182992, "learning_rate": 2.283808505567795e-06, "loss": 0.6223, "step": 22589 }, { "epoch": 0.6923501287237955, "grad_norm": 1.1910877041749504, "learning_rate": 2.2833918211363705e-06, "loss": 0.588, "step": 22590 }, { "epoch": 0.6923807772465367, "grad_norm": 1.339039574412475, "learning_rate": 2.2829751634723786e-06, "loss": 0.6324, "step": 22591 }, { "epoch": 0.6924114257692779, "grad_norm": 0.46203284434595077, "learning_rate": 2.28255853257993e-06, "loss": 0.4151, "step": 22592 }, { "epoch": 0.6924420742920191, "grad_norm": 1.2115557354396813, "learning_rate": 2.2821419284631235e-06, "loss": 0.6343, "step": 22593 }, { "epoch": 0.6924727228147604, "grad_norm": 1.2310392821657437, "learning_rate": 2.2817253511260722e-06, "loss": 0.5471, "step": 22594 }, { "epoch": 0.6925033713375015, "grad_norm": 1.5273852330938336, "learning_rate": 2.281308800572876e-06, "loss": 0.6384, "step": 22595 }, { "epoch": 0.6925340198602428, "grad_norm": 1.1238239286514793, "learning_rate": 2.2808922768076387e-06, "loss": 0.5718, "step": 22596 }, { "epoch": 0.6925646683829839, "grad_norm": 1.6117797325024497, "learning_rate": 2.2804757798344646e-06, "loss": 0.631, "step": 22597 }, { "epoch": 0.6925953169057252, "grad_norm": 0.45455118324748, "learning_rate": 2.2800593096574607e-06, "loss": 0.415, "step": 22598 }, { "epoch": 0.6926259654284663, "grad_norm": 1.379267149367166, "learning_rate": 2.2796428662807262e-06, "loss": 0.6234, "step": 22599 }, { "epoch": 0.6926566139512076, "grad_norm": 1.2164177643301854, "learning_rate": 2.279226449708367e-06, "loss": 0.6293, "step": 22600 }, { "epoch": 0.6926872624739487, "grad_norm": 0.4780414670761552, "learning_rate": 2.2788100599444873e-06, "loss": 0.4, "step": 22601 }, { "epoch": 0.69271791099669, "grad_norm": 0.47127784475925943, "learning_rate": 2.278393696993187e-06, "loss": 0.414, "step": 22602 }, { "epoch": 0.6927485595194312, "grad_norm": 1.42932808631556, "learning_rate": 2.2779773608585713e-06, "loss": 0.6474, "step": 22603 }, { "epoch": 0.6927792080421724, "grad_norm": 1.4348510367846765, "learning_rate": 2.2775610515447373e-06, "loss": 0.6143, "step": 22604 }, { "epoch": 0.6928098565649136, "grad_norm": 1.3792902514561456, "learning_rate": 2.2771447690557948e-06, "loss": 0.6261, "step": 22605 }, { "epoch": 0.6928405050876548, "grad_norm": 1.3207418957291972, "learning_rate": 2.2767285133958415e-06, "loss": 0.5842, "step": 22606 }, { "epoch": 0.692871153610396, "grad_norm": 1.304909390229617, "learning_rate": 2.2763122845689772e-06, "loss": 0.594, "step": 22607 }, { "epoch": 0.6929018021331372, "grad_norm": 1.3939911026340184, "learning_rate": 2.2758960825793045e-06, "loss": 0.5803, "step": 22608 }, { "epoch": 0.6929324506558784, "grad_norm": 1.208335508461329, "learning_rate": 2.275479907430927e-06, "loss": 0.4896, "step": 22609 }, { "epoch": 0.6929630991786196, "grad_norm": 1.3164392918688888, "learning_rate": 2.2750637591279413e-06, "loss": 0.6814, "step": 22610 }, { "epoch": 0.6929937477013608, "grad_norm": 1.3756193133259629, "learning_rate": 2.2746476376744493e-06, "loss": 0.6649, "step": 22611 }, { "epoch": 0.693024396224102, "grad_norm": 0.4423501157935457, "learning_rate": 2.274231543074551e-06, "loss": 0.3961, "step": 22612 }, { "epoch": 0.6930550447468432, "grad_norm": 1.3637450427260438, "learning_rate": 2.2738154753323495e-06, "loss": 0.707, "step": 22613 }, { "epoch": 0.6930856932695844, "grad_norm": 1.1992100550404512, "learning_rate": 2.273399434451941e-06, "loss": 0.5799, "step": 22614 }, { "epoch": 0.6931163417923256, "grad_norm": 0.43783204108990603, "learning_rate": 2.272983420437422e-06, "loss": 0.3921, "step": 22615 }, { "epoch": 0.6931469903150668, "grad_norm": 1.3301046572242587, "learning_rate": 2.272567433292899e-06, "loss": 0.6532, "step": 22616 }, { "epoch": 0.693177638837808, "grad_norm": 1.1947747697179272, "learning_rate": 2.2721514730224664e-06, "loss": 0.6084, "step": 22617 }, { "epoch": 0.6932082873605492, "grad_norm": 1.2413999005217624, "learning_rate": 2.2717355396302214e-06, "loss": 0.702, "step": 22618 }, { "epoch": 0.6932389358832904, "grad_norm": 1.3208069674954102, "learning_rate": 2.271319633120265e-06, "loss": 0.5893, "step": 22619 }, { "epoch": 0.6932695844060316, "grad_norm": 1.154675623034392, "learning_rate": 2.270903753496694e-06, "loss": 0.5267, "step": 22620 }, { "epoch": 0.6933002329287729, "grad_norm": 1.3383362358000392, "learning_rate": 2.2704879007636077e-06, "loss": 0.6173, "step": 22621 }, { "epoch": 0.693330881451514, "grad_norm": 1.2354168706451298, "learning_rate": 2.2700720749251016e-06, "loss": 0.6305, "step": 22622 }, { "epoch": 0.6933615299742553, "grad_norm": 1.4571214418317722, "learning_rate": 2.2696562759852738e-06, "loss": 0.6912, "step": 22623 }, { "epoch": 0.6933921784969964, "grad_norm": 1.4812062742625163, "learning_rate": 2.2692405039482223e-06, "loss": 0.6422, "step": 22624 }, { "epoch": 0.6934228270197377, "grad_norm": 1.2351121678354051, "learning_rate": 2.2688247588180433e-06, "loss": 0.6276, "step": 22625 }, { "epoch": 0.6934534755424788, "grad_norm": 0.44806491201484666, "learning_rate": 2.2684090405988315e-06, "loss": 0.3872, "step": 22626 }, { "epoch": 0.6934841240652201, "grad_norm": 1.3264346290589963, "learning_rate": 2.2679933492946837e-06, "loss": 0.596, "step": 22627 }, { "epoch": 0.6935147725879612, "grad_norm": 1.3173143526032438, "learning_rate": 2.267577684909698e-06, "loss": 0.6653, "step": 22628 }, { "epoch": 0.6935454211107025, "grad_norm": 0.42036937620657544, "learning_rate": 2.267162047447967e-06, "loss": 0.411, "step": 22629 }, { "epoch": 0.6935760696334436, "grad_norm": 1.152791750174441, "learning_rate": 2.266746436913588e-06, "loss": 0.6016, "step": 22630 }, { "epoch": 0.6936067181561849, "grad_norm": 1.3524019819097453, "learning_rate": 2.2663308533106555e-06, "loss": 0.5663, "step": 22631 }, { "epoch": 0.6936373666789261, "grad_norm": 1.3330018017024974, "learning_rate": 2.265915296643266e-06, "loss": 0.5399, "step": 22632 }, { "epoch": 0.6936680152016673, "grad_norm": 1.154079265989565, "learning_rate": 2.2654997669155125e-06, "loss": 0.5216, "step": 22633 }, { "epoch": 0.6936986637244085, "grad_norm": 0.4437999820071251, "learning_rate": 2.2650842641314864e-06, "loss": 0.388, "step": 22634 }, { "epoch": 0.6937293122471497, "grad_norm": 1.3834633856023484, "learning_rate": 2.2646687882952884e-06, "loss": 0.6884, "step": 22635 }, { "epoch": 0.6937599607698909, "grad_norm": 1.2794531165690264, "learning_rate": 2.2642533394110082e-06, "loss": 0.5835, "step": 22636 }, { "epoch": 0.6937906092926321, "grad_norm": 1.1471289904438473, "learning_rate": 2.2638379174827385e-06, "loss": 0.5796, "step": 22637 }, { "epoch": 0.6938212578153733, "grad_norm": 6.967495689697416, "learning_rate": 2.2634225225145733e-06, "loss": 0.5772, "step": 22638 }, { "epoch": 0.6938519063381146, "grad_norm": 1.2563802044210712, "learning_rate": 2.2630071545106064e-06, "loss": 0.6411, "step": 22639 }, { "epoch": 0.6938825548608557, "grad_norm": 1.3066851087633546, "learning_rate": 2.2625918134749326e-06, "loss": 0.6417, "step": 22640 }, { "epoch": 0.693913203383597, "grad_norm": 0.4429854255677423, "learning_rate": 2.2621764994116395e-06, "loss": 0.3952, "step": 22641 }, { "epoch": 0.6939438519063381, "grad_norm": 1.216049188896219, "learning_rate": 2.2617612123248223e-06, "loss": 0.6462, "step": 22642 }, { "epoch": 0.6939745004290793, "grad_norm": 1.2547404916037128, "learning_rate": 2.2613459522185744e-06, "loss": 0.6406, "step": 22643 }, { "epoch": 0.6940051489518205, "grad_norm": 0.4594312568880338, "learning_rate": 2.2609307190969852e-06, "loss": 0.405, "step": 22644 }, { "epoch": 0.6940357974745617, "grad_norm": 1.3599329989834326, "learning_rate": 2.260515512964143e-06, "loss": 0.6531, "step": 22645 }, { "epoch": 0.6940664459973029, "grad_norm": 1.4570687242361842, "learning_rate": 2.260100333824146e-06, "loss": 0.6225, "step": 22646 }, { "epoch": 0.6940970945200441, "grad_norm": 1.2519649331797764, "learning_rate": 2.2596851816810815e-06, "loss": 0.5934, "step": 22647 }, { "epoch": 0.6941277430427854, "grad_norm": 1.2521710117529496, "learning_rate": 2.259270056539038e-06, "loss": 0.6576, "step": 22648 }, { "epoch": 0.6941583915655265, "grad_norm": 1.3134837859729942, "learning_rate": 2.258854958402108e-06, "loss": 0.5876, "step": 22649 }, { "epoch": 0.6941890400882678, "grad_norm": 1.4283219630877453, "learning_rate": 2.2584398872743817e-06, "loss": 0.662, "step": 22650 }, { "epoch": 0.6942196886110089, "grad_norm": 1.3691135894806292, "learning_rate": 2.25802484315995e-06, "loss": 0.6718, "step": 22651 }, { "epoch": 0.6942503371337502, "grad_norm": 0.4367175837090884, "learning_rate": 2.2576098260629e-06, "loss": 0.3652, "step": 22652 }, { "epoch": 0.6942809856564913, "grad_norm": 1.3532611238159162, "learning_rate": 2.2571948359873213e-06, "loss": 0.6273, "step": 22653 }, { "epoch": 0.6943116341792326, "grad_norm": 1.281366802568707, "learning_rate": 2.256779872937306e-06, "loss": 0.579, "step": 22654 }, { "epoch": 0.6943422827019737, "grad_norm": 1.3511782364526106, "learning_rate": 2.25636493691694e-06, "loss": 0.5932, "step": 22655 }, { "epoch": 0.694372931224715, "grad_norm": 1.5366252197559445, "learning_rate": 2.2559500279303087e-06, "loss": 0.6288, "step": 22656 }, { "epoch": 0.6944035797474561, "grad_norm": 0.44787708146264393, "learning_rate": 2.2555351459815076e-06, "loss": 0.4067, "step": 22657 }, { "epoch": 0.6944342282701974, "grad_norm": 1.2196903655169462, "learning_rate": 2.2551202910746196e-06, "loss": 0.6948, "step": 22658 }, { "epoch": 0.6944648767929386, "grad_norm": 1.3154200409225165, "learning_rate": 2.254705463213735e-06, "loss": 0.5692, "step": 22659 }, { "epoch": 0.6944955253156798, "grad_norm": 1.4425809894834527, "learning_rate": 2.254290662402938e-06, "loss": 0.6525, "step": 22660 }, { "epoch": 0.694526173838421, "grad_norm": 0.45198129082963034, "learning_rate": 2.2538758886463174e-06, "loss": 0.4083, "step": 22661 }, { "epoch": 0.6945568223611622, "grad_norm": 1.1464524865779064, "learning_rate": 2.253461141947963e-06, "loss": 0.6922, "step": 22662 }, { "epoch": 0.6945874708839034, "grad_norm": 1.343345779140577, "learning_rate": 2.253046422311956e-06, "loss": 0.562, "step": 22663 }, { "epoch": 0.6946181194066446, "grad_norm": 1.3289460668778228, "learning_rate": 2.252631729742386e-06, "loss": 0.587, "step": 22664 }, { "epoch": 0.6946487679293858, "grad_norm": 1.194622407744188, "learning_rate": 2.25221706424334e-06, "loss": 0.6473, "step": 22665 }, { "epoch": 0.694679416452127, "grad_norm": 1.5511458067411605, "learning_rate": 2.2518024258189004e-06, "loss": 0.6907, "step": 22666 }, { "epoch": 0.6947100649748682, "grad_norm": 1.3612153426634037, "learning_rate": 2.251387814473155e-06, "loss": 0.667, "step": 22667 }, { "epoch": 0.6947407134976095, "grad_norm": 1.2499850621314255, "learning_rate": 2.2509732302101906e-06, "loss": 0.5612, "step": 22668 }, { "epoch": 0.6947713620203506, "grad_norm": 1.2987877110806687, "learning_rate": 2.2505586730340884e-06, "loss": 0.5478, "step": 22669 }, { "epoch": 0.6948020105430919, "grad_norm": 1.2527279514213971, "learning_rate": 2.2501441429489366e-06, "loss": 0.5421, "step": 22670 }, { "epoch": 0.694832659065833, "grad_norm": 1.4029698970082516, "learning_rate": 2.2497296399588166e-06, "loss": 0.6199, "step": 22671 }, { "epoch": 0.6948633075885743, "grad_norm": 3.3057614130750332, "learning_rate": 2.249315164067814e-06, "loss": 0.6456, "step": 22672 }, { "epoch": 0.6948939561113154, "grad_norm": 1.3603182224830304, "learning_rate": 2.2489007152800146e-06, "loss": 0.5737, "step": 22673 }, { "epoch": 0.6949246046340566, "grad_norm": 1.33204152406571, "learning_rate": 2.248486293599499e-06, "loss": 0.541, "step": 22674 }, { "epoch": 0.6949552531567978, "grad_norm": 1.3210864644078661, "learning_rate": 2.2480718990303517e-06, "loss": 0.5572, "step": 22675 }, { "epoch": 0.694985901679539, "grad_norm": 1.282978980144615, "learning_rate": 2.247657531576658e-06, "loss": 0.5955, "step": 22676 }, { "epoch": 0.6950165502022803, "grad_norm": 1.4265810298521011, "learning_rate": 2.247243191242497e-06, "loss": 0.6541, "step": 22677 }, { "epoch": 0.6950471987250214, "grad_norm": 1.3317947924688063, "learning_rate": 2.246828878031955e-06, "loss": 0.5959, "step": 22678 }, { "epoch": 0.6950778472477627, "grad_norm": 0.4450020804843512, "learning_rate": 2.2464145919491105e-06, "loss": 0.3918, "step": 22679 }, { "epoch": 0.6951084957705038, "grad_norm": 1.257663310009858, "learning_rate": 2.246000332998047e-06, "loss": 0.569, "step": 22680 }, { "epoch": 0.6951391442932451, "grad_norm": 0.45711803250471167, "learning_rate": 2.2455861011828494e-06, "loss": 0.4142, "step": 22681 }, { "epoch": 0.6951697928159862, "grad_norm": 1.280822841486654, "learning_rate": 2.245171896507595e-06, "loss": 0.6004, "step": 22682 }, { "epoch": 0.6952004413387275, "grad_norm": 1.3275039530936281, "learning_rate": 2.2447577189763662e-06, "loss": 0.6267, "step": 22683 }, { "epoch": 0.6952310898614686, "grad_norm": 1.6299174751679713, "learning_rate": 2.244343568593247e-06, "loss": 0.6542, "step": 22684 }, { "epoch": 0.6952617383842099, "grad_norm": 1.3368866722230308, "learning_rate": 2.2439294453623135e-06, "loss": 0.6933, "step": 22685 }, { "epoch": 0.695292386906951, "grad_norm": 0.43461091077769975, "learning_rate": 2.2435153492876484e-06, "loss": 0.4096, "step": 22686 }, { "epoch": 0.6953230354296923, "grad_norm": 1.3815551748945998, "learning_rate": 2.2431012803733337e-06, "loss": 0.6275, "step": 22687 }, { "epoch": 0.6953536839524335, "grad_norm": 1.2957313216241861, "learning_rate": 2.2426872386234457e-06, "loss": 0.6091, "step": 22688 }, { "epoch": 0.6953843324751747, "grad_norm": 1.2138964495178737, "learning_rate": 2.2422732240420674e-06, "loss": 0.586, "step": 22689 }, { "epoch": 0.6954149809979159, "grad_norm": 1.3959556047532107, "learning_rate": 2.2418592366332753e-06, "loss": 0.5572, "step": 22690 }, { "epoch": 0.6954456295206571, "grad_norm": 1.2426131407156538, "learning_rate": 2.2414452764011495e-06, "loss": 0.625, "step": 22691 }, { "epoch": 0.6954762780433983, "grad_norm": 1.1492140149650498, "learning_rate": 2.241031343349771e-06, "loss": 0.5965, "step": 22692 }, { "epoch": 0.6955069265661395, "grad_norm": 1.339807035129339, "learning_rate": 2.2406174374832147e-06, "loss": 0.6511, "step": 22693 }, { "epoch": 0.6955375750888807, "grad_norm": 1.509930027334024, "learning_rate": 2.240203558805561e-06, "loss": 0.5956, "step": 22694 }, { "epoch": 0.695568223611622, "grad_norm": 1.3575403255269691, "learning_rate": 2.2397897073208897e-06, "loss": 0.6686, "step": 22695 }, { "epoch": 0.6955988721343631, "grad_norm": 1.2272153350224766, "learning_rate": 2.2393758830332744e-06, "loss": 0.541, "step": 22696 }, { "epoch": 0.6956295206571044, "grad_norm": 1.1196388824480419, "learning_rate": 2.238962085946795e-06, "loss": 0.6165, "step": 22697 }, { "epoch": 0.6956601691798455, "grad_norm": 1.442620206777313, "learning_rate": 2.238548316065531e-06, "loss": 0.6208, "step": 22698 }, { "epoch": 0.6956908177025868, "grad_norm": 1.2554666541458184, "learning_rate": 2.2381345733935545e-06, "loss": 0.5377, "step": 22699 }, { "epoch": 0.6957214662253279, "grad_norm": 1.2358330707988292, "learning_rate": 2.2377208579349464e-06, "loss": 0.686, "step": 22700 }, { "epoch": 0.6957521147480692, "grad_norm": 1.3855094752585144, "learning_rate": 2.23730716969378e-06, "loss": 0.6041, "step": 22701 }, { "epoch": 0.6957827632708103, "grad_norm": 1.2800303961962236, "learning_rate": 2.2368935086741326e-06, "loss": 0.6546, "step": 22702 }, { "epoch": 0.6958134117935516, "grad_norm": 1.3957976544732615, "learning_rate": 2.2364798748800826e-06, "loss": 0.6307, "step": 22703 }, { "epoch": 0.6958440603162928, "grad_norm": 1.3066209564167885, "learning_rate": 2.2360662683157016e-06, "loss": 0.5933, "step": 22704 }, { "epoch": 0.6958747088390339, "grad_norm": 1.2020037416758367, "learning_rate": 2.2356526889850666e-06, "loss": 0.6167, "step": 22705 }, { "epoch": 0.6959053573617752, "grad_norm": 0.4641267624314164, "learning_rate": 2.235239136892255e-06, "loss": 0.3951, "step": 22706 }, { "epoch": 0.6959360058845163, "grad_norm": 1.2274696836820125, "learning_rate": 2.234825612041338e-06, "loss": 0.5966, "step": 22707 }, { "epoch": 0.6959666544072576, "grad_norm": 1.519391611672243, "learning_rate": 2.2344121144363912e-06, "loss": 0.7078, "step": 22708 }, { "epoch": 0.6959973029299987, "grad_norm": 1.5391923602100586, "learning_rate": 2.2339986440814916e-06, "loss": 0.5831, "step": 22709 }, { "epoch": 0.69602795145274, "grad_norm": 1.510605203300031, "learning_rate": 2.233585200980709e-06, "loss": 0.6229, "step": 22710 }, { "epoch": 0.6960585999754811, "grad_norm": 0.4601651641308101, "learning_rate": 2.233171785138121e-06, "loss": 0.4066, "step": 22711 }, { "epoch": 0.6960892484982224, "grad_norm": 1.2302045557219756, "learning_rate": 2.2327583965577965e-06, "loss": 0.5993, "step": 22712 }, { "epoch": 0.6961198970209636, "grad_norm": 1.210213446230924, "learning_rate": 2.232345035243814e-06, "loss": 0.578, "step": 22713 }, { "epoch": 0.6961505455437048, "grad_norm": 1.3208420454863767, "learning_rate": 2.2319317012002452e-06, "loss": 0.6059, "step": 22714 }, { "epoch": 0.696181194066446, "grad_norm": 1.2096149944466272, "learning_rate": 2.231518394431159e-06, "loss": 0.5529, "step": 22715 }, { "epoch": 0.6962118425891872, "grad_norm": 1.2206171454537498, "learning_rate": 2.2311051149406303e-06, "loss": 0.4977, "step": 22716 }, { "epoch": 0.6962424911119284, "grad_norm": 0.4397450914991116, "learning_rate": 2.2306918627327335e-06, "loss": 0.4004, "step": 22717 }, { "epoch": 0.6962731396346696, "grad_norm": 1.3683978517361637, "learning_rate": 2.2302786378115367e-06, "loss": 0.717, "step": 22718 }, { "epoch": 0.6963037881574108, "grad_norm": 1.396392731879197, "learning_rate": 2.2298654401811126e-06, "loss": 0.6186, "step": 22719 }, { "epoch": 0.696334436680152, "grad_norm": 1.0716718285436821, "learning_rate": 2.2294522698455332e-06, "loss": 0.5275, "step": 22720 }, { "epoch": 0.6963650852028932, "grad_norm": 1.292772941285878, "learning_rate": 2.229039126808872e-06, "loss": 0.6349, "step": 22721 }, { "epoch": 0.6963957337256345, "grad_norm": 1.3380948469029605, "learning_rate": 2.2286260110751968e-06, "loss": 0.5981, "step": 22722 }, { "epoch": 0.6964263822483756, "grad_norm": 1.6478589218728086, "learning_rate": 2.2282129226485767e-06, "loss": 0.6257, "step": 22723 }, { "epoch": 0.6964570307711169, "grad_norm": 1.1543772117995657, "learning_rate": 2.227799861533084e-06, "loss": 0.555, "step": 22724 }, { "epoch": 0.696487679293858, "grad_norm": 1.3244962985949305, "learning_rate": 2.2273868277327896e-06, "loss": 0.5103, "step": 22725 }, { "epoch": 0.6965183278165993, "grad_norm": 0.48058826954327055, "learning_rate": 2.2269738212517617e-06, "loss": 0.4086, "step": 22726 }, { "epoch": 0.6965489763393404, "grad_norm": 1.3565373665479337, "learning_rate": 2.2265608420940694e-06, "loss": 0.5289, "step": 22727 }, { "epoch": 0.6965796248620817, "grad_norm": 1.2999300770850895, "learning_rate": 2.2261478902637847e-06, "loss": 0.6113, "step": 22728 }, { "epoch": 0.6966102733848228, "grad_norm": 1.3860791368874628, "learning_rate": 2.225734965764973e-06, "loss": 0.6754, "step": 22729 }, { "epoch": 0.6966409219075641, "grad_norm": 1.5871618566405536, "learning_rate": 2.2253220686017056e-06, "loss": 0.5871, "step": 22730 }, { "epoch": 0.6966715704303053, "grad_norm": 1.686701411876872, "learning_rate": 2.224909198778047e-06, "loss": 0.5875, "step": 22731 }, { "epoch": 0.6967022189530465, "grad_norm": 1.4264474521283246, "learning_rate": 2.2244963562980713e-06, "loss": 0.6792, "step": 22732 }, { "epoch": 0.6967328674757877, "grad_norm": 1.3452031585718665, "learning_rate": 2.2240835411658435e-06, "loss": 0.6033, "step": 22733 }, { "epoch": 0.6967635159985289, "grad_norm": 1.1697646599921068, "learning_rate": 2.2236707533854285e-06, "loss": 0.5586, "step": 22734 }, { "epoch": 0.6967941645212701, "grad_norm": 0.4524295593367027, "learning_rate": 2.2232579929608962e-06, "loss": 0.3901, "step": 22735 }, { "epoch": 0.6968248130440112, "grad_norm": 1.2079021027493737, "learning_rate": 2.222845259896315e-06, "loss": 0.6174, "step": 22736 }, { "epoch": 0.6968554615667525, "grad_norm": 1.1714185723630648, "learning_rate": 2.2224325541957483e-06, "loss": 0.5947, "step": 22737 }, { "epoch": 0.6968861100894936, "grad_norm": 1.2535682822140446, "learning_rate": 2.2220198758632645e-06, "loss": 0.6557, "step": 22738 }, { "epoch": 0.6969167586122349, "grad_norm": 1.4556007407526999, "learning_rate": 2.221607224902929e-06, "loss": 0.6616, "step": 22739 }, { "epoch": 0.696947407134976, "grad_norm": 1.3571544305729946, "learning_rate": 2.221194601318811e-06, "loss": 0.6188, "step": 22740 }, { "epoch": 0.6969780556577173, "grad_norm": 1.5402486332252887, "learning_rate": 2.2207820051149735e-06, "loss": 0.6151, "step": 22741 }, { "epoch": 0.6970087041804585, "grad_norm": 1.319670803867019, "learning_rate": 2.220369436295478e-06, "loss": 0.6453, "step": 22742 }, { "epoch": 0.6970393527031997, "grad_norm": 1.2614400069201304, "learning_rate": 2.219956894864397e-06, "loss": 0.6149, "step": 22743 }, { "epoch": 0.6970700012259409, "grad_norm": 1.4397256994033958, "learning_rate": 2.219544380825793e-06, "loss": 0.5443, "step": 22744 }, { "epoch": 0.6971006497486821, "grad_norm": 1.3173852974629245, "learning_rate": 2.219131894183727e-06, "loss": 0.5629, "step": 22745 }, { "epoch": 0.6971312982714233, "grad_norm": 0.45632971352064927, "learning_rate": 2.2187194349422666e-06, "loss": 0.3926, "step": 22746 }, { "epoch": 0.6971619467941645, "grad_norm": 1.2301170226313167, "learning_rate": 2.2183070031054748e-06, "loss": 0.6035, "step": 22747 }, { "epoch": 0.6971925953169057, "grad_norm": 0.45439502043600777, "learning_rate": 2.2178945986774176e-06, "loss": 0.4009, "step": 22748 }, { "epoch": 0.697223243839647, "grad_norm": 1.586684663754091, "learning_rate": 2.217482221662155e-06, "loss": 0.6241, "step": 22749 }, { "epoch": 0.6972538923623881, "grad_norm": 0.46395201842832057, "learning_rate": 2.217069872063752e-06, "loss": 0.4168, "step": 22750 }, { "epoch": 0.6972845408851294, "grad_norm": 1.3210830817735792, "learning_rate": 2.2166575498862734e-06, "loss": 0.6059, "step": 22751 }, { "epoch": 0.6973151894078705, "grad_norm": 1.2377565453063015, "learning_rate": 2.2162452551337804e-06, "loss": 0.6547, "step": 22752 }, { "epoch": 0.6973458379306118, "grad_norm": 1.2956205947794142, "learning_rate": 2.215832987810331e-06, "loss": 0.5358, "step": 22753 }, { "epoch": 0.6973764864533529, "grad_norm": 1.3686915482176272, "learning_rate": 2.215420747919996e-06, "loss": 0.6461, "step": 22754 }, { "epoch": 0.6974071349760942, "grad_norm": 1.396200372220882, "learning_rate": 2.2150085354668317e-06, "loss": 0.6456, "step": 22755 }, { "epoch": 0.6974377834988353, "grad_norm": 1.5154345058330925, "learning_rate": 2.2145963504548995e-06, "loss": 0.6174, "step": 22756 }, { "epoch": 0.6974684320215766, "grad_norm": 1.4385276651768748, "learning_rate": 2.2141841928882624e-06, "loss": 0.5589, "step": 22757 }, { "epoch": 0.6974990805443178, "grad_norm": 1.2570515437288399, "learning_rate": 2.2137720627709812e-06, "loss": 0.5927, "step": 22758 }, { "epoch": 0.697529729067059, "grad_norm": 1.2288828224389423, "learning_rate": 2.213359960107118e-06, "loss": 0.6007, "step": 22759 }, { "epoch": 0.6975603775898002, "grad_norm": 1.2989817364887488, "learning_rate": 2.212947884900731e-06, "loss": 0.5272, "step": 22760 }, { "epoch": 0.6975910261125414, "grad_norm": 1.4369771562103877, "learning_rate": 2.2125358371558815e-06, "loss": 0.6528, "step": 22761 }, { "epoch": 0.6976216746352826, "grad_norm": 1.2381369277958063, "learning_rate": 2.212123816876631e-06, "loss": 0.5609, "step": 22762 }, { "epoch": 0.6976523231580238, "grad_norm": 1.4709188081312463, "learning_rate": 2.211711824067038e-06, "loss": 0.6252, "step": 22763 }, { "epoch": 0.697682971680765, "grad_norm": 0.45387116806918154, "learning_rate": 2.2112998587311584e-06, "loss": 0.4052, "step": 22764 }, { "epoch": 0.6977136202035062, "grad_norm": 0.4301998699016369, "learning_rate": 2.210887920873058e-06, "loss": 0.3812, "step": 22765 }, { "epoch": 0.6977442687262474, "grad_norm": 0.44662530145650736, "learning_rate": 2.2104760104967915e-06, "loss": 0.3992, "step": 22766 }, { "epoch": 0.6977749172489885, "grad_norm": 1.4510501851343194, "learning_rate": 2.21006412760642e-06, "loss": 0.5742, "step": 22767 }, { "epoch": 0.6978055657717298, "grad_norm": 1.2229305045041001, "learning_rate": 2.2096522722059987e-06, "loss": 0.5415, "step": 22768 }, { "epoch": 0.697836214294471, "grad_norm": 0.43216406387137124, "learning_rate": 2.2092404442995872e-06, "loss": 0.3939, "step": 22769 }, { "epoch": 0.6978668628172122, "grad_norm": 1.31345217718454, "learning_rate": 2.208828643891246e-06, "loss": 0.5589, "step": 22770 }, { "epoch": 0.6978975113399534, "grad_norm": 1.4092540902553141, "learning_rate": 2.208416870985028e-06, "loss": 0.5786, "step": 22771 }, { "epoch": 0.6979281598626946, "grad_norm": 1.360075855311588, "learning_rate": 2.2080051255849933e-06, "loss": 0.6178, "step": 22772 }, { "epoch": 0.6979588083854358, "grad_norm": 1.460091366554385, "learning_rate": 2.2075934076952e-06, "loss": 0.5525, "step": 22773 }, { "epoch": 0.697989456908177, "grad_norm": 1.3723245422667416, "learning_rate": 2.2071817173197014e-06, "loss": 0.6443, "step": 22774 }, { "epoch": 0.6980201054309182, "grad_norm": 1.3548001545426747, "learning_rate": 2.2067700544625577e-06, "loss": 0.6255, "step": 22775 }, { "epoch": 0.6980507539536595, "grad_norm": 1.3330632006314083, "learning_rate": 2.2063584191278213e-06, "loss": 0.6766, "step": 22776 }, { "epoch": 0.6980814024764006, "grad_norm": 1.3622847150109725, "learning_rate": 2.205946811319551e-06, "loss": 0.62, "step": 22777 }, { "epoch": 0.6981120509991419, "grad_norm": 1.630375257180255, "learning_rate": 2.205535231041803e-06, "loss": 0.4914, "step": 22778 }, { "epoch": 0.698142699521883, "grad_norm": 1.359744089248209, "learning_rate": 2.2051236782986295e-06, "loss": 0.597, "step": 22779 }, { "epoch": 0.6981733480446243, "grad_norm": 1.5761647177771396, "learning_rate": 2.2047121530940873e-06, "loss": 0.6807, "step": 22780 }, { "epoch": 0.6982039965673654, "grad_norm": 0.45940983428061355, "learning_rate": 2.204300655432234e-06, "loss": 0.4062, "step": 22781 }, { "epoch": 0.6982346450901067, "grad_norm": 1.2604667385797041, "learning_rate": 2.2038891853171213e-06, "loss": 0.5298, "step": 22782 }, { "epoch": 0.6982652936128478, "grad_norm": 0.45609667804368614, "learning_rate": 2.2034777427527998e-06, "loss": 0.4068, "step": 22783 }, { "epoch": 0.6982959421355891, "grad_norm": 1.3333127269400156, "learning_rate": 2.2030663277433316e-06, "loss": 0.6594, "step": 22784 }, { "epoch": 0.6983265906583302, "grad_norm": 1.2578186973091514, "learning_rate": 2.2026549402927644e-06, "loss": 0.5508, "step": 22785 }, { "epoch": 0.6983572391810715, "grad_norm": 1.460906731515823, "learning_rate": 2.202243580405156e-06, "loss": 0.6428, "step": 22786 }, { "epoch": 0.6983878877038127, "grad_norm": 1.2019328870093235, "learning_rate": 2.2018322480845554e-06, "loss": 0.5295, "step": 22787 }, { "epoch": 0.6984185362265539, "grad_norm": 1.1646781328070464, "learning_rate": 2.201420943335018e-06, "loss": 0.5998, "step": 22788 }, { "epoch": 0.6984491847492951, "grad_norm": 1.454806822939011, "learning_rate": 2.2010096661605973e-06, "loss": 0.6248, "step": 22789 }, { "epoch": 0.6984798332720363, "grad_norm": 0.4367187494706194, "learning_rate": 2.200598416565343e-06, "loss": 0.3901, "step": 22790 }, { "epoch": 0.6985104817947775, "grad_norm": 1.1230726192798732, "learning_rate": 2.2001871945533087e-06, "loss": 0.562, "step": 22791 }, { "epoch": 0.6985411303175187, "grad_norm": 0.4355532678466632, "learning_rate": 2.1997760001285485e-06, "loss": 0.393, "step": 22792 }, { "epoch": 0.6985717788402599, "grad_norm": 1.4071267154084, "learning_rate": 2.19936483329511e-06, "loss": 0.6036, "step": 22793 }, { "epoch": 0.6986024273630012, "grad_norm": 1.2925703425807333, "learning_rate": 2.198953694057046e-06, "loss": 0.6441, "step": 22794 }, { "epoch": 0.6986330758857423, "grad_norm": 1.3252189111371326, "learning_rate": 2.1985425824184096e-06, "loss": 0.532, "step": 22795 }, { "epoch": 0.6986637244084836, "grad_norm": 1.2833067662780926, "learning_rate": 2.1981314983832484e-06, "loss": 0.7114, "step": 22796 }, { "epoch": 0.6986943729312247, "grad_norm": 0.4484151445251136, "learning_rate": 2.1977204419556163e-06, "loss": 0.3771, "step": 22797 }, { "epoch": 0.6987250214539659, "grad_norm": 1.3428407970971534, "learning_rate": 2.19730941313956e-06, "loss": 0.6595, "step": 22798 }, { "epoch": 0.6987556699767071, "grad_norm": 1.1931160104930054, "learning_rate": 2.1968984119391308e-06, "loss": 0.6627, "step": 22799 }, { "epoch": 0.6987863184994483, "grad_norm": 1.4223142297501195, "learning_rate": 2.1964874383583805e-06, "loss": 0.5864, "step": 22800 }, { "epoch": 0.6988169670221895, "grad_norm": 1.1933017525715215, "learning_rate": 2.1960764924013554e-06, "loss": 0.6296, "step": 22801 }, { "epoch": 0.6988476155449307, "grad_norm": 1.284522249108266, "learning_rate": 2.1956655740721056e-06, "loss": 0.6397, "step": 22802 }, { "epoch": 0.698878264067672, "grad_norm": 1.3674152919755267, "learning_rate": 2.1952546833746825e-06, "loss": 0.6859, "step": 22803 }, { "epoch": 0.6989089125904131, "grad_norm": 1.3208425252004488, "learning_rate": 2.1948438203131306e-06, "loss": 0.5487, "step": 22804 }, { "epoch": 0.6989395611131544, "grad_norm": 1.3595377386577123, "learning_rate": 2.194432984891501e-06, "loss": 0.6515, "step": 22805 }, { "epoch": 0.6989702096358955, "grad_norm": 1.4046187744714556, "learning_rate": 2.194022177113842e-06, "loss": 0.6631, "step": 22806 }, { "epoch": 0.6990008581586368, "grad_norm": 1.3845062726977198, "learning_rate": 2.193611396984199e-06, "loss": 0.5902, "step": 22807 }, { "epoch": 0.6990315066813779, "grad_norm": 0.44291769916113066, "learning_rate": 2.193200644506622e-06, "loss": 0.4047, "step": 22808 }, { "epoch": 0.6990621552041192, "grad_norm": 1.331779486368639, "learning_rate": 2.1927899196851564e-06, "loss": 0.5234, "step": 22809 }, { "epoch": 0.6990928037268603, "grad_norm": 1.3653962139154405, "learning_rate": 2.19237922252385e-06, "loss": 0.5726, "step": 22810 }, { "epoch": 0.6991234522496016, "grad_norm": 1.2991576231971138, "learning_rate": 2.1919685530267503e-06, "loss": 0.6168, "step": 22811 }, { "epoch": 0.6991541007723427, "grad_norm": 1.3177930667692945, "learning_rate": 2.191557911197902e-06, "loss": 0.6769, "step": 22812 }, { "epoch": 0.699184749295084, "grad_norm": 1.1258637939965763, "learning_rate": 2.1911472970413517e-06, "loss": 0.5665, "step": 22813 }, { "epoch": 0.6992153978178252, "grad_norm": 1.2856036634871133, "learning_rate": 2.1907367105611475e-06, "loss": 0.5907, "step": 22814 }, { "epoch": 0.6992460463405664, "grad_norm": 1.4979280014559178, "learning_rate": 2.1903261517613324e-06, "loss": 0.6205, "step": 22815 }, { "epoch": 0.6992766948633076, "grad_norm": 1.551837647959478, "learning_rate": 2.1899156206459515e-06, "loss": 0.6488, "step": 22816 }, { "epoch": 0.6993073433860488, "grad_norm": 1.355010299010644, "learning_rate": 2.1895051172190535e-06, "loss": 0.5523, "step": 22817 }, { "epoch": 0.69933799190879, "grad_norm": 1.3050680696500012, "learning_rate": 2.1890946414846785e-06, "loss": 0.6965, "step": 22818 }, { "epoch": 0.6993686404315312, "grad_norm": 1.387801668428515, "learning_rate": 2.188684193446875e-06, "loss": 0.5683, "step": 22819 }, { "epoch": 0.6993992889542724, "grad_norm": 1.2949683284739388, "learning_rate": 2.188273773109684e-06, "loss": 0.6317, "step": 22820 }, { "epoch": 0.6994299374770137, "grad_norm": 1.2895721804598694, "learning_rate": 2.1878633804771506e-06, "loss": 0.5914, "step": 22821 }, { "epoch": 0.6994605859997548, "grad_norm": 1.245271538180447, "learning_rate": 2.18745301555332e-06, "loss": 0.6183, "step": 22822 }, { "epoch": 0.6994912345224961, "grad_norm": 1.1955660311303369, "learning_rate": 2.187042678342234e-06, "loss": 0.6091, "step": 22823 }, { "epoch": 0.6995218830452372, "grad_norm": 1.639936983378328, "learning_rate": 2.186632368847935e-06, "loss": 0.6741, "step": 22824 }, { "epoch": 0.6995525315679785, "grad_norm": 1.2113379185901374, "learning_rate": 2.18622208707447e-06, "loss": 0.5931, "step": 22825 }, { "epoch": 0.6995831800907196, "grad_norm": 1.2502343832637495, "learning_rate": 2.185811833025876e-06, "loss": 0.6578, "step": 22826 }, { "epoch": 0.6996138286134609, "grad_norm": 1.4314124503708778, "learning_rate": 2.1854016067062006e-06, "loss": 0.5782, "step": 22827 }, { "epoch": 0.699644477136202, "grad_norm": 1.3597571995461732, "learning_rate": 2.184991408119481e-06, "loss": 0.6655, "step": 22828 }, { "epoch": 0.6996751256589432, "grad_norm": 1.2102649185983758, "learning_rate": 2.184581237269761e-06, "loss": 0.5908, "step": 22829 }, { "epoch": 0.6997057741816844, "grad_norm": 1.3602544215779397, "learning_rate": 2.184171094161085e-06, "loss": 0.6211, "step": 22830 }, { "epoch": 0.6997364227044256, "grad_norm": 1.1332107578372734, "learning_rate": 2.1837609787974894e-06, "loss": 0.5069, "step": 22831 }, { "epoch": 0.6997670712271669, "grad_norm": 0.4516398558294031, "learning_rate": 2.1833508911830175e-06, "loss": 0.3869, "step": 22832 }, { "epoch": 0.699797719749908, "grad_norm": 1.2710854347800857, "learning_rate": 2.182940831321712e-06, "loss": 0.6469, "step": 22833 }, { "epoch": 0.6998283682726493, "grad_norm": 1.3173378346867546, "learning_rate": 2.1825307992176094e-06, "loss": 0.6192, "step": 22834 }, { "epoch": 0.6998590167953904, "grad_norm": 0.4507318517592445, "learning_rate": 2.1821207948747513e-06, "loss": 0.3963, "step": 22835 }, { "epoch": 0.6998896653181317, "grad_norm": 1.6519943059057385, "learning_rate": 2.18171081829718e-06, "loss": 0.6639, "step": 22836 }, { "epoch": 0.6999203138408728, "grad_norm": 1.366074500294963, "learning_rate": 2.1813008694889314e-06, "loss": 0.6473, "step": 22837 }, { "epoch": 0.6999509623636141, "grad_norm": 1.2317127152258356, "learning_rate": 2.1808909484540486e-06, "loss": 0.6406, "step": 22838 }, { "epoch": 0.6999816108863552, "grad_norm": 1.3416242772908267, "learning_rate": 2.180481055196565e-06, "loss": 0.5676, "step": 22839 }, { "epoch": 0.7000122594090965, "grad_norm": 1.279118153740308, "learning_rate": 2.180071189720526e-06, "loss": 0.6314, "step": 22840 }, { "epoch": 0.7000429079318377, "grad_norm": 1.3251877081412287, "learning_rate": 2.1796613520299677e-06, "loss": 0.6685, "step": 22841 }, { "epoch": 0.7000735564545789, "grad_norm": 1.3486258122517736, "learning_rate": 2.179251542128925e-06, "loss": 0.6511, "step": 22842 }, { "epoch": 0.7001042049773201, "grad_norm": 1.3162272081981463, "learning_rate": 2.1788417600214398e-06, "loss": 0.6476, "step": 22843 }, { "epoch": 0.7001348535000613, "grad_norm": 0.4403102878767641, "learning_rate": 2.1784320057115493e-06, "loss": 0.3961, "step": 22844 }, { "epoch": 0.7001655020228025, "grad_norm": 1.1632403935796227, "learning_rate": 2.1780222792032885e-06, "loss": 0.5311, "step": 22845 }, { "epoch": 0.7001961505455437, "grad_norm": 1.3987878634337654, "learning_rate": 2.1776125805006964e-06, "loss": 0.6443, "step": 22846 }, { "epoch": 0.7002267990682849, "grad_norm": 1.3653642304059554, "learning_rate": 2.17720290960781e-06, "loss": 0.6363, "step": 22847 }, { "epoch": 0.7002574475910261, "grad_norm": 1.1779645047801663, "learning_rate": 2.176793266528667e-06, "loss": 0.55, "step": 22848 }, { "epoch": 0.7002880961137673, "grad_norm": 1.3732738233517827, "learning_rate": 2.1763836512673025e-06, "loss": 0.6942, "step": 22849 }, { "epoch": 0.7003187446365086, "grad_norm": 1.2656772042038265, "learning_rate": 2.1759740638277486e-06, "loss": 0.6204, "step": 22850 }, { "epoch": 0.7003493931592497, "grad_norm": 0.4662933461529565, "learning_rate": 2.175564504214049e-06, "loss": 0.4073, "step": 22851 }, { "epoch": 0.700380041681991, "grad_norm": 1.301139505306619, "learning_rate": 2.175154972430234e-06, "loss": 0.5928, "step": 22852 }, { "epoch": 0.7004106902047321, "grad_norm": 1.312266752485159, "learning_rate": 2.1747454684803387e-06, "loss": 0.6426, "step": 22853 }, { "epoch": 0.7004413387274734, "grad_norm": 1.3324856517739174, "learning_rate": 2.174335992368399e-06, "loss": 0.6005, "step": 22854 }, { "epoch": 0.7004719872502145, "grad_norm": 1.0959227002928758, "learning_rate": 2.1739265440984513e-06, "loss": 0.5211, "step": 22855 }, { "epoch": 0.7005026357729558, "grad_norm": 1.328805282968943, "learning_rate": 2.1735171236745275e-06, "loss": 0.5377, "step": 22856 }, { "epoch": 0.700533284295697, "grad_norm": 1.5897883241622097, "learning_rate": 2.1731077311006616e-06, "loss": 0.6388, "step": 22857 }, { "epoch": 0.7005639328184382, "grad_norm": 1.2688499247282823, "learning_rate": 2.172698366380889e-06, "loss": 0.6197, "step": 22858 }, { "epoch": 0.7005945813411794, "grad_norm": 1.2473827692058879, "learning_rate": 2.1722890295192446e-06, "loss": 0.5366, "step": 22859 }, { "epoch": 0.7006252298639205, "grad_norm": 1.2542472147345605, "learning_rate": 2.17187972051976e-06, "loss": 0.7064, "step": 22860 }, { "epoch": 0.7006558783866618, "grad_norm": 0.4536461022903579, "learning_rate": 2.1714704393864638e-06, "loss": 0.3887, "step": 22861 }, { "epoch": 0.7006865269094029, "grad_norm": 1.312561093868507, "learning_rate": 2.1710611861233977e-06, "loss": 0.6151, "step": 22862 }, { "epoch": 0.7007171754321442, "grad_norm": 1.3139403532722966, "learning_rate": 2.170651960734589e-06, "loss": 0.565, "step": 22863 }, { "epoch": 0.7007478239548853, "grad_norm": 1.2123778927168825, "learning_rate": 2.1702427632240684e-06, "loss": 0.62, "step": 22864 }, { "epoch": 0.7007784724776266, "grad_norm": 1.324997692182398, "learning_rate": 2.1698335935958705e-06, "loss": 0.6756, "step": 22865 }, { "epoch": 0.7008091210003677, "grad_norm": 1.270153931661183, "learning_rate": 2.169424451854026e-06, "loss": 0.5675, "step": 22866 }, { "epoch": 0.700839769523109, "grad_norm": 1.4808389235101542, "learning_rate": 2.1690153380025685e-06, "loss": 0.7115, "step": 22867 }, { "epoch": 0.7008704180458502, "grad_norm": 1.325394339411308, "learning_rate": 2.168606252045525e-06, "loss": 0.6147, "step": 22868 }, { "epoch": 0.7009010665685914, "grad_norm": 1.4500163994752342, "learning_rate": 2.1681971939869295e-06, "loss": 0.5246, "step": 22869 }, { "epoch": 0.7009317150913326, "grad_norm": 1.485134187893491, "learning_rate": 2.1677881638308124e-06, "loss": 0.5449, "step": 22870 }, { "epoch": 0.7009623636140738, "grad_norm": 1.353104878588726, "learning_rate": 2.167379161581204e-06, "loss": 0.5813, "step": 22871 }, { "epoch": 0.700993012136815, "grad_norm": 1.3947029123290113, "learning_rate": 2.1669701872421313e-06, "loss": 0.5658, "step": 22872 }, { "epoch": 0.7010236606595562, "grad_norm": 1.1868794103664224, "learning_rate": 2.166561240817626e-06, "loss": 0.5754, "step": 22873 }, { "epoch": 0.7010543091822974, "grad_norm": 0.45597001951594057, "learning_rate": 2.1661523223117176e-06, "loss": 0.401, "step": 22874 }, { "epoch": 0.7010849577050386, "grad_norm": 1.3631427918032355, "learning_rate": 2.1657434317284377e-06, "loss": 0.6461, "step": 22875 }, { "epoch": 0.7011156062277798, "grad_norm": 1.1608470780305005, "learning_rate": 2.165334569071811e-06, "loss": 0.6148, "step": 22876 }, { "epoch": 0.7011462547505211, "grad_norm": 1.2289793107681912, "learning_rate": 2.164925734345868e-06, "loss": 0.5706, "step": 22877 }, { "epoch": 0.7011769032732622, "grad_norm": 1.28818858521988, "learning_rate": 2.164516927554639e-06, "loss": 0.638, "step": 22878 }, { "epoch": 0.7012075517960035, "grad_norm": 0.45454573772943774, "learning_rate": 2.16410814870215e-06, "loss": 0.4087, "step": 22879 }, { "epoch": 0.7012382003187446, "grad_norm": 1.2007117056765237, "learning_rate": 2.163699397792426e-06, "loss": 0.5096, "step": 22880 }, { "epoch": 0.7012688488414859, "grad_norm": 1.2365118544732612, "learning_rate": 2.1632906748295006e-06, "loss": 0.6018, "step": 22881 }, { "epoch": 0.701299497364227, "grad_norm": 1.2991473845757635, "learning_rate": 2.1628819798173983e-06, "loss": 0.6676, "step": 22882 }, { "epoch": 0.7013301458869683, "grad_norm": 1.1242400718567291, "learning_rate": 2.1624733127601437e-06, "loss": 0.6052, "step": 22883 }, { "epoch": 0.7013607944097094, "grad_norm": 1.525899639891976, "learning_rate": 2.1620646736617658e-06, "loss": 0.6616, "step": 22884 }, { "epoch": 0.7013914429324507, "grad_norm": 1.2388019157170147, "learning_rate": 2.1616560625262904e-06, "loss": 0.6337, "step": 22885 }, { "epoch": 0.7014220914551919, "grad_norm": 1.363514038836463, "learning_rate": 2.1612474793577458e-06, "loss": 0.7524, "step": 22886 }, { "epoch": 0.7014527399779331, "grad_norm": 0.4300864063619581, "learning_rate": 2.160838924160155e-06, "loss": 0.3872, "step": 22887 }, { "epoch": 0.7014833885006743, "grad_norm": 0.46162162242368726, "learning_rate": 2.160430396937544e-06, "loss": 0.383, "step": 22888 }, { "epoch": 0.7015140370234155, "grad_norm": 1.1589067295828823, "learning_rate": 2.1600218976939413e-06, "loss": 0.4499, "step": 22889 }, { "epoch": 0.7015446855461567, "grad_norm": 1.535584701098058, "learning_rate": 2.159613426433369e-06, "loss": 0.6092, "step": 22890 }, { "epoch": 0.7015753340688978, "grad_norm": 1.339600634411207, "learning_rate": 2.1592049831598487e-06, "loss": 0.5925, "step": 22891 }, { "epoch": 0.7016059825916391, "grad_norm": 1.2924279197397905, "learning_rate": 2.1587965678774125e-06, "loss": 0.5674, "step": 22892 }, { "epoch": 0.7016366311143802, "grad_norm": 0.43967712859372204, "learning_rate": 2.1583881805900786e-06, "loss": 0.4009, "step": 22893 }, { "epoch": 0.7016672796371215, "grad_norm": 0.4348094511567908, "learning_rate": 2.157979821301875e-06, "loss": 0.3959, "step": 22894 }, { "epoch": 0.7016979281598626, "grad_norm": 1.2228534681454284, "learning_rate": 2.1575714900168217e-06, "loss": 0.4908, "step": 22895 }, { "epoch": 0.7017285766826039, "grad_norm": 0.43487284210912963, "learning_rate": 2.157163186738943e-06, "loss": 0.4132, "step": 22896 }, { "epoch": 0.7017592252053451, "grad_norm": 1.3298731979993843, "learning_rate": 2.156754911472265e-06, "loss": 0.4854, "step": 22897 }, { "epoch": 0.7017898737280863, "grad_norm": 1.3942415463383853, "learning_rate": 2.156346664220807e-06, "loss": 0.726, "step": 22898 }, { "epoch": 0.7018205222508275, "grad_norm": 1.3364211873136305, "learning_rate": 2.155938444988593e-06, "loss": 0.6244, "step": 22899 }, { "epoch": 0.7018511707735687, "grad_norm": 1.583039158361497, "learning_rate": 2.1555302537796463e-06, "loss": 0.6124, "step": 22900 }, { "epoch": 0.7018818192963099, "grad_norm": 1.5288031429901083, "learning_rate": 2.1551220905979864e-06, "loss": 0.5839, "step": 22901 }, { "epoch": 0.7019124678190511, "grad_norm": 1.4564230443002817, "learning_rate": 2.154713955447636e-06, "loss": 0.6296, "step": 22902 }, { "epoch": 0.7019431163417923, "grad_norm": 1.3622129082540935, "learning_rate": 2.154305848332619e-06, "loss": 0.5999, "step": 22903 }, { "epoch": 0.7019737648645336, "grad_norm": 1.3411219169276045, "learning_rate": 2.153897769256953e-06, "loss": 0.5304, "step": 22904 }, { "epoch": 0.7020044133872747, "grad_norm": 1.343210987445003, "learning_rate": 2.1534897182246623e-06, "loss": 0.6344, "step": 22905 }, { "epoch": 0.702035061910016, "grad_norm": 1.3155488474010257, "learning_rate": 2.1530816952397636e-06, "loss": 0.5891, "step": 22906 }, { "epoch": 0.7020657104327571, "grad_norm": 1.4486153896293632, "learning_rate": 2.15267370030628e-06, "loss": 0.6575, "step": 22907 }, { "epoch": 0.7020963589554984, "grad_norm": 0.46405105490510273, "learning_rate": 2.152265733428232e-06, "loss": 0.4109, "step": 22908 }, { "epoch": 0.7021270074782395, "grad_norm": 1.4126104508511752, "learning_rate": 2.151857794609637e-06, "loss": 0.6488, "step": 22909 }, { "epoch": 0.7021576560009808, "grad_norm": 1.5455007307058275, "learning_rate": 2.1514498838545157e-06, "loss": 0.6834, "step": 22910 }, { "epoch": 0.7021883045237219, "grad_norm": 1.3084700241384974, "learning_rate": 2.1510420011668892e-06, "loss": 0.6461, "step": 22911 }, { "epoch": 0.7022189530464632, "grad_norm": 1.427614403478681, "learning_rate": 2.1506341465507728e-06, "loss": 0.6047, "step": 22912 }, { "epoch": 0.7022496015692044, "grad_norm": 1.3921432668972806, "learning_rate": 2.150226320010188e-06, "loss": 0.5643, "step": 22913 }, { "epoch": 0.7022802500919456, "grad_norm": 1.333593026802843, "learning_rate": 2.1498185215491534e-06, "loss": 0.4823, "step": 22914 }, { "epoch": 0.7023108986146868, "grad_norm": 1.2832042637930408, "learning_rate": 2.149410751171685e-06, "loss": 0.59, "step": 22915 }, { "epoch": 0.702341547137428, "grad_norm": 1.5996520417438094, "learning_rate": 2.1490030088818032e-06, "loss": 0.6157, "step": 22916 }, { "epoch": 0.7023721956601692, "grad_norm": 1.335825910730004, "learning_rate": 2.1485952946835227e-06, "loss": 0.6024, "step": 22917 }, { "epoch": 0.7024028441829104, "grad_norm": 1.2692561967750124, "learning_rate": 2.148187608580862e-06, "loss": 0.5885, "step": 22918 }, { "epoch": 0.7024334927056516, "grad_norm": 1.759318694497241, "learning_rate": 2.1477799505778407e-06, "loss": 0.5464, "step": 22919 }, { "epoch": 0.7024641412283928, "grad_norm": 0.44369242598032704, "learning_rate": 2.147372320678471e-06, "loss": 0.3931, "step": 22920 }, { "epoch": 0.702494789751134, "grad_norm": 1.279410240057946, "learning_rate": 2.146964718886772e-06, "loss": 0.5669, "step": 22921 }, { "epoch": 0.7025254382738751, "grad_norm": 1.3770423986938023, "learning_rate": 2.1465571452067614e-06, "loss": 0.5846, "step": 22922 }, { "epoch": 0.7025560867966164, "grad_norm": 1.3859334244271997, "learning_rate": 2.1461495996424513e-06, "loss": 0.5737, "step": 22923 }, { "epoch": 0.7025867353193576, "grad_norm": 1.4028044012236114, "learning_rate": 2.145742082197862e-06, "loss": 0.5989, "step": 22924 }, { "epoch": 0.7026173838420988, "grad_norm": 1.6419528657506246, "learning_rate": 2.1453345928770037e-06, "loss": 0.5417, "step": 22925 }, { "epoch": 0.70264803236484, "grad_norm": 1.4149577311810528, "learning_rate": 2.144927131683894e-06, "loss": 0.7001, "step": 22926 }, { "epoch": 0.7026786808875812, "grad_norm": 1.1816367337729825, "learning_rate": 2.14451969862255e-06, "loss": 0.5205, "step": 22927 }, { "epoch": 0.7027093294103224, "grad_norm": 1.2130639067886941, "learning_rate": 2.1441122936969814e-06, "loss": 0.5644, "step": 22928 }, { "epoch": 0.7027399779330636, "grad_norm": 0.4710549486227835, "learning_rate": 2.1437049169112062e-06, "loss": 0.3914, "step": 22929 }, { "epoch": 0.7027706264558048, "grad_norm": 1.3401091804941383, "learning_rate": 2.1432975682692387e-06, "loss": 0.6848, "step": 22930 }, { "epoch": 0.702801274978546, "grad_norm": 0.45492581329091814, "learning_rate": 2.142890247775089e-06, "loss": 0.3783, "step": 22931 }, { "epoch": 0.7028319235012872, "grad_norm": 1.2308252451868014, "learning_rate": 2.142482955432773e-06, "loss": 0.5531, "step": 22932 }, { "epoch": 0.7028625720240285, "grad_norm": 1.3840316365886094, "learning_rate": 2.142075691246305e-06, "loss": 0.6785, "step": 22933 }, { "epoch": 0.7028932205467696, "grad_norm": 1.271325671414578, "learning_rate": 2.1416684552196947e-06, "loss": 0.5798, "step": 22934 }, { "epoch": 0.7029238690695109, "grad_norm": 1.360120720125257, "learning_rate": 2.141261247356959e-06, "loss": 0.6156, "step": 22935 }, { "epoch": 0.702954517592252, "grad_norm": 1.3037523263567563, "learning_rate": 2.1408540676621054e-06, "loss": 0.631, "step": 22936 }, { "epoch": 0.7029851661149933, "grad_norm": 1.4530429366414201, "learning_rate": 2.140446916139148e-06, "loss": 0.5852, "step": 22937 }, { "epoch": 0.7030158146377344, "grad_norm": 0.4350000641695215, "learning_rate": 2.140039792792101e-06, "loss": 0.3842, "step": 22938 }, { "epoch": 0.7030464631604757, "grad_norm": 1.3544331938288747, "learning_rate": 2.1396326976249716e-06, "loss": 0.5389, "step": 22939 }, { "epoch": 0.7030771116832168, "grad_norm": 1.4291454792158016, "learning_rate": 2.139225630641773e-06, "loss": 0.5948, "step": 22940 }, { "epoch": 0.7031077602059581, "grad_norm": 1.0919364387706303, "learning_rate": 2.1388185918465183e-06, "loss": 0.5321, "step": 22941 }, { "epoch": 0.7031384087286993, "grad_norm": 1.4679055878946798, "learning_rate": 2.1384115812432138e-06, "loss": 0.6894, "step": 22942 }, { "epoch": 0.7031690572514405, "grad_norm": 1.3556480357647003, "learning_rate": 2.138004598835872e-06, "loss": 0.6069, "step": 22943 }, { "epoch": 0.7031997057741817, "grad_norm": 0.4650811422094806, "learning_rate": 2.1375976446285057e-06, "loss": 0.3994, "step": 22944 }, { "epoch": 0.7032303542969229, "grad_norm": 1.3918583801623452, "learning_rate": 2.13719071862512e-06, "loss": 0.7024, "step": 22945 }, { "epoch": 0.7032610028196641, "grad_norm": 1.2249774546986194, "learning_rate": 2.1367838208297287e-06, "loss": 0.5743, "step": 22946 }, { "epoch": 0.7032916513424053, "grad_norm": 1.30510539046379, "learning_rate": 2.1363769512463357e-06, "loss": 0.6175, "step": 22947 }, { "epoch": 0.7033222998651465, "grad_norm": 0.4592504530367689, "learning_rate": 2.1359701098789558e-06, "loss": 0.3962, "step": 22948 }, { "epoch": 0.7033529483878878, "grad_norm": 1.212155044280294, "learning_rate": 2.1355632967315965e-06, "loss": 0.6023, "step": 22949 }, { "epoch": 0.7033835969106289, "grad_norm": 1.3718972333344124, "learning_rate": 2.1351565118082624e-06, "loss": 0.6054, "step": 22950 }, { "epoch": 0.7034142454333702, "grad_norm": 0.4691303834170563, "learning_rate": 2.1347497551129644e-06, "loss": 0.4214, "step": 22951 }, { "epoch": 0.7034448939561113, "grad_norm": 1.314735775893634, "learning_rate": 2.1343430266497116e-06, "loss": 0.6148, "step": 22952 }, { "epoch": 0.7034755424788525, "grad_norm": 1.3444679057618796, "learning_rate": 2.1339363264225084e-06, "loss": 0.6579, "step": 22953 }, { "epoch": 0.7035061910015937, "grad_norm": 1.3254815169464074, "learning_rate": 2.133529654435364e-06, "loss": 0.669, "step": 22954 }, { "epoch": 0.7035368395243349, "grad_norm": 1.2421174028609587, "learning_rate": 2.1331230106922857e-06, "loss": 0.6215, "step": 22955 }, { "epoch": 0.7035674880470761, "grad_norm": 1.4115595365082494, "learning_rate": 2.1327163951972814e-06, "loss": 0.6711, "step": 22956 }, { "epoch": 0.7035981365698173, "grad_norm": 1.3275788465598375, "learning_rate": 2.132309807954356e-06, "loss": 0.6669, "step": 22957 }, { "epoch": 0.7036287850925586, "grad_norm": 1.5057537226401019, "learning_rate": 2.131903248967512e-06, "loss": 0.6498, "step": 22958 }, { "epoch": 0.7036594336152997, "grad_norm": 1.335530173936653, "learning_rate": 2.131496718240763e-06, "loss": 0.5912, "step": 22959 }, { "epoch": 0.703690082138041, "grad_norm": 1.356868560951382, "learning_rate": 2.131090215778111e-06, "loss": 0.6911, "step": 22960 }, { "epoch": 0.7037207306607821, "grad_norm": 1.4422339211041595, "learning_rate": 2.130683741583559e-06, "loss": 0.6372, "step": 22961 }, { "epoch": 0.7037513791835234, "grad_norm": 1.3206570246004985, "learning_rate": 2.1302772956611144e-06, "loss": 0.5909, "step": 22962 }, { "epoch": 0.7037820277062645, "grad_norm": 1.2213721488736333, "learning_rate": 2.129870878014784e-06, "loss": 0.5555, "step": 22963 }, { "epoch": 0.7038126762290058, "grad_norm": 1.3038265139242984, "learning_rate": 2.1294644886485677e-06, "loss": 0.5733, "step": 22964 }, { "epoch": 0.7038433247517469, "grad_norm": 1.1920972247389765, "learning_rate": 2.129058127566473e-06, "loss": 0.6137, "step": 22965 }, { "epoch": 0.7038739732744882, "grad_norm": 1.4050442674611319, "learning_rate": 2.128651794772503e-06, "loss": 0.6859, "step": 22966 }, { "epoch": 0.7039046217972293, "grad_norm": 1.1572977218855056, "learning_rate": 2.1282454902706625e-06, "loss": 0.5701, "step": 22967 }, { "epoch": 0.7039352703199706, "grad_norm": 1.131020079746692, "learning_rate": 2.1278392140649547e-06, "loss": 0.5704, "step": 22968 }, { "epoch": 0.7039659188427118, "grad_norm": 1.3124732006279851, "learning_rate": 2.1274329661593795e-06, "loss": 0.5832, "step": 22969 }, { "epoch": 0.703996567365453, "grad_norm": 1.1837735175546584, "learning_rate": 2.127026746557943e-06, "loss": 0.6258, "step": 22970 }, { "epoch": 0.7040272158881942, "grad_norm": 1.5243530520275637, "learning_rate": 2.1266205552646485e-06, "loss": 0.5863, "step": 22971 }, { "epoch": 0.7040578644109354, "grad_norm": 1.269512188142275, "learning_rate": 2.1262143922834953e-06, "loss": 0.5962, "step": 22972 }, { "epoch": 0.7040885129336766, "grad_norm": 1.4110634476994748, "learning_rate": 2.1258082576184868e-06, "loss": 0.6797, "step": 22973 }, { "epoch": 0.7041191614564178, "grad_norm": 1.2613436682410524, "learning_rate": 2.125402151273625e-06, "loss": 0.5661, "step": 22974 }, { "epoch": 0.704149809979159, "grad_norm": 1.345136069011242, "learning_rate": 2.124996073252913e-06, "loss": 0.7188, "step": 22975 }, { "epoch": 0.7041804585019003, "grad_norm": 1.2681605771244098, "learning_rate": 2.1245900235603507e-06, "loss": 0.5455, "step": 22976 }, { "epoch": 0.7042111070246414, "grad_norm": 1.3349159820307623, "learning_rate": 2.124184002199934e-06, "loss": 0.7168, "step": 22977 }, { "epoch": 0.7042417555473827, "grad_norm": 1.1618083632823069, "learning_rate": 2.1237780091756726e-06, "loss": 0.5991, "step": 22978 }, { "epoch": 0.7042724040701238, "grad_norm": 1.4005646873517597, "learning_rate": 2.123372044491562e-06, "loss": 0.6422, "step": 22979 }, { "epoch": 0.7043030525928651, "grad_norm": 1.2932731067680059, "learning_rate": 2.1229661081516017e-06, "loss": 0.633, "step": 22980 }, { "epoch": 0.7043337011156062, "grad_norm": 1.3292138824993658, "learning_rate": 2.1225602001597918e-06, "loss": 0.5877, "step": 22981 }, { "epoch": 0.7043643496383475, "grad_norm": 1.4471438923886708, "learning_rate": 2.122154320520134e-06, "loss": 0.6811, "step": 22982 }, { "epoch": 0.7043949981610886, "grad_norm": 1.3558931120515563, "learning_rate": 2.1217484692366245e-06, "loss": 0.5901, "step": 22983 }, { "epoch": 0.7044256466838298, "grad_norm": 1.2765051294978949, "learning_rate": 2.121342646313264e-06, "loss": 0.61, "step": 22984 }, { "epoch": 0.704456295206571, "grad_norm": 1.2656240218320907, "learning_rate": 2.1209368517540506e-06, "loss": 0.64, "step": 22985 }, { "epoch": 0.7044869437293122, "grad_norm": 0.460099305296453, "learning_rate": 2.120531085562985e-06, "loss": 0.3931, "step": 22986 }, { "epoch": 0.7045175922520535, "grad_norm": 1.2983356641836197, "learning_rate": 2.120125347744063e-06, "loss": 0.5274, "step": 22987 }, { "epoch": 0.7045482407747946, "grad_norm": 0.4385121162617779, "learning_rate": 2.1197196383012795e-06, "loss": 0.4053, "step": 22988 }, { "epoch": 0.7045788892975359, "grad_norm": 1.4965275707135777, "learning_rate": 2.119313957238639e-06, "loss": 0.5672, "step": 22989 }, { "epoch": 0.704609537820277, "grad_norm": 1.5398231963486833, "learning_rate": 2.1189083045601355e-06, "loss": 0.6022, "step": 22990 }, { "epoch": 0.7046401863430183, "grad_norm": 1.2991011138919637, "learning_rate": 2.118502680269763e-06, "loss": 0.5601, "step": 22991 }, { "epoch": 0.7046708348657594, "grad_norm": 1.3707496298546349, "learning_rate": 2.1180970843715215e-06, "loss": 0.4873, "step": 22992 }, { "epoch": 0.7047014833885007, "grad_norm": 1.3340636622499755, "learning_rate": 2.1176915168694067e-06, "loss": 0.5844, "step": 22993 }, { "epoch": 0.7047321319112418, "grad_norm": 1.2353543605216937, "learning_rate": 2.1172859777674164e-06, "loss": 0.5869, "step": 22994 }, { "epoch": 0.7047627804339831, "grad_norm": 1.2218611604705407, "learning_rate": 2.116880467069543e-06, "loss": 0.5533, "step": 22995 }, { "epoch": 0.7047934289567243, "grad_norm": 1.4311289094905058, "learning_rate": 2.1164749847797843e-06, "loss": 0.6069, "step": 22996 }, { "epoch": 0.7048240774794655, "grad_norm": 0.4422162748768539, "learning_rate": 2.1160695309021373e-06, "loss": 0.404, "step": 22997 }, { "epoch": 0.7048547260022067, "grad_norm": 1.3780689076886394, "learning_rate": 2.1156641054405952e-06, "loss": 0.5974, "step": 22998 }, { "epoch": 0.7048853745249479, "grad_norm": 1.3488092759425778, "learning_rate": 2.1152587083991486e-06, "loss": 0.6107, "step": 22999 }, { "epoch": 0.7049160230476891, "grad_norm": 1.2815469714326673, "learning_rate": 2.1148533397818e-06, "loss": 0.542, "step": 23000 }, { "epoch": 0.7049466715704303, "grad_norm": 1.291154684548268, "learning_rate": 2.114447999592538e-06, "loss": 0.5355, "step": 23001 }, { "epoch": 0.7049773200931715, "grad_norm": 1.2094025004035813, "learning_rate": 2.114042687835359e-06, "loss": 0.5968, "step": 23002 }, { "epoch": 0.7050079686159128, "grad_norm": 1.3430772411209422, "learning_rate": 2.113637404514255e-06, "loss": 0.5697, "step": 23003 }, { "epoch": 0.7050386171386539, "grad_norm": 1.2458355528803864, "learning_rate": 2.11323214963322e-06, "loss": 0.6365, "step": 23004 }, { "epoch": 0.7050692656613952, "grad_norm": 1.584418812681678, "learning_rate": 2.1128269231962485e-06, "loss": 0.5353, "step": 23005 }, { "epoch": 0.7050999141841363, "grad_norm": 1.156426166951881, "learning_rate": 2.11242172520733e-06, "loss": 0.5788, "step": 23006 }, { "epoch": 0.7051305627068776, "grad_norm": 1.3940094464156614, "learning_rate": 2.1120165556704603e-06, "loss": 0.5848, "step": 23007 }, { "epoch": 0.7051612112296187, "grad_norm": 1.266204557573415, "learning_rate": 2.1116114145896314e-06, "loss": 0.6383, "step": 23008 }, { "epoch": 0.70519185975236, "grad_norm": 1.2757069091747408, "learning_rate": 2.1112063019688343e-06, "loss": 0.5448, "step": 23009 }, { "epoch": 0.7052225082751011, "grad_norm": 1.272524668873144, "learning_rate": 2.1108012178120575e-06, "loss": 0.6653, "step": 23010 }, { "epoch": 0.7052531567978424, "grad_norm": 1.4492393927444447, "learning_rate": 2.1103961621232988e-06, "loss": 0.5497, "step": 23011 }, { "epoch": 0.7052838053205835, "grad_norm": 0.4557821831352727, "learning_rate": 2.1099911349065437e-06, "loss": 0.4247, "step": 23012 }, { "epoch": 0.7053144538433248, "grad_norm": 1.3494729109029797, "learning_rate": 2.1095861361657883e-06, "loss": 0.6261, "step": 23013 }, { "epoch": 0.705345102366066, "grad_norm": 0.46258698321238434, "learning_rate": 2.1091811659050177e-06, "loss": 0.4006, "step": 23014 }, { "epoch": 0.7053757508888071, "grad_norm": 1.1834014256178522, "learning_rate": 2.1087762241282245e-06, "loss": 0.614, "step": 23015 }, { "epoch": 0.7054063994115484, "grad_norm": 0.43964448203294226, "learning_rate": 2.1083713108394015e-06, "loss": 0.4085, "step": 23016 }, { "epoch": 0.7054370479342895, "grad_norm": 1.4897098500037742, "learning_rate": 2.1079664260425337e-06, "loss": 0.6264, "step": 23017 }, { "epoch": 0.7054676964570308, "grad_norm": 1.2733254016841866, "learning_rate": 2.1075615697416123e-06, "loss": 0.4988, "step": 23018 }, { "epoch": 0.7054983449797719, "grad_norm": 1.1535132900835472, "learning_rate": 2.1071567419406293e-06, "loss": 0.5925, "step": 23019 }, { "epoch": 0.7055289935025132, "grad_norm": 1.3188364706914377, "learning_rate": 2.1067519426435683e-06, "loss": 0.7011, "step": 23020 }, { "epoch": 0.7055596420252543, "grad_norm": 0.44741308544326325, "learning_rate": 2.106347171854423e-06, "loss": 0.4084, "step": 23021 }, { "epoch": 0.7055902905479956, "grad_norm": 0.43880261884688104, "learning_rate": 2.105942429577178e-06, "loss": 0.387, "step": 23022 }, { "epoch": 0.7056209390707368, "grad_norm": 1.323501132366641, "learning_rate": 2.1055377158158224e-06, "loss": 0.5477, "step": 23023 }, { "epoch": 0.705651587593478, "grad_norm": 0.4323092177138968, "learning_rate": 2.105133030574346e-06, "loss": 0.3903, "step": 23024 }, { "epoch": 0.7056822361162192, "grad_norm": 1.5587739097335485, "learning_rate": 2.1047283738567326e-06, "loss": 0.6989, "step": 23025 }, { "epoch": 0.7057128846389604, "grad_norm": 1.4322671396732374, "learning_rate": 2.104323745666972e-06, "loss": 0.5783, "step": 23026 }, { "epoch": 0.7057435331617016, "grad_norm": 1.8435448863328852, "learning_rate": 2.1039191460090515e-06, "loss": 0.6421, "step": 23027 }, { "epoch": 0.7057741816844428, "grad_norm": 1.2812022865905954, "learning_rate": 2.1035145748869553e-06, "loss": 0.5959, "step": 23028 }, { "epoch": 0.705804830207184, "grad_norm": 1.394314160519808, "learning_rate": 2.1031100323046703e-06, "loss": 0.5711, "step": 23029 }, { "epoch": 0.7058354787299252, "grad_norm": 1.2242527794069977, "learning_rate": 2.102705518266186e-06, "loss": 0.6409, "step": 23030 }, { "epoch": 0.7058661272526664, "grad_norm": 1.2584741054109012, "learning_rate": 2.1023010327754833e-06, "loss": 0.5943, "step": 23031 }, { "epoch": 0.7058967757754077, "grad_norm": 1.2546987650966368, "learning_rate": 2.101896575836552e-06, "loss": 0.661, "step": 23032 }, { "epoch": 0.7059274242981488, "grad_norm": 1.3968501660710844, "learning_rate": 2.1014921474533732e-06, "loss": 0.6278, "step": 23033 }, { "epoch": 0.7059580728208901, "grad_norm": 1.1316359038104344, "learning_rate": 2.101087747629934e-06, "loss": 0.5377, "step": 23034 }, { "epoch": 0.7059887213436312, "grad_norm": 1.3467849583760358, "learning_rate": 2.1006833763702206e-06, "loss": 0.5878, "step": 23035 }, { "epoch": 0.7060193698663725, "grad_norm": 0.45829552658468237, "learning_rate": 2.1002790336782143e-06, "loss": 0.3936, "step": 23036 }, { "epoch": 0.7060500183891136, "grad_norm": 0.4414493116520982, "learning_rate": 2.0998747195579007e-06, "loss": 0.3963, "step": 23037 }, { "epoch": 0.7060806669118549, "grad_norm": 0.4531446964159389, "learning_rate": 2.099470434013265e-06, "loss": 0.4101, "step": 23038 }, { "epoch": 0.706111315434596, "grad_norm": 1.5013450259309196, "learning_rate": 2.099066177048287e-06, "loss": 0.7521, "step": 23039 }, { "epoch": 0.7061419639573373, "grad_norm": 1.3892914451651903, "learning_rate": 2.098661948666953e-06, "loss": 0.5979, "step": 23040 }, { "epoch": 0.7061726124800785, "grad_norm": 1.3187789493656825, "learning_rate": 2.0982577488732464e-06, "loss": 0.5347, "step": 23041 }, { "epoch": 0.7062032610028197, "grad_norm": 1.3035408732160219, "learning_rate": 2.097853577671147e-06, "loss": 0.6407, "step": 23042 }, { "epoch": 0.7062339095255609, "grad_norm": 1.4377710867506348, "learning_rate": 2.0974494350646408e-06, "loss": 0.6186, "step": 23043 }, { "epoch": 0.7062645580483021, "grad_norm": 1.231713885722842, "learning_rate": 2.0970453210577058e-06, "loss": 0.578, "step": 23044 }, { "epoch": 0.7062952065710433, "grad_norm": 0.41787476826669406, "learning_rate": 2.0966412356543263e-06, "loss": 0.3789, "step": 23045 }, { "epoch": 0.7063258550937844, "grad_norm": 1.3648745626081527, "learning_rate": 2.096237178858485e-06, "loss": 0.639, "step": 23046 }, { "epoch": 0.7063565036165257, "grad_norm": 1.3640448135527272, "learning_rate": 2.095833150674161e-06, "loss": 0.6324, "step": 23047 }, { "epoch": 0.7063871521392668, "grad_norm": 1.2058836660930772, "learning_rate": 2.0954291511053347e-06, "loss": 0.6096, "step": 23048 }, { "epoch": 0.7064178006620081, "grad_norm": 1.2546334317390748, "learning_rate": 2.0950251801559906e-06, "loss": 0.5707, "step": 23049 }, { "epoch": 0.7064484491847492, "grad_norm": 1.3023461217285157, "learning_rate": 2.094621237830105e-06, "loss": 0.6783, "step": 23050 }, { "epoch": 0.7064790977074905, "grad_norm": 1.299909313392836, "learning_rate": 2.0942173241316594e-06, "loss": 0.6339, "step": 23051 }, { "epoch": 0.7065097462302317, "grad_norm": 1.4266422509119885, "learning_rate": 2.0938134390646357e-06, "loss": 0.6047, "step": 23052 }, { "epoch": 0.7065403947529729, "grad_norm": 1.3727511470366278, "learning_rate": 2.09340958263301e-06, "loss": 0.6516, "step": 23053 }, { "epoch": 0.7065710432757141, "grad_norm": 0.4759051188427842, "learning_rate": 2.0930057548407658e-06, "loss": 0.4096, "step": 23054 }, { "epoch": 0.7066016917984553, "grad_norm": 1.3660374759551657, "learning_rate": 2.0926019556918774e-06, "loss": 0.6468, "step": 23055 }, { "epoch": 0.7066323403211965, "grad_norm": 1.3568190104530018, "learning_rate": 2.0921981851903255e-06, "loss": 0.5758, "step": 23056 }, { "epoch": 0.7066629888439377, "grad_norm": 0.4369829731224107, "learning_rate": 2.0917944433400912e-06, "loss": 0.4145, "step": 23057 }, { "epoch": 0.7066936373666789, "grad_norm": 1.4693029129156474, "learning_rate": 2.0913907301451485e-06, "loss": 0.5977, "step": 23058 }, { "epoch": 0.7067242858894202, "grad_norm": 1.6184720569663174, "learning_rate": 2.0909870456094765e-06, "loss": 0.6497, "step": 23059 }, { "epoch": 0.7067549344121613, "grad_norm": 1.4143858529925004, "learning_rate": 2.090583389737056e-06, "loss": 0.6658, "step": 23060 }, { "epoch": 0.7067855829349026, "grad_norm": 0.4333330652092656, "learning_rate": 2.09017976253186e-06, "loss": 0.3853, "step": 23061 }, { "epoch": 0.7068162314576437, "grad_norm": 1.3359320623296662, "learning_rate": 2.089776163997867e-06, "loss": 0.5815, "step": 23062 }, { "epoch": 0.706846879980385, "grad_norm": 1.4072660738573892, "learning_rate": 2.089372594139056e-06, "loss": 0.6457, "step": 23063 }, { "epoch": 0.7068775285031261, "grad_norm": 1.3136858891354843, "learning_rate": 2.0889690529593993e-06, "loss": 0.6438, "step": 23064 }, { "epoch": 0.7069081770258674, "grad_norm": 1.2151761003628008, "learning_rate": 2.0885655404628774e-06, "loss": 0.5612, "step": 23065 }, { "epoch": 0.7069388255486085, "grad_norm": 1.4262130868589997, "learning_rate": 2.088162056653462e-06, "loss": 0.7825, "step": 23066 }, { "epoch": 0.7069694740713498, "grad_norm": 0.4437998117269984, "learning_rate": 2.0877586015351315e-06, "loss": 0.4043, "step": 23067 }, { "epoch": 0.707000122594091, "grad_norm": 1.2873859787562754, "learning_rate": 2.0873551751118624e-06, "loss": 0.5676, "step": 23068 }, { "epoch": 0.7070307711168322, "grad_norm": 0.4463588220390986, "learning_rate": 2.086951777387626e-06, "loss": 0.4003, "step": 23069 }, { "epoch": 0.7070614196395734, "grad_norm": 1.5240863171452657, "learning_rate": 2.086548408366399e-06, "loss": 0.5839, "step": 23070 }, { "epoch": 0.7070920681623146, "grad_norm": 1.3001497629742373, "learning_rate": 2.0861450680521576e-06, "loss": 0.507, "step": 23071 }, { "epoch": 0.7071227166850558, "grad_norm": 1.337411878005777, "learning_rate": 2.085741756448873e-06, "loss": 0.6522, "step": 23072 }, { "epoch": 0.707153365207797, "grad_norm": 1.3878609900642032, "learning_rate": 2.0853384735605227e-06, "loss": 0.6577, "step": 23073 }, { "epoch": 0.7071840137305382, "grad_norm": 1.2138570529808996, "learning_rate": 2.084935219391074e-06, "loss": 0.527, "step": 23074 }, { "epoch": 0.7072146622532794, "grad_norm": 0.4514935737561712, "learning_rate": 2.0845319939445074e-06, "loss": 0.3906, "step": 23075 }, { "epoch": 0.7072453107760206, "grad_norm": 1.3687241428809547, "learning_rate": 2.0841287972247935e-06, "loss": 0.562, "step": 23076 }, { "epoch": 0.7072759592987617, "grad_norm": 1.1824403789882825, "learning_rate": 2.083725629235903e-06, "loss": 0.6184, "step": 23077 }, { "epoch": 0.707306607821503, "grad_norm": 1.3176763457310356, "learning_rate": 2.0833224899818105e-06, "loss": 0.5886, "step": 23078 }, { "epoch": 0.7073372563442442, "grad_norm": 1.3057636436497455, "learning_rate": 2.082919379466489e-06, "loss": 0.6543, "step": 23079 }, { "epoch": 0.7073679048669854, "grad_norm": 1.1441884996021672, "learning_rate": 2.0825162976939077e-06, "loss": 0.5778, "step": 23080 }, { "epoch": 0.7073985533897266, "grad_norm": 1.2185437449794931, "learning_rate": 2.0821132446680393e-06, "loss": 0.5467, "step": 23081 }, { "epoch": 0.7074292019124678, "grad_norm": 1.2834303368402709, "learning_rate": 2.081710220392856e-06, "loss": 0.6898, "step": 23082 }, { "epoch": 0.707459850435209, "grad_norm": 1.4374465710213644, "learning_rate": 2.0813072248723303e-06, "loss": 0.6078, "step": 23083 }, { "epoch": 0.7074904989579502, "grad_norm": 1.4525454353162168, "learning_rate": 2.0809042581104318e-06, "loss": 0.6136, "step": 23084 }, { "epoch": 0.7075211474806914, "grad_norm": 0.465134826484396, "learning_rate": 2.0805013201111264e-06, "loss": 0.3858, "step": 23085 }, { "epoch": 0.7075517960034327, "grad_norm": 2.116267274191187, "learning_rate": 2.0800984108783924e-06, "loss": 0.5986, "step": 23086 }, { "epoch": 0.7075824445261738, "grad_norm": 1.3127752517469224, "learning_rate": 2.0796955304161954e-06, "loss": 0.6192, "step": 23087 }, { "epoch": 0.7076130930489151, "grad_norm": 1.3348985983587918, "learning_rate": 2.079292678728504e-06, "loss": 0.5089, "step": 23088 }, { "epoch": 0.7076437415716562, "grad_norm": 0.4768560158267844, "learning_rate": 2.0788898558192887e-06, "loss": 0.3999, "step": 23089 }, { "epoch": 0.7076743900943975, "grad_norm": 1.2793188713442447, "learning_rate": 2.078487061692521e-06, "loss": 0.6695, "step": 23090 }, { "epoch": 0.7077050386171386, "grad_norm": 1.354131600514876, "learning_rate": 2.0780842963521665e-06, "loss": 0.6447, "step": 23091 }, { "epoch": 0.7077356871398799, "grad_norm": 1.2813799585539476, "learning_rate": 2.077681559802195e-06, "loss": 0.6416, "step": 23092 }, { "epoch": 0.707766335662621, "grad_norm": 1.3699726151544496, "learning_rate": 2.077278852046574e-06, "loss": 0.6406, "step": 23093 }, { "epoch": 0.7077969841853623, "grad_norm": 1.2801202358161223, "learning_rate": 2.076876173089275e-06, "loss": 0.5882, "step": 23094 }, { "epoch": 0.7078276327081034, "grad_norm": 1.3715131025738683, "learning_rate": 2.0764735229342623e-06, "loss": 0.6483, "step": 23095 }, { "epoch": 0.7078582812308447, "grad_norm": 1.2678063657612932, "learning_rate": 2.0760709015855006e-06, "loss": 0.6347, "step": 23096 }, { "epoch": 0.7078889297535859, "grad_norm": 1.2704684748507933, "learning_rate": 2.075668309046964e-06, "loss": 0.5711, "step": 23097 }, { "epoch": 0.7079195782763271, "grad_norm": 1.1829904421849087, "learning_rate": 2.0752657453226162e-06, "loss": 0.6854, "step": 23098 }, { "epoch": 0.7079502267990683, "grad_norm": 0.4708493703703131, "learning_rate": 2.0748632104164213e-06, "loss": 0.4145, "step": 23099 }, { "epoch": 0.7079808753218095, "grad_norm": 1.3560792543731415, "learning_rate": 2.0744607043323477e-06, "loss": 0.5865, "step": 23100 }, { "epoch": 0.7080115238445507, "grad_norm": 1.273724838919772, "learning_rate": 2.074058227074361e-06, "loss": 0.6511, "step": 23101 }, { "epoch": 0.7080421723672919, "grad_norm": 1.1838711830268813, "learning_rate": 2.073655778646429e-06, "loss": 0.4746, "step": 23102 }, { "epoch": 0.7080728208900331, "grad_norm": 1.2781891383001447, "learning_rate": 2.073253359052514e-06, "loss": 0.5263, "step": 23103 }, { "epoch": 0.7081034694127744, "grad_norm": 1.1393095079186129, "learning_rate": 2.072850968296582e-06, "loss": 0.5983, "step": 23104 }, { "epoch": 0.7081341179355155, "grad_norm": 0.46542619798406254, "learning_rate": 2.0724486063826003e-06, "loss": 0.4092, "step": 23105 }, { "epoch": 0.7081647664582568, "grad_norm": 1.3192634097737679, "learning_rate": 2.072046273314532e-06, "loss": 0.6899, "step": 23106 }, { "epoch": 0.7081954149809979, "grad_norm": 1.3350641057349137, "learning_rate": 2.0716439690963385e-06, "loss": 0.5892, "step": 23107 }, { "epoch": 0.7082260635037391, "grad_norm": 1.3274530977627474, "learning_rate": 2.071241693731986e-06, "loss": 0.5811, "step": 23108 }, { "epoch": 0.7082567120264803, "grad_norm": 1.4366482437616932, "learning_rate": 2.0708394472254397e-06, "loss": 0.5052, "step": 23109 }, { "epoch": 0.7082873605492215, "grad_norm": 1.3247167717211152, "learning_rate": 2.0704372295806622e-06, "loss": 0.6149, "step": 23110 }, { "epoch": 0.7083180090719627, "grad_norm": 0.44996284811760845, "learning_rate": 2.070035040801615e-06, "loss": 0.4175, "step": 23111 }, { "epoch": 0.7083486575947039, "grad_norm": 1.2336714654626404, "learning_rate": 2.0696328808922623e-06, "loss": 0.5853, "step": 23112 }, { "epoch": 0.7083793061174452, "grad_norm": 1.3071340685179649, "learning_rate": 2.0692307498565685e-06, "loss": 0.6239, "step": 23113 }, { "epoch": 0.7084099546401863, "grad_norm": 0.43698352603744073, "learning_rate": 2.068828647698492e-06, "loss": 0.3857, "step": 23114 }, { "epoch": 0.7084406031629276, "grad_norm": 1.247891712727915, "learning_rate": 2.0684265744219965e-06, "loss": 0.6083, "step": 23115 }, { "epoch": 0.7084712516856687, "grad_norm": 0.44987392707611945, "learning_rate": 2.0680245300310465e-06, "loss": 0.4019, "step": 23116 }, { "epoch": 0.70850190020841, "grad_norm": 0.4362805054297751, "learning_rate": 2.0676225145296e-06, "loss": 0.411, "step": 23117 }, { "epoch": 0.7085325487311511, "grad_norm": 1.378655698917323, "learning_rate": 2.0672205279216183e-06, "loss": 0.5957, "step": 23118 }, { "epoch": 0.7085631972538924, "grad_norm": 0.448108438446749, "learning_rate": 2.0668185702110633e-06, "loss": 0.4, "step": 23119 }, { "epoch": 0.7085938457766335, "grad_norm": 1.5798189823329258, "learning_rate": 2.066416641401894e-06, "loss": 0.6977, "step": 23120 }, { "epoch": 0.7086244942993748, "grad_norm": 1.3225894843545458, "learning_rate": 2.066014741498075e-06, "loss": 0.6787, "step": 23121 }, { "epoch": 0.708655142822116, "grad_norm": 1.369249156365262, "learning_rate": 2.065612870503562e-06, "loss": 0.5629, "step": 23122 }, { "epoch": 0.7086857913448572, "grad_norm": 1.5613855503535818, "learning_rate": 2.0652110284223153e-06, "loss": 0.5848, "step": 23123 }, { "epoch": 0.7087164398675984, "grad_norm": 0.44887992396278914, "learning_rate": 2.064809215258298e-06, "loss": 0.3962, "step": 23124 }, { "epoch": 0.7087470883903396, "grad_norm": 1.1905869478284565, "learning_rate": 2.0644074310154656e-06, "loss": 0.5588, "step": 23125 }, { "epoch": 0.7087777369130808, "grad_norm": 0.45182221087742197, "learning_rate": 2.0640056756977743e-06, "loss": 0.3874, "step": 23126 }, { "epoch": 0.708808385435822, "grad_norm": 1.4582416721606182, "learning_rate": 2.06360394930919e-06, "loss": 0.6251, "step": 23127 }, { "epoch": 0.7088390339585632, "grad_norm": 0.41821706816596343, "learning_rate": 2.063202251853666e-06, "loss": 0.3646, "step": 23128 }, { "epoch": 0.7088696824813044, "grad_norm": 1.3957342425234334, "learning_rate": 2.0628005833351634e-06, "loss": 0.5638, "step": 23129 }, { "epoch": 0.7089003310040456, "grad_norm": 1.3285644063440478, "learning_rate": 2.062398943757636e-06, "loss": 0.6623, "step": 23130 }, { "epoch": 0.7089309795267869, "grad_norm": 1.367059779340694, "learning_rate": 2.061997333125043e-06, "loss": 0.6642, "step": 23131 }, { "epoch": 0.708961628049528, "grad_norm": 1.61609698404632, "learning_rate": 2.0615957514413446e-06, "loss": 0.6943, "step": 23132 }, { "epoch": 0.7089922765722693, "grad_norm": 0.47003729753923545, "learning_rate": 2.0611941987104927e-06, "loss": 0.3995, "step": 23133 }, { "epoch": 0.7090229250950104, "grad_norm": 1.5211157455474138, "learning_rate": 2.0607926749364467e-06, "loss": 0.6794, "step": 23134 }, { "epoch": 0.7090535736177517, "grad_norm": 1.145978120499128, "learning_rate": 2.060391180123164e-06, "loss": 0.6164, "step": 23135 }, { "epoch": 0.7090842221404928, "grad_norm": 1.397886532656355, "learning_rate": 2.0599897142745995e-06, "loss": 0.5436, "step": 23136 }, { "epoch": 0.7091148706632341, "grad_norm": 1.3406959321289984, "learning_rate": 2.0595882773947045e-06, "loss": 0.574, "step": 23137 }, { "epoch": 0.7091455191859752, "grad_norm": 0.45478082682218807, "learning_rate": 2.0591868694874427e-06, "loss": 0.3959, "step": 23138 }, { "epoch": 0.7091761677087164, "grad_norm": 1.2644239454629977, "learning_rate": 2.058785490556763e-06, "loss": 0.5915, "step": 23139 }, { "epoch": 0.7092068162314576, "grad_norm": 0.4446026981640655, "learning_rate": 2.058384140606624e-06, "loss": 0.3967, "step": 23140 }, { "epoch": 0.7092374647541988, "grad_norm": 1.263029070511379, "learning_rate": 2.0579828196409774e-06, "loss": 0.6435, "step": 23141 }, { "epoch": 0.7092681132769401, "grad_norm": 1.3515215247323404, "learning_rate": 2.0575815276637782e-06, "loss": 0.5624, "step": 23142 }, { "epoch": 0.7092987617996812, "grad_norm": 1.3834174270670228, "learning_rate": 2.0571802646789833e-06, "loss": 0.6775, "step": 23143 }, { "epoch": 0.7093294103224225, "grad_norm": 1.3167403386796892, "learning_rate": 2.0567790306905427e-06, "loss": 0.573, "step": 23144 }, { "epoch": 0.7093600588451636, "grad_norm": 1.1926621473223882, "learning_rate": 2.056377825702411e-06, "loss": 0.522, "step": 23145 }, { "epoch": 0.7093907073679049, "grad_norm": 1.306519016433514, "learning_rate": 2.0559766497185433e-06, "loss": 0.5113, "step": 23146 }, { "epoch": 0.709421355890646, "grad_norm": 1.2576175094872004, "learning_rate": 2.055575502742889e-06, "loss": 0.6211, "step": 23147 }, { "epoch": 0.7094520044133873, "grad_norm": 1.2519052606453933, "learning_rate": 2.055174384779403e-06, "loss": 0.6152, "step": 23148 }, { "epoch": 0.7094826529361284, "grad_norm": 1.3224322761702507, "learning_rate": 2.054773295832039e-06, "loss": 0.6657, "step": 23149 }, { "epoch": 0.7095133014588697, "grad_norm": 1.3325936248938766, "learning_rate": 2.054372235904746e-06, "loss": 0.628, "step": 23150 }, { "epoch": 0.7095439499816109, "grad_norm": 1.3000019358694592, "learning_rate": 2.0539712050014783e-06, "loss": 0.6994, "step": 23151 }, { "epoch": 0.7095745985043521, "grad_norm": 1.3371183900719588, "learning_rate": 2.0535702031261843e-06, "loss": 0.5966, "step": 23152 }, { "epoch": 0.7096052470270933, "grad_norm": 1.3304967458494839, "learning_rate": 2.053169230282817e-06, "loss": 0.6059, "step": 23153 }, { "epoch": 0.7096358955498345, "grad_norm": 0.45645833769835364, "learning_rate": 2.052768286475329e-06, "loss": 0.389, "step": 23154 }, { "epoch": 0.7096665440725757, "grad_norm": 1.2233853995534685, "learning_rate": 2.0523673717076676e-06, "loss": 0.6306, "step": 23155 }, { "epoch": 0.7096971925953169, "grad_norm": 1.1777070156930793, "learning_rate": 2.0519664859837846e-06, "loss": 0.5375, "step": 23156 }, { "epoch": 0.7097278411180581, "grad_norm": 0.44992045269158115, "learning_rate": 2.0515656293076315e-06, "loss": 0.3902, "step": 23157 }, { "epoch": 0.7097584896407994, "grad_norm": 1.2488432871621302, "learning_rate": 2.0511648016831554e-06, "loss": 0.5771, "step": 23158 }, { "epoch": 0.7097891381635405, "grad_norm": 1.1148361732025707, "learning_rate": 2.0507640031143083e-06, "loss": 0.4535, "step": 23159 }, { "epoch": 0.7098197866862818, "grad_norm": 1.2606512164143848, "learning_rate": 2.0503632336050367e-06, "loss": 0.561, "step": 23160 }, { "epoch": 0.7098504352090229, "grad_norm": 1.445086482667092, "learning_rate": 2.0499624931592905e-06, "loss": 0.6696, "step": 23161 }, { "epoch": 0.7098810837317642, "grad_norm": 1.5221671926418048, "learning_rate": 2.049561781781021e-06, "loss": 0.5886, "step": 23162 }, { "epoch": 0.7099117322545053, "grad_norm": 1.2700792239061087, "learning_rate": 2.049161099474172e-06, "loss": 0.6138, "step": 23163 }, { "epoch": 0.7099423807772466, "grad_norm": 1.1405070577521828, "learning_rate": 2.0487604462426936e-06, "loss": 0.5209, "step": 23164 }, { "epoch": 0.7099730292999877, "grad_norm": 1.3766659753634458, "learning_rate": 2.0483598220905354e-06, "loss": 0.6049, "step": 23165 }, { "epoch": 0.710003677822729, "grad_norm": 0.465774705435011, "learning_rate": 2.0479592270216414e-06, "loss": 0.4068, "step": 23166 }, { "epoch": 0.7100343263454701, "grad_norm": 1.5223054846947652, "learning_rate": 2.04755866103996e-06, "loss": 0.7079, "step": 23167 }, { "epoch": 0.7100649748682114, "grad_norm": 1.2629957162069576, "learning_rate": 2.047158124149441e-06, "loss": 0.6375, "step": 23168 }, { "epoch": 0.7100956233909526, "grad_norm": 1.6556469301346748, "learning_rate": 2.0467576163540263e-06, "loss": 0.7498, "step": 23169 }, { "epoch": 0.7101262719136937, "grad_norm": 1.323734396095354, "learning_rate": 2.0463571376576667e-06, "loss": 0.6346, "step": 23170 }, { "epoch": 0.710156920436435, "grad_norm": 1.3760416612276396, "learning_rate": 2.0459566880643038e-06, "loss": 0.6567, "step": 23171 }, { "epoch": 0.7101875689591761, "grad_norm": 1.3088536486467324, "learning_rate": 2.0455562675778855e-06, "loss": 0.6339, "step": 23172 }, { "epoch": 0.7102182174819174, "grad_norm": 1.1900456988773835, "learning_rate": 2.0451558762023595e-06, "loss": 0.5644, "step": 23173 }, { "epoch": 0.7102488660046585, "grad_norm": 1.3757708538705897, "learning_rate": 2.0447555139416658e-06, "loss": 0.6068, "step": 23174 }, { "epoch": 0.7102795145273998, "grad_norm": 1.4764685552227956, "learning_rate": 2.044355180799753e-06, "loss": 0.5086, "step": 23175 }, { "epoch": 0.7103101630501409, "grad_norm": 1.2344626630238267, "learning_rate": 2.0439548767805667e-06, "loss": 0.6084, "step": 23176 }, { "epoch": 0.7103408115728822, "grad_norm": 1.4503887427092568, "learning_rate": 2.043554601888047e-06, "loss": 0.6203, "step": 23177 }, { "epoch": 0.7103714600956234, "grad_norm": 1.5591091643453348, "learning_rate": 2.0431543561261408e-06, "loss": 0.6915, "step": 23178 }, { "epoch": 0.7104021086183646, "grad_norm": 1.3642406663327498, "learning_rate": 2.0427541394987926e-06, "loss": 0.6386, "step": 23179 }, { "epoch": 0.7104327571411058, "grad_norm": 1.2809657298870645, "learning_rate": 2.042353952009943e-06, "loss": 0.5448, "step": 23180 }, { "epoch": 0.710463405663847, "grad_norm": 1.231027851861002, "learning_rate": 2.041953793663538e-06, "loss": 0.5829, "step": 23181 }, { "epoch": 0.7104940541865882, "grad_norm": 0.46301361538641106, "learning_rate": 2.041553664463516e-06, "loss": 0.4029, "step": 23182 }, { "epoch": 0.7105247027093294, "grad_norm": 1.2863776025774292, "learning_rate": 2.0411535644138266e-06, "loss": 0.6297, "step": 23183 }, { "epoch": 0.7105553512320706, "grad_norm": 2.445965174671938, "learning_rate": 2.0407534935184076e-06, "loss": 0.6937, "step": 23184 }, { "epoch": 0.7105859997548118, "grad_norm": 1.2115716570016553, "learning_rate": 2.0403534517811996e-06, "loss": 0.567, "step": 23185 }, { "epoch": 0.710616648277553, "grad_norm": 1.311121838592481, "learning_rate": 2.0399534392061464e-06, "loss": 0.5833, "step": 23186 }, { "epoch": 0.7106472968002943, "grad_norm": 1.297737228009657, "learning_rate": 2.039553455797192e-06, "loss": 0.6776, "step": 23187 }, { "epoch": 0.7106779453230354, "grad_norm": 1.2585576453795144, "learning_rate": 2.039153501558272e-06, "loss": 0.5564, "step": 23188 }, { "epoch": 0.7107085938457767, "grad_norm": 1.2224062039925319, "learning_rate": 2.0387535764933306e-06, "loss": 0.5672, "step": 23189 }, { "epoch": 0.7107392423685178, "grad_norm": 1.4388516302985452, "learning_rate": 2.03835368060631e-06, "loss": 0.6696, "step": 23190 }, { "epoch": 0.7107698908912591, "grad_norm": 1.362308635178799, "learning_rate": 2.0379538139011455e-06, "loss": 0.6403, "step": 23191 }, { "epoch": 0.7108005394140002, "grad_norm": 1.334039646224019, "learning_rate": 2.0375539763817824e-06, "loss": 0.7051, "step": 23192 }, { "epoch": 0.7108311879367415, "grad_norm": 1.2812597273684272, "learning_rate": 2.0371541680521543e-06, "loss": 0.5922, "step": 23193 }, { "epoch": 0.7108618364594826, "grad_norm": 1.3478164479897885, "learning_rate": 2.0367543889162083e-06, "loss": 0.6035, "step": 23194 }, { "epoch": 0.7108924849822239, "grad_norm": 1.2449915648568874, "learning_rate": 2.036354638977879e-06, "loss": 0.5846, "step": 23195 }, { "epoch": 0.710923133504965, "grad_norm": 0.4202967661742032, "learning_rate": 2.0359549182411043e-06, "loss": 0.3737, "step": 23196 }, { "epoch": 0.7109537820277063, "grad_norm": 1.3835736622840111, "learning_rate": 2.035555226709824e-06, "loss": 0.6741, "step": 23197 }, { "epoch": 0.7109844305504475, "grad_norm": 1.5082385007992172, "learning_rate": 2.0351555643879777e-06, "loss": 0.6375, "step": 23198 }, { "epoch": 0.7110150790731887, "grad_norm": 1.3167405377575223, "learning_rate": 2.0347559312795013e-06, "loss": 0.5235, "step": 23199 }, { "epoch": 0.7110457275959299, "grad_norm": 1.389673377908707, "learning_rate": 2.034356327388333e-06, "loss": 0.6093, "step": 23200 }, { "epoch": 0.711076376118671, "grad_norm": 0.442206237313408, "learning_rate": 2.0339567527184107e-06, "loss": 0.3907, "step": 23201 }, { "epoch": 0.7111070246414123, "grad_norm": 0.4604971331205295, "learning_rate": 2.033557207273673e-06, "loss": 0.3787, "step": 23202 }, { "epoch": 0.7111376731641534, "grad_norm": 1.4668337693340443, "learning_rate": 2.0331576910580554e-06, "loss": 0.5216, "step": 23203 }, { "epoch": 0.7111683216868947, "grad_norm": 1.538717210883084, "learning_rate": 2.0327582040754916e-06, "loss": 0.6456, "step": 23204 }, { "epoch": 0.7111989702096359, "grad_norm": 0.44472505909697746, "learning_rate": 2.0323587463299217e-06, "loss": 0.4084, "step": 23205 }, { "epoch": 0.7112296187323771, "grad_norm": 0.44692581961548444, "learning_rate": 2.031959317825281e-06, "loss": 0.4151, "step": 23206 }, { "epoch": 0.7112602672551183, "grad_norm": 1.33456498523507, "learning_rate": 2.031559918565504e-06, "loss": 0.6248, "step": 23207 }, { "epoch": 0.7112909157778595, "grad_norm": 1.1910133917266919, "learning_rate": 2.0311605485545255e-06, "loss": 0.6708, "step": 23208 }, { "epoch": 0.7113215643006007, "grad_norm": 1.3664962776651546, "learning_rate": 2.0307612077962822e-06, "loss": 0.6484, "step": 23209 }, { "epoch": 0.7113522128233419, "grad_norm": 1.3169018548680034, "learning_rate": 2.03036189629471e-06, "loss": 0.5451, "step": 23210 }, { "epoch": 0.7113828613460831, "grad_norm": 1.3296804894145318, "learning_rate": 2.029962614053742e-06, "loss": 0.6002, "step": 23211 }, { "epoch": 0.7114135098688243, "grad_norm": 0.44977765730301006, "learning_rate": 2.029563361077309e-06, "loss": 0.396, "step": 23212 }, { "epoch": 0.7114441583915655, "grad_norm": 1.4235860013121915, "learning_rate": 2.0291641373693515e-06, "loss": 0.5811, "step": 23213 }, { "epoch": 0.7114748069143068, "grad_norm": 1.3779206881003445, "learning_rate": 2.0287649429337997e-06, "loss": 0.5965, "step": 23214 }, { "epoch": 0.7115054554370479, "grad_norm": 1.3410612099118526, "learning_rate": 2.0283657777745856e-06, "loss": 0.5822, "step": 23215 }, { "epoch": 0.7115361039597892, "grad_norm": 0.4332500642189767, "learning_rate": 2.027966641895644e-06, "loss": 0.4073, "step": 23216 }, { "epoch": 0.7115667524825303, "grad_norm": 1.170720378278394, "learning_rate": 2.027567535300909e-06, "loss": 0.5432, "step": 23217 }, { "epoch": 0.7115974010052716, "grad_norm": 1.6767182148213449, "learning_rate": 2.0271684579943096e-06, "loss": 0.5764, "step": 23218 }, { "epoch": 0.7116280495280127, "grad_norm": 1.3412693836948537, "learning_rate": 2.02676940997978e-06, "loss": 0.6865, "step": 23219 }, { "epoch": 0.711658698050754, "grad_norm": 0.42905286238588514, "learning_rate": 2.026370391261253e-06, "loss": 0.3773, "step": 23220 }, { "epoch": 0.7116893465734951, "grad_norm": 1.2936456962595484, "learning_rate": 2.0259714018426606e-06, "loss": 0.5265, "step": 23221 }, { "epoch": 0.7117199950962364, "grad_norm": 1.3018533673070631, "learning_rate": 2.0255724417279325e-06, "loss": 0.5643, "step": 23222 }, { "epoch": 0.7117506436189776, "grad_norm": 1.3657134545205991, "learning_rate": 2.0251735109209975e-06, "loss": 0.6114, "step": 23223 }, { "epoch": 0.7117812921417188, "grad_norm": 1.2581212726276394, "learning_rate": 2.024774609425792e-06, "loss": 0.6219, "step": 23224 }, { "epoch": 0.71181194066446, "grad_norm": 1.2890207640184892, "learning_rate": 2.0243757372462435e-06, "loss": 0.5889, "step": 23225 }, { "epoch": 0.7118425891872012, "grad_norm": 1.2630371108886183, "learning_rate": 2.0239768943862808e-06, "loss": 0.582, "step": 23226 }, { "epoch": 0.7118732377099424, "grad_norm": 1.2724066095928757, "learning_rate": 2.0235780808498346e-06, "loss": 0.6795, "step": 23227 }, { "epoch": 0.7119038862326836, "grad_norm": 1.29057480182232, "learning_rate": 2.0231792966408357e-06, "loss": 0.6006, "step": 23228 }, { "epoch": 0.7119345347554248, "grad_norm": 1.4678466059770372, "learning_rate": 2.0227805417632148e-06, "loss": 0.5866, "step": 23229 }, { "epoch": 0.711965183278166, "grad_norm": 1.50562287983442, "learning_rate": 2.0223818162208965e-06, "loss": 0.7196, "step": 23230 }, { "epoch": 0.7119958318009072, "grad_norm": 1.3506306965007353, "learning_rate": 2.021983120017812e-06, "loss": 0.5179, "step": 23231 }, { "epoch": 0.7120264803236483, "grad_norm": 1.2416101802413528, "learning_rate": 2.021584453157892e-06, "loss": 0.5557, "step": 23232 }, { "epoch": 0.7120571288463896, "grad_norm": 1.4560522212405176, "learning_rate": 2.0211858156450627e-06, "loss": 0.621, "step": 23233 }, { "epoch": 0.7120877773691308, "grad_norm": 1.5029415756704296, "learning_rate": 2.0207872074832476e-06, "loss": 0.7136, "step": 23234 }, { "epoch": 0.712118425891872, "grad_norm": 1.254885852794245, "learning_rate": 2.020388628676382e-06, "loss": 0.6894, "step": 23235 }, { "epoch": 0.7121490744146132, "grad_norm": 1.3893262614897646, "learning_rate": 2.019990079228388e-06, "loss": 0.6468, "step": 23236 }, { "epoch": 0.7121797229373544, "grad_norm": 1.2324402346237406, "learning_rate": 2.0195915591431957e-06, "loss": 0.5936, "step": 23237 }, { "epoch": 0.7122103714600956, "grad_norm": 1.4347645882381537, "learning_rate": 2.019193068424729e-06, "loss": 0.6645, "step": 23238 }, { "epoch": 0.7122410199828368, "grad_norm": 1.3057755134618987, "learning_rate": 2.0187946070769153e-06, "loss": 0.6102, "step": 23239 }, { "epoch": 0.712271668505578, "grad_norm": 1.2551400212212904, "learning_rate": 2.0183961751036834e-06, "loss": 0.701, "step": 23240 }, { "epoch": 0.7123023170283193, "grad_norm": 1.425484828392993, "learning_rate": 2.017997772508955e-06, "loss": 0.6331, "step": 23241 }, { "epoch": 0.7123329655510604, "grad_norm": 1.2446311594667065, "learning_rate": 2.0175993992966568e-06, "loss": 0.6279, "step": 23242 }, { "epoch": 0.7123636140738017, "grad_norm": 1.3304603690545906, "learning_rate": 2.017201055470717e-06, "loss": 0.5021, "step": 23243 }, { "epoch": 0.7123942625965428, "grad_norm": 0.4432224622503043, "learning_rate": 2.0168027410350587e-06, "loss": 0.4002, "step": 23244 }, { "epoch": 0.7124249111192841, "grad_norm": 1.4607206977324514, "learning_rate": 2.0164044559936023e-06, "loss": 0.7023, "step": 23245 }, { "epoch": 0.7124555596420252, "grad_norm": 1.3592712252359507, "learning_rate": 2.01600620035028e-06, "loss": 0.6259, "step": 23246 }, { "epoch": 0.7124862081647665, "grad_norm": 1.3133236219534365, "learning_rate": 2.0156079741090107e-06, "loss": 0.6996, "step": 23247 }, { "epoch": 0.7125168566875076, "grad_norm": 1.4985955357053116, "learning_rate": 2.0152097772737204e-06, "loss": 0.7237, "step": 23248 }, { "epoch": 0.7125475052102489, "grad_norm": 1.2671836043972333, "learning_rate": 2.0148116098483313e-06, "loss": 0.6069, "step": 23249 }, { "epoch": 0.71257815373299, "grad_norm": 1.4497177052226282, "learning_rate": 2.0144134718367665e-06, "loss": 0.7371, "step": 23250 }, { "epoch": 0.7126088022557313, "grad_norm": 1.2670463834165822, "learning_rate": 2.014015363242951e-06, "loss": 0.5641, "step": 23251 }, { "epoch": 0.7126394507784725, "grad_norm": 1.4876477912115973, "learning_rate": 2.0136172840708053e-06, "loss": 0.649, "step": 23252 }, { "epoch": 0.7126700993012137, "grad_norm": 1.3633009784841672, "learning_rate": 2.013219234324252e-06, "loss": 0.6154, "step": 23253 }, { "epoch": 0.7127007478239549, "grad_norm": 1.3302105462927691, "learning_rate": 2.0128212140072156e-06, "loss": 0.6356, "step": 23254 }, { "epoch": 0.7127313963466961, "grad_norm": 1.3339184400844306, "learning_rate": 2.012423223123614e-06, "loss": 0.513, "step": 23255 }, { "epoch": 0.7127620448694373, "grad_norm": 0.4738204515450069, "learning_rate": 2.0120252616773735e-06, "loss": 0.3877, "step": 23256 }, { "epoch": 0.7127926933921785, "grad_norm": 1.4422637680025858, "learning_rate": 2.0116273296724098e-06, "loss": 0.6011, "step": 23257 }, { "epoch": 0.7128233419149197, "grad_norm": 1.484618509559601, "learning_rate": 2.011229427112647e-06, "loss": 0.7203, "step": 23258 }, { "epoch": 0.712853990437661, "grad_norm": 1.1611681840440502, "learning_rate": 2.0108315540020072e-06, "loss": 0.5594, "step": 23259 }, { "epoch": 0.7128846389604021, "grad_norm": 0.45645994260099076, "learning_rate": 2.0104337103444074e-06, "loss": 0.4035, "step": 23260 }, { "epoch": 0.7129152874831434, "grad_norm": 1.4456787317438118, "learning_rate": 2.010035896143769e-06, "loss": 0.6409, "step": 23261 }, { "epoch": 0.7129459360058845, "grad_norm": 1.3519223867617072, "learning_rate": 2.0096381114040136e-06, "loss": 0.6257, "step": 23262 }, { "epoch": 0.7129765845286257, "grad_norm": 1.2332676485628433, "learning_rate": 2.009240356129057e-06, "loss": 0.552, "step": 23263 }, { "epoch": 0.7130072330513669, "grad_norm": 1.2345144019576764, "learning_rate": 2.0088426303228208e-06, "loss": 0.5913, "step": 23264 }, { "epoch": 0.7130378815741081, "grad_norm": 1.3398533484014166, "learning_rate": 2.0084449339892247e-06, "loss": 0.6319, "step": 23265 }, { "epoch": 0.7130685300968493, "grad_norm": 1.4442695179072393, "learning_rate": 2.0080472671321847e-06, "loss": 0.6288, "step": 23266 }, { "epoch": 0.7130991786195905, "grad_norm": 1.3899339227332457, "learning_rate": 2.0076496297556224e-06, "loss": 0.6062, "step": 23267 }, { "epoch": 0.7131298271423318, "grad_norm": 1.3863291927386845, "learning_rate": 2.0072520218634524e-06, "loss": 0.5331, "step": 23268 }, { "epoch": 0.7131604756650729, "grad_norm": 1.3324125033748844, "learning_rate": 2.006854443459594e-06, "loss": 0.6022, "step": 23269 }, { "epoch": 0.7131911241878142, "grad_norm": 1.2875945986104391, "learning_rate": 2.006456894547966e-06, "loss": 0.6224, "step": 23270 }, { "epoch": 0.7132217727105553, "grad_norm": 1.4159068092044926, "learning_rate": 2.0060593751324817e-06, "loss": 0.5617, "step": 23271 }, { "epoch": 0.7132524212332966, "grad_norm": 0.44551372345242457, "learning_rate": 2.0056618852170613e-06, "loss": 0.3839, "step": 23272 }, { "epoch": 0.7132830697560377, "grad_norm": 0.44385694759711836, "learning_rate": 2.0052644248056217e-06, "loss": 0.3867, "step": 23273 }, { "epoch": 0.713313718278779, "grad_norm": 0.42739740796339243, "learning_rate": 2.0048669939020766e-06, "loss": 0.4055, "step": 23274 }, { "epoch": 0.7133443668015201, "grad_norm": 1.3188094057939048, "learning_rate": 2.0044695925103435e-06, "loss": 0.5557, "step": 23275 }, { "epoch": 0.7133750153242614, "grad_norm": 1.1993804914557333, "learning_rate": 2.004072220634339e-06, "loss": 0.5827, "step": 23276 }, { "epoch": 0.7134056638470025, "grad_norm": 1.5622470043086747, "learning_rate": 2.0036748782779764e-06, "loss": 0.5617, "step": 23277 }, { "epoch": 0.7134363123697438, "grad_norm": 1.3594246705242925, "learning_rate": 2.0032775654451736e-06, "loss": 0.5704, "step": 23278 }, { "epoch": 0.713466960892485, "grad_norm": 1.3487738026011926, "learning_rate": 2.0028802821398415e-06, "loss": 0.5757, "step": 23279 }, { "epoch": 0.7134976094152262, "grad_norm": 1.436248399825883, "learning_rate": 2.0024830283658968e-06, "loss": 0.6446, "step": 23280 }, { "epoch": 0.7135282579379674, "grad_norm": 1.2770617618744453, "learning_rate": 2.002085804127256e-06, "loss": 0.7075, "step": 23281 }, { "epoch": 0.7135589064607086, "grad_norm": 1.2386973608681442, "learning_rate": 2.0016886094278286e-06, "loss": 0.5178, "step": 23282 }, { "epoch": 0.7135895549834498, "grad_norm": 1.4075731413258235, "learning_rate": 2.001291444271531e-06, "loss": 0.6606, "step": 23283 }, { "epoch": 0.713620203506191, "grad_norm": 1.3307763472646932, "learning_rate": 2.000894308662277e-06, "loss": 0.5229, "step": 23284 }, { "epoch": 0.7136508520289322, "grad_norm": 1.207839688540849, "learning_rate": 2.000497202603978e-06, "loss": 0.6637, "step": 23285 }, { "epoch": 0.7136815005516735, "grad_norm": 1.329158208288307, "learning_rate": 2.000100126100547e-06, "loss": 0.6143, "step": 23286 }, { "epoch": 0.7137121490744146, "grad_norm": 1.400274698326409, "learning_rate": 1.9997030791558985e-06, "loss": 0.5926, "step": 23287 }, { "epoch": 0.7137427975971559, "grad_norm": 1.4092858839754625, "learning_rate": 1.999306061773942e-06, "loss": 0.5892, "step": 23288 }, { "epoch": 0.713773446119897, "grad_norm": 1.2954980834035543, "learning_rate": 1.998909073958592e-06, "loss": 0.5141, "step": 23289 }, { "epoch": 0.7138040946426383, "grad_norm": 0.443646151766932, "learning_rate": 1.9985121157137553e-06, "loss": 0.3973, "step": 23290 }, { "epoch": 0.7138347431653794, "grad_norm": 1.2204303123309894, "learning_rate": 1.99811518704335e-06, "loss": 0.6476, "step": 23291 }, { "epoch": 0.7138653916881207, "grad_norm": 1.3635116941143877, "learning_rate": 1.997718287951285e-06, "loss": 0.6246, "step": 23292 }, { "epoch": 0.7138960402108618, "grad_norm": 1.302745930081197, "learning_rate": 1.9973214184414667e-06, "loss": 0.5726, "step": 23293 }, { "epoch": 0.713926688733603, "grad_norm": 1.359979619202184, "learning_rate": 1.9969245785178093e-06, "loss": 0.5143, "step": 23294 }, { "epoch": 0.7139573372563442, "grad_norm": 1.193233166718281, "learning_rate": 1.9965277681842244e-06, "loss": 0.5833, "step": 23295 }, { "epoch": 0.7139879857790854, "grad_norm": 1.2238931764215961, "learning_rate": 1.996130987444618e-06, "loss": 0.6316, "step": 23296 }, { "epoch": 0.7140186343018267, "grad_norm": 0.44960185455320717, "learning_rate": 1.995734236302901e-06, "loss": 0.4107, "step": 23297 }, { "epoch": 0.7140492828245678, "grad_norm": 1.2782264512750072, "learning_rate": 1.9953375147629854e-06, "loss": 0.583, "step": 23298 }, { "epoch": 0.7140799313473091, "grad_norm": 1.3382100698174348, "learning_rate": 1.994940822828776e-06, "loss": 0.596, "step": 23299 }, { "epoch": 0.7141105798700502, "grad_norm": 1.4334242966303758, "learning_rate": 1.994544160504186e-06, "loss": 0.6587, "step": 23300 }, { "epoch": 0.7141412283927915, "grad_norm": 0.4704541672953053, "learning_rate": 1.9941475277931187e-06, "loss": 0.4033, "step": 23301 }, { "epoch": 0.7141718769155326, "grad_norm": 0.4752875049062165, "learning_rate": 1.993750924699486e-06, "loss": 0.4033, "step": 23302 }, { "epoch": 0.7142025254382739, "grad_norm": 1.3191226844328219, "learning_rate": 1.9933543512271954e-06, "loss": 0.5486, "step": 23303 }, { "epoch": 0.714233173961015, "grad_norm": 1.2515000684403785, "learning_rate": 1.992957807380152e-06, "loss": 0.504, "step": 23304 }, { "epoch": 0.7142638224837563, "grad_norm": 1.3669386226597418, "learning_rate": 1.992561293162265e-06, "loss": 0.6011, "step": 23305 }, { "epoch": 0.7142944710064975, "grad_norm": 0.4232460793146774, "learning_rate": 1.992164808577443e-06, "loss": 0.3658, "step": 23306 }, { "epoch": 0.7143251195292387, "grad_norm": 1.558246619041215, "learning_rate": 1.9917683536295886e-06, "loss": 0.6511, "step": 23307 }, { "epoch": 0.7143557680519799, "grad_norm": 1.4027221622109485, "learning_rate": 1.9913719283226123e-06, "loss": 0.6529, "step": 23308 }, { "epoch": 0.7143864165747211, "grad_norm": 1.5556141451179566, "learning_rate": 1.9909755326604145e-06, "loss": 0.7287, "step": 23309 }, { "epoch": 0.7144170650974623, "grad_norm": 1.4149079853082378, "learning_rate": 1.9905791666469084e-06, "loss": 0.5536, "step": 23310 }, { "epoch": 0.7144477136202035, "grad_norm": 1.4130982345820782, "learning_rate": 1.9901828302859954e-06, "loss": 0.6406, "step": 23311 }, { "epoch": 0.7144783621429447, "grad_norm": 0.4364497849238141, "learning_rate": 1.9897865235815795e-06, "loss": 0.3925, "step": 23312 }, { "epoch": 0.714509010665686, "grad_norm": 1.491173545227994, "learning_rate": 1.9893902465375677e-06, "loss": 0.5989, "step": 23313 }, { "epoch": 0.7145396591884271, "grad_norm": 0.4389450889148321, "learning_rate": 1.9889939991578648e-06, "loss": 0.3883, "step": 23314 }, { "epoch": 0.7145703077111684, "grad_norm": 1.4294057657471169, "learning_rate": 1.9885977814463734e-06, "loss": 0.7971, "step": 23315 }, { "epoch": 0.7146009562339095, "grad_norm": 1.4843566666077332, "learning_rate": 1.9882015934069985e-06, "loss": 0.6293, "step": 23316 }, { "epoch": 0.7146316047566508, "grad_norm": 0.4379301807760362, "learning_rate": 1.9878054350436452e-06, "loss": 0.3956, "step": 23317 }, { "epoch": 0.7146622532793919, "grad_norm": 1.4169842066821212, "learning_rate": 1.9874093063602146e-06, "loss": 0.5257, "step": 23318 }, { "epoch": 0.7146929018021332, "grad_norm": 1.232154276907966, "learning_rate": 1.9870132073606124e-06, "loss": 0.6106, "step": 23319 }, { "epoch": 0.7147235503248743, "grad_norm": 1.3925208772843782, "learning_rate": 1.9866171380487365e-06, "loss": 0.6573, "step": 23320 }, { "epoch": 0.7147541988476156, "grad_norm": 1.3826971094907112, "learning_rate": 1.9862210984284964e-06, "loss": 0.6425, "step": 23321 }, { "epoch": 0.7147848473703567, "grad_norm": 1.7084257508854543, "learning_rate": 1.9858250885037907e-06, "loss": 0.6644, "step": 23322 }, { "epoch": 0.714815495893098, "grad_norm": 1.3691282139629153, "learning_rate": 1.98542910827852e-06, "loss": 0.6286, "step": 23323 }, { "epoch": 0.7148461444158392, "grad_norm": 1.4388047356761804, "learning_rate": 1.985033157756587e-06, "loss": 0.5756, "step": 23324 }, { "epoch": 0.7148767929385803, "grad_norm": 0.4505613836432208, "learning_rate": 1.9846372369418964e-06, "loss": 0.3949, "step": 23325 }, { "epoch": 0.7149074414613216, "grad_norm": 0.4293431015805976, "learning_rate": 1.984241345838345e-06, "loss": 0.3877, "step": 23326 }, { "epoch": 0.7149380899840627, "grad_norm": 1.3507477120235536, "learning_rate": 1.9838454844498344e-06, "loss": 0.5639, "step": 23327 }, { "epoch": 0.714968738506804, "grad_norm": 0.44480598200746274, "learning_rate": 1.9834496527802665e-06, "loss": 0.3879, "step": 23328 }, { "epoch": 0.7149993870295451, "grad_norm": 1.5166079941118595, "learning_rate": 1.9830538508335425e-06, "loss": 0.5946, "step": 23329 }, { "epoch": 0.7150300355522864, "grad_norm": 1.2054755460487498, "learning_rate": 1.982658078613561e-06, "loss": 0.5968, "step": 23330 }, { "epoch": 0.7150606840750275, "grad_norm": 0.44167714414263615, "learning_rate": 1.9822623361242176e-06, "loss": 0.4065, "step": 23331 }, { "epoch": 0.7150913325977688, "grad_norm": 1.2096235567186324, "learning_rate": 1.9818666233694196e-06, "loss": 0.5827, "step": 23332 }, { "epoch": 0.71512198112051, "grad_norm": 1.4530248301107045, "learning_rate": 1.981470940353062e-06, "loss": 0.6406, "step": 23333 }, { "epoch": 0.7151526296432512, "grad_norm": 1.3164267813024302, "learning_rate": 1.981075287079041e-06, "loss": 0.6128, "step": 23334 }, { "epoch": 0.7151832781659924, "grad_norm": 1.3321961520052994, "learning_rate": 1.9806796635512583e-06, "loss": 0.6806, "step": 23335 }, { "epoch": 0.7152139266887336, "grad_norm": 1.3544676420086037, "learning_rate": 1.980284069773611e-06, "loss": 0.6466, "step": 23336 }, { "epoch": 0.7152445752114748, "grad_norm": 1.1880424438488315, "learning_rate": 1.9798885057499994e-06, "loss": 0.6076, "step": 23337 }, { "epoch": 0.715275223734216, "grad_norm": 0.432510885771217, "learning_rate": 1.9794929714843176e-06, "loss": 0.3838, "step": 23338 }, { "epoch": 0.7153058722569572, "grad_norm": 1.2890794611529455, "learning_rate": 1.9790974669804637e-06, "loss": 0.6388, "step": 23339 }, { "epoch": 0.7153365207796984, "grad_norm": 1.3057094423748599, "learning_rate": 1.9787019922423376e-06, "loss": 0.6505, "step": 23340 }, { "epoch": 0.7153671693024396, "grad_norm": 1.281016873978838, "learning_rate": 1.978306547273834e-06, "loss": 0.5669, "step": 23341 }, { "epoch": 0.7153978178251809, "grad_norm": 1.1823742209813484, "learning_rate": 1.977911132078845e-06, "loss": 0.5762, "step": 23342 }, { "epoch": 0.715428466347922, "grad_norm": 0.4473716963063328, "learning_rate": 1.977515746661275e-06, "loss": 0.3797, "step": 23343 }, { "epoch": 0.7154591148706633, "grad_norm": 1.3028701567543446, "learning_rate": 1.9771203910250155e-06, "loss": 0.7118, "step": 23344 }, { "epoch": 0.7154897633934044, "grad_norm": 1.5011299371807934, "learning_rate": 1.97672506517396e-06, "loss": 0.5727, "step": 23345 }, { "epoch": 0.7155204119161457, "grad_norm": 0.4415889365849145, "learning_rate": 1.9763297691120065e-06, "loss": 0.3961, "step": 23346 }, { "epoch": 0.7155510604388868, "grad_norm": 1.2414235583327435, "learning_rate": 1.975934502843049e-06, "loss": 0.5974, "step": 23347 }, { "epoch": 0.7155817089616281, "grad_norm": 1.4592719465952586, "learning_rate": 1.9755392663709842e-06, "loss": 0.6933, "step": 23348 }, { "epoch": 0.7156123574843692, "grad_norm": 1.4045775237566185, "learning_rate": 1.975144059699704e-06, "loss": 0.6989, "step": 23349 }, { "epoch": 0.7156430060071105, "grad_norm": 0.4365699136436432, "learning_rate": 1.9747488828331022e-06, "loss": 0.4056, "step": 23350 }, { "epoch": 0.7156736545298517, "grad_norm": 1.347323744073375, "learning_rate": 1.9743537357750763e-06, "loss": 0.5846, "step": 23351 }, { "epoch": 0.7157043030525929, "grad_norm": 1.4135419382770447, "learning_rate": 1.9739586185295172e-06, "loss": 0.6711, "step": 23352 }, { "epoch": 0.7157349515753341, "grad_norm": 1.5137055202118972, "learning_rate": 1.973563531100316e-06, "loss": 0.6371, "step": 23353 }, { "epoch": 0.7157656000980753, "grad_norm": 1.3951235348796303, "learning_rate": 1.9731684734913675e-06, "loss": 0.692, "step": 23354 }, { "epoch": 0.7157962486208165, "grad_norm": 1.2054568327886308, "learning_rate": 1.972773445706565e-06, "loss": 0.6797, "step": 23355 }, { "epoch": 0.7158268971435576, "grad_norm": 0.4332964308055912, "learning_rate": 1.9723784477498014e-06, "loss": 0.3795, "step": 23356 }, { "epoch": 0.7158575456662989, "grad_norm": 1.499668465444154, "learning_rate": 1.9719834796249666e-06, "loss": 0.6154, "step": 23357 }, { "epoch": 0.71588819418904, "grad_norm": 0.46037818955661763, "learning_rate": 1.9715885413359525e-06, "loss": 0.4111, "step": 23358 }, { "epoch": 0.7159188427117813, "grad_norm": 1.4459497846353961, "learning_rate": 1.971193632886654e-06, "loss": 0.7035, "step": 23359 }, { "epoch": 0.7159494912345225, "grad_norm": 1.3209380990235, "learning_rate": 1.9707987542809585e-06, "loss": 0.6039, "step": 23360 }, { "epoch": 0.7159801397572637, "grad_norm": 1.3799540738972293, "learning_rate": 1.970403905522755e-06, "loss": 0.544, "step": 23361 }, { "epoch": 0.7160107882800049, "grad_norm": 0.4576548711212073, "learning_rate": 1.970009086615941e-06, "loss": 0.4201, "step": 23362 }, { "epoch": 0.7160414368027461, "grad_norm": 1.532908544575884, "learning_rate": 1.9696142975644008e-06, "loss": 0.6434, "step": 23363 }, { "epoch": 0.7160720853254873, "grad_norm": 1.4685090327150954, "learning_rate": 1.9692195383720275e-06, "loss": 0.5663, "step": 23364 }, { "epoch": 0.7161027338482285, "grad_norm": 1.27036901884985, "learning_rate": 1.968824809042708e-06, "loss": 0.5486, "step": 23365 }, { "epoch": 0.7161333823709697, "grad_norm": 1.2836495591401798, "learning_rate": 1.968430109580333e-06, "loss": 0.6497, "step": 23366 }, { "epoch": 0.716164030893711, "grad_norm": 1.2733681024022714, "learning_rate": 1.968035439988794e-06, "loss": 0.6149, "step": 23367 }, { "epoch": 0.7161946794164521, "grad_norm": 1.3665953693221204, "learning_rate": 1.9676408002719753e-06, "loss": 0.6373, "step": 23368 }, { "epoch": 0.7162253279391934, "grad_norm": 1.3022273516935947, "learning_rate": 1.967246190433768e-06, "loss": 0.6458, "step": 23369 }, { "epoch": 0.7162559764619345, "grad_norm": 1.4435545544441895, "learning_rate": 1.966851610478062e-06, "loss": 0.6723, "step": 23370 }, { "epoch": 0.7162866249846758, "grad_norm": 1.420762391273406, "learning_rate": 1.9664570604087428e-06, "loss": 0.5621, "step": 23371 }, { "epoch": 0.7163172735074169, "grad_norm": 1.2381980274955418, "learning_rate": 1.9660625402296948e-06, "loss": 0.5704, "step": 23372 }, { "epoch": 0.7163479220301582, "grad_norm": 1.3639863070732845, "learning_rate": 1.965668049944812e-06, "loss": 0.6032, "step": 23373 }, { "epoch": 0.7163785705528993, "grad_norm": 0.42838022209794585, "learning_rate": 1.9652735895579773e-06, "loss": 0.3856, "step": 23374 }, { "epoch": 0.7164092190756406, "grad_norm": 1.3922831794178565, "learning_rate": 1.96487915907308e-06, "loss": 0.6106, "step": 23375 }, { "epoch": 0.7164398675983817, "grad_norm": 1.363738586220134, "learning_rate": 1.964484758494003e-06, "loss": 0.6055, "step": 23376 }, { "epoch": 0.716470516121123, "grad_norm": 1.4456243088913316, "learning_rate": 1.9640903878246344e-06, "loss": 0.6562, "step": 23377 }, { "epoch": 0.7165011646438642, "grad_norm": 1.4049812550073133, "learning_rate": 1.9636960470688613e-06, "loss": 0.5853, "step": 23378 }, { "epoch": 0.7165318131666054, "grad_norm": 1.2937913887414474, "learning_rate": 1.963301736230567e-06, "loss": 0.6563, "step": 23379 }, { "epoch": 0.7165624616893466, "grad_norm": 1.180322711832696, "learning_rate": 1.9629074553136367e-06, "loss": 0.5872, "step": 23380 }, { "epoch": 0.7165931102120878, "grad_norm": 1.4577396946640453, "learning_rate": 1.9625132043219584e-06, "loss": 0.5796, "step": 23381 }, { "epoch": 0.716623758734829, "grad_norm": 1.32054388469626, "learning_rate": 1.962118983259413e-06, "loss": 0.6132, "step": 23382 }, { "epoch": 0.7166544072575702, "grad_norm": 1.2544359915603833, "learning_rate": 1.9617247921298865e-06, "loss": 0.5312, "step": 23383 }, { "epoch": 0.7166850557803114, "grad_norm": 0.4762452653613352, "learning_rate": 1.961330630937265e-06, "loss": 0.4117, "step": 23384 }, { "epoch": 0.7167157043030526, "grad_norm": 1.3314870053969694, "learning_rate": 1.9609364996854285e-06, "loss": 0.621, "step": 23385 }, { "epoch": 0.7167463528257938, "grad_norm": 1.2295946292900068, "learning_rate": 1.9605423983782633e-06, "loss": 0.574, "step": 23386 }, { "epoch": 0.716777001348535, "grad_norm": 0.47183185789648585, "learning_rate": 1.960148327019651e-06, "loss": 0.3878, "step": 23387 }, { "epoch": 0.7168076498712762, "grad_norm": 1.3498924332109195, "learning_rate": 1.9597542856134737e-06, "loss": 0.6053, "step": 23388 }, { "epoch": 0.7168382983940174, "grad_norm": 1.3397734390406528, "learning_rate": 1.959360274163618e-06, "loss": 0.4948, "step": 23389 }, { "epoch": 0.7168689469167586, "grad_norm": 1.305844750660247, "learning_rate": 1.9589662926739616e-06, "loss": 0.6143, "step": 23390 }, { "epoch": 0.7168995954394998, "grad_norm": 1.2401551121144356, "learning_rate": 1.958572341148388e-06, "loss": 0.5234, "step": 23391 }, { "epoch": 0.716930243962241, "grad_norm": 1.4611188901353567, "learning_rate": 1.9581784195907817e-06, "loss": 0.6074, "step": 23392 }, { "epoch": 0.7169608924849822, "grad_norm": 1.4185833885133783, "learning_rate": 1.9577845280050194e-06, "loss": 0.6146, "step": 23393 }, { "epoch": 0.7169915410077234, "grad_norm": 0.473019772027522, "learning_rate": 1.9573906663949845e-06, "loss": 0.4005, "step": 23394 }, { "epoch": 0.7170221895304646, "grad_norm": 1.21649205186816, "learning_rate": 1.9569968347645597e-06, "loss": 0.6206, "step": 23395 }, { "epoch": 0.7170528380532059, "grad_norm": 1.220584407593897, "learning_rate": 1.9566030331176223e-06, "loss": 0.5522, "step": 23396 }, { "epoch": 0.717083486575947, "grad_norm": 0.488015313730728, "learning_rate": 1.956209261458055e-06, "loss": 0.4039, "step": 23397 }, { "epoch": 0.7171141350986883, "grad_norm": 1.234613441964614, "learning_rate": 1.9558155197897355e-06, "loss": 0.5886, "step": 23398 }, { "epoch": 0.7171447836214294, "grad_norm": 1.3551736014141735, "learning_rate": 1.9554218081165444e-06, "loss": 0.6753, "step": 23399 }, { "epoch": 0.7171754321441707, "grad_norm": 1.3551153782572734, "learning_rate": 1.9550281264423626e-06, "loss": 0.6309, "step": 23400 }, { "epoch": 0.7172060806669118, "grad_norm": 1.2957931394069577, "learning_rate": 1.9546344747710666e-06, "loss": 0.6135, "step": 23401 }, { "epoch": 0.7172367291896531, "grad_norm": 1.3163759569423286, "learning_rate": 1.954240853106536e-06, "loss": 0.5609, "step": 23402 }, { "epoch": 0.7172673777123942, "grad_norm": 1.4127583670747186, "learning_rate": 1.953847261452651e-06, "loss": 0.6186, "step": 23403 }, { "epoch": 0.7172980262351355, "grad_norm": 1.5080140783467655, "learning_rate": 1.953453699813287e-06, "loss": 0.7057, "step": 23404 }, { "epoch": 0.7173286747578766, "grad_norm": 1.4061649811708166, "learning_rate": 1.953060168192325e-06, "loss": 0.5534, "step": 23405 }, { "epoch": 0.7173593232806179, "grad_norm": 1.3198035191190147, "learning_rate": 1.9526666665936388e-06, "loss": 0.6495, "step": 23406 }, { "epoch": 0.7173899718033591, "grad_norm": 1.2252470167435863, "learning_rate": 1.952273195021108e-06, "loss": 0.6227, "step": 23407 }, { "epoch": 0.7174206203261003, "grad_norm": 0.43166842447339826, "learning_rate": 1.95187975347861e-06, "loss": 0.3861, "step": 23408 }, { "epoch": 0.7174512688488415, "grad_norm": 1.3821666265957966, "learning_rate": 1.9514863419700198e-06, "loss": 0.5632, "step": 23409 }, { "epoch": 0.7174819173715827, "grad_norm": 1.1571616582792001, "learning_rate": 1.9510929604992147e-06, "loss": 0.5369, "step": 23410 }, { "epoch": 0.7175125658943239, "grad_norm": 1.2646836269484147, "learning_rate": 1.950699609070072e-06, "loss": 0.6257, "step": 23411 }, { "epoch": 0.7175432144170651, "grad_norm": 1.2748543658591673, "learning_rate": 1.950306287686465e-06, "loss": 0.5996, "step": 23412 }, { "epoch": 0.7175738629398063, "grad_norm": 1.195088414840405, "learning_rate": 1.94991299635227e-06, "loss": 0.5953, "step": 23413 }, { "epoch": 0.7176045114625476, "grad_norm": 1.3526500410774431, "learning_rate": 1.9495197350713645e-06, "loss": 0.6134, "step": 23414 }, { "epoch": 0.7176351599852887, "grad_norm": 1.3956561118109716, "learning_rate": 1.9491265038476197e-06, "loss": 0.6575, "step": 23415 }, { "epoch": 0.71766580850803, "grad_norm": 1.3065287438084363, "learning_rate": 1.948733302684914e-06, "loss": 0.5945, "step": 23416 }, { "epoch": 0.7176964570307711, "grad_norm": 0.45623137823541077, "learning_rate": 1.9483401315871163e-06, "loss": 0.4056, "step": 23417 }, { "epoch": 0.7177271055535123, "grad_norm": 1.2168153670661628, "learning_rate": 1.947946990558107e-06, "loss": 0.4764, "step": 23418 }, { "epoch": 0.7177577540762535, "grad_norm": 1.1051903521865025, "learning_rate": 1.947553879601758e-06, "loss": 0.5885, "step": 23419 }, { "epoch": 0.7177884025989947, "grad_norm": 1.2017513499345884, "learning_rate": 1.9471607987219394e-06, "loss": 0.5459, "step": 23420 }, { "epoch": 0.7178190511217359, "grad_norm": 0.477252447262342, "learning_rate": 1.946767747922526e-06, "loss": 0.4036, "step": 23421 }, { "epoch": 0.7178496996444771, "grad_norm": 1.4383457454069832, "learning_rate": 1.9463747272073935e-06, "loss": 0.7006, "step": 23422 }, { "epoch": 0.7178803481672184, "grad_norm": 1.2506465361115442, "learning_rate": 1.94598173658041e-06, "loss": 0.6264, "step": 23423 }, { "epoch": 0.7179109966899595, "grad_norm": 1.3630756779617905, "learning_rate": 1.9455887760454505e-06, "loss": 0.5842, "step": 23424 }, { "epoch": 0.7179416452127008, "grad_norm": 1.2155140968435554, "learning_rate": 1.9451958456063876e-06, "loss": 0.5757, "step": 23425 }, { "epoch": 0.7179722937354419, "grad_norm": 1.28081524355943, "learning_rate": 1.9448029452670902e-06, "loss": 0.5646, "step": 23426 }, { "epoch": 0.7180029422581832, "grad_norm": 1.1781208826687144, "learning_rate": 1.9444100750314327e-06, "loss": 0.5399, "step": 23427 }, { "epoch": 0.7180335907809243, "grad_norm": 0.4460927138635369, "learning_rate": 1.9440172349032806e-06, "loss": 0.386, "step": 23428 }, { "epoch": 0.7180642393036656, "grad_norm": 1.5180624911661116, "learning_rate": 1.9436244248865123e-06, "loss": 0.6679, "step": 23429 }, { "epoch": 0.7180948878264067, "grad_norm": 1.4127043567656072, "learning_rate": 1.9432316449849946e-06, "loss": 0.5899, "step": 23430 }, { "epoch": 0.718125536349148, "grad_norm": 1.3131766917686967, "learning_rate": 1.9428388952025963e-06, "loss": 0.6532, "step": 23431 }, { "epoch": 0.7181561848718891, "grad_norm": 0.43637826806412733, "learning_rate": 1.942446175543188e-06, "loss": 0.4046, "step": 23432 }, { "epoch": 0.7181868333946304, "grad_norm": 1.1960429942082142, "learning_rate": 1.9420534860106417e-06, "loss": 0.6433, "step": 23433 }, { "epoch": 0.7182174819173716, "grad_norm": 1.2871496766180854, "learning_rate": 1.941660826608823e-06, "loss": 0.567, "step": 23434 }, { "epoch": 0.7182481304401128, "grad_norm": 1.1981735053878984, "learning_rate": 1.941268197341603e-06, "loss": 0.5197, "step": 23435 }, { "epoch": 0.718278778962854, "grad_norm": 1.2133080104387692, "learning_rate": 1.9408755982128498e-06, "loss": 0.5164, "step": 23436 }, { "epoch": 0.7183094274855952, "grad_norm": 1.375741765838275, "learning_rate": 1.9404830292264336e-06, "loss": 0.613, "step": 23437 }, { "epoch": 0.7183400760083364, "grad_norm": 1.354348766680668, "learning_rate": 1.9400904903862215e-06, "loss": 0.5226, "step": 23438 }, { "epoch": 0.7183707245310776, "grad_norm": 1.1910379756853717, "learning_rate": 1.939697981696077e-06, "loss": 0.5718, "step": 23439 }, { "epoch": 0.7184013730538188, "grad_norm": 1.3510846263624603, "learning_rate": 1.9393055031598745e-06, "loss": 0.6068, "step": 23440 }, { "epoch": 0.71843202157656, "grad_norm": 0.439694314044136, "learning_rate": 1.9389130547814784e-06, "loss": 0.4005, "step": 23441 }, { "epoch": 0.7184626700993012, "grad_norm": 1.2216789862909623, "learning_rate": 1.938520636564753e-06, "loss": 0.5205, "step": 23442 }, { "epoch": 0.7184933186220425, "grad_norm": 1.2761835527611936, "learning_rate": 1.9381282485135676e-06, "loss": 0.5735, "step": 23443 }, { "epoch": 0.7185239671447836, "grad_norm": 1.4305415296889394, "learning_rate": 1.937735890631788e-06, "loss": 0.5987, "step": 23444 }, { "epoch": 0.7185546156675249, "grad_norm": 1.5909938953171416, "learning_rate": 1.937343562923281e-06, "loss": 0.5783, "step": 23445 }, { "epoch": 0.718585264190266, "grad_norm": 1.2336256194447417, "learning_rate": 1.936951265391911e-06, "loss": 0.6137, "step": 23446 }, { "epoch": 0.7186159127130073, "grad_norm": 1.4066955486176802, "learning_rate": 1.9365589980415427e-06, "loss": 0.6496, "step": 23447 }, { "epoch": 0.7186465612357484, "grad_norm": 1.4787818846943075, "learning_rate": 1.936166760876045e-06, "loss": 0.5925, "step": 23448 }, { "epoch": 0.7186772097584896, "grad_norm": 0.4420731569459736, "learning_rate": 1.93577455389928e-06, "loss": 0.4129, "step": 23449 }, { "epoch": 0.7187078582812308, "grad_norm": 1.332433301190187, "learning_rate": 1.93538237711511e-06, "loss": 0.645, "step": 23450 }, { "epoch": 0.718738506803972, "grad_norm": 1.1829186675489467, "learning_rate": 1.934990230527402e-06, "loss": 0.5345, "step": 23451 }, { "epoch": 0.7187691553267133, "grad_norm": 0.426775204741545, "learning_rate": 1.9345981141400215e-06, "loss": 0.3821, "step": 23452 }, { "epoch": 0.7187998038494544, "grad_norm": 0.4424262069902243, "learning_rate": 1.934206027956828e-06, "loss": 0.4114, "step": 23453 }, { "epoch": 0.7188304523721957, "grad_norm": 1.2139652213166954, "learning_rate": 1.9338139719816866e-06, "loss": 0.637, "step": 23454 }, { "epoch": 0.7188611008949368, "grad_norm": 1.2669488625712455, "learning_rate": 1.933421946218461e-06, "loss": 0.6887, "step": 23455 }, { "epoch": 0.7188917494176781, "grad_norm": 1.5202914248203656, "learning_rate": 1.933029950671016e-06, "loss": 0.7291, "step": 23456 }, { "epoch": 0.7189223979404192, "grad_norm": 1.321320533961443, "learning_rate": 1.932637985343211e-06, "loss": 0.6264, "step": 23457 }, { "epoch": 0.7189530464631605, "grad_norm": 1.2583824110638437, "learning_rate": 1.932246050238905e-06, "loss": 0.5749, "step": 23458 }, { "epoch": 0.7189836949859016, "grad_norm": 1.2242795866199883, "learning_rate": 1.9318541453619673e-06, "loss": 0.6623, "step": 23459 }, { "epoch": 0.7190143435086429, "grad_norm": 1.2882649422145787, "learning_rate": 1.9314622707162555e-06, "loss": 0.5473, "step": 23460 }, { "epoch": 0.7190449920313841, "grad_norm": 1.4823137870280887, "learning_rate": 1.9310704263056295e-06, "loss": 0.5235, "step": 23461 }, { "epoch": 0.7190756405541253, "grad_norm": 1.2839622008364917, "learning_rate": 1.9306786121339522e-06, "loss": 0.6235, "step": 23462 }, { "epoch": 0.7191062890768665, "grad_norm": 1.2174647556362415, "learning_rate": 1.9302868282050837e-06, "loss": 0.6276, "step": 23463 }, { "epoch": 0.7191369375996077, "grad_norm": 1.2387261960355025, "learning_rate": 1.9298950745228858e-06, "loss": 0.6017, "step": 23464 }, { "epoch": 0.7191675861223489, "grad_norm": 1.2677821883283147, "learning_rate": 1.929503351091216e-06, "loss": 0.4662, "step": 23465 }, { "epoch": 0.7191982346450901, "grad_norm": 1.266527727945804, "learning_rate": 1.9291116579139347e-06, "loss": 0.5705, "step": 23466 }, { "epoch": 0.7192288831678313, "grad_norm": 1.5025750073831243, "learning_rate": 1.9287199949949043e-06, "loss": 0.6592, "step": 23467 }, { "epoch": 0.7192595316905726, "grad_norm": 1.2829221009499328, "learning_rate": 1.928328362337981e-06, "loss": 0.5771, "step": 23468 }, { "epoch": 0.7192901802133137, "grad_norm": 1.2066348004467768, "learning_rate": 1.9279367599470216e-06, "loss": 0.6185, "step": 23469 }, { "epoch": 0.719320828736055, "grad_norm": 1.3285252164801624, "learning_rate": 1.9275451878258905e-06, "loss": 0.6342, "step": 23470 }, { "epoch": 0.7193514772587961, "grad_norm": 0.4405120095720238, "learning_rate": 1.9271536459784406e-06, "loss": 0.4088, "step": 23471 }, { "epoch": 0.7193821257815374, "grad_norm": 1.6476529950734822, "learning_rate": 1.926762134408534e-06, "loss": 0.6338, "step": 23472 }, { "epoch": 0.7194127743042785, "grad_norm": 1.3645863148966346, "learning_rate": 1.926370653120025e-06, "loss": 0.6212, "step": 23473 }, { "epoch": 0.7194434228270198, "grad_norm": 1.3114760047908653, "learning_rate": 1.9259792021167724e-06, "loss": 0.6523, "step": 23474 }, { "epoch": 0.7194740713497609, "grad_norm": 1.396500236527817, "learning_rate": 1.925587781402635e-06, "loss": 0.7621, "step": 23475 }, { "epoch": 0.7195047198725022, "grad_norm": 0.4298468627838156, "learning_rate": 1.925196390981465e-06, "loss": 0.406, "step": 23476 }, { "epoch": 0.7195353683952433, "grad_norm": 1.2463705727545062, "learning_rate": 1.9248050308571224e-06, "loss": 0.5662, "step": 23477 }, { "epoch": 0.7195660169179846, "grad_norm": 1.4409721120880936, "learning_rate": 1.924413701033465e-06, "loss": 0.5925, "step": 23478 }, { "epoch": 0.7195966654407258, "grad_norm": 1.1434794052266726, "learning_rate": 1.9240224015143455e-06, "loss": 0.5328, "step": 23479 }, { "epoch": 0.7196273139634669, "grad_norm": 1.1981970312493577, "learning_rate": 1.9236311323036172e-06, "loss": 0.5807, "step": 23480 }, { "epoch": 0.7196579624862082, "grad_norm": 1.2530648183183126, "learning_rate": 1.923239893405142e-06, "loss": 0.636, "step": 23481 }, { "epoch": 0.7196886110089493, "grad_norm": 1.1992174426548101, "learning_rate": 1.9228486848227697e-06, "loss": 0.5289, "step": 23482 }, { "epoch": 0.7197192595316906, "grad_norm": 0.45839306743724484, "learning_rate": 1.922457506560358e-06, "loss": 0.3851, "step": 23483 }, { "epoch": 0.7197499080544317, "grad_norm": 1.3201374799347514, "learning_rate": 1.9220663586217585e-06, "loss": 0.6456, "step": 23484 }, { "epoch": 0.719780556577173, "grad_norm": 1.4058994951655672, "learning_rate": 1.9216752410108264e-06, "loss": 0.6622, "step": 23485 }, { "epoch": 0.7198112050999141, "grad_norm": 1.385913433604514, "learning_rate": 1.9212841537314173e-06, "loss": 0.5667, "step": 23486 }, { "epoch": 0.7198418536226554, "grad_norm": 1.2617890730980925, "learning_rate": 1.9208930967873824e-06, "loss": 0.6066, "step": 23487 }, { "epoch": 0.7198725021453966, "grad_norm": 1.2290454473498644, "learning_rate": 1.920502070182575e-06, "loss": 0.5755, "step": 23488 }, { "epoch": 0.7199031506681378, "grad_norm": 0.43558960834639865, "learning_rate": 1.92011107392085e-06, "loss": 0.3935, "step": 23489 }, { "epoch": 0.719933799190879, "grad_norm": 1.3679165558549533, "learning_rate": 1.9197201080060572e-06, "loss": 0.5683, "step": 23490 }, { "epoch": 0.7199644477136202, "grad_norm": 1.4899877351172806, "learning_rate": 1.9193291724420503e-06, "loss": 0.6266, "step": 23491 }, { "epoch": 0.7199950962363614, "grad_norm": 1.3686011754454614, "learning_rate": 1.9189382672326833e-06, "loss": 0.6966, "step": 23492 }, { "epoch": 0.7200257447591026, "grad_norm": 1.1525102515116983, "learning_rate": 1.918547392381804e-06, "loss": 0.6701, "step": 23493 }, { "epoch": 0.7200563932818438, "grad_norm": 1.3286524880395827, "learning_rate": 1.9181565478932675e-06, "loss": 0.6649, "step": 23494 }, { "epoch": 0.720087041804585, "grad_norm": 1.3193022189730552, "learning_rate": 1.917765733770921e-06, "loss": 0.6732, "step": 23495 }, { "epoch": 0.7201176903273262, "grad_norm": 0.45769304324400006, "learning_rate": 1.9173749500186174e-06, "loss": 0.3805, "step": 23496 }, { "epoch": 0.7201483388500675, "grad_norm": 1.4263036370146138, "learning_rate": 1.9169841966402085e-06, "loss": 0.5985, "step": 23497 }, { "epoch": 0.7201789873728086, "grad_norm": 1.3865860454863241, "learning_rate": 1.916593473639542e-06, "loss": 0.6981, "step": 23498 }, { "epoch": 0.7202096358955499, "grad_norm": 0.43343606390311795, "learning_rate": 1.916202781020469e-06, "loss": 0.4049, "step": 23499 }, { "epoch": 0.720240284418291, "grad_norm": 0.47747437696022443, "learning_rate": 1.9158121187868407e-06, "loss": 0.4177, "step": 23500 }, { "epoch": 0.7202709329410323, "grad_norm": 0.4579269687040319, "learning_rate": 1.9154214869425026e-06, "loss": 0.4084, "step": 23501 }, { "epoch": 0.7203015814637734, "grad_norm": 1.1774135316580538, "learning_rate": 1.915030885491308e-06, "loss": 0.6509, "step": 23502 }, { "epoch": 0.7203322299865147, "grad_norm": 1.2820424413053433, "learning_rate": 1.9146403144371014e-06, "loss": 0.62, "step": 23503 }, { "epoch": 0.7203628785092558, "grad_norm": 1.2951388403705708, "learning_rate": 1.9142497737837327e-06, "loss": 0.6698, "step": 23504 }, { "epoch": 0.7203935270319971, "grad_norm": 1.329655871275434, "learning_rate": 1.9138592635350535e-06, "loss": 0.6218, "step": 23505 }, { "epoch": 0.7204241755547383, "grad_norm": 0.45528251599213515, "learning_rate": 1.913468783694906e-06, "loss": 0.4315, "step": 23506 }, { "epoch": 0.7204548240774795, "grad_norm": 1.4195947408772265, "learning_rate": 1.9130783342671406e-06, "loss": 0.5558, "step": 23507 }, { "epoch": 0.7204854726002207, "grad_norm": 1.2952700278324005, "learning_rate": 1.9126879152556055e-06, "loss": 0.577, "step": 23508 }, { "epoch": 0.7205161211229619, "grad_norm": 1.333661353880549, "learning_rate": 1.9122975266641446e-06, "loss": 0.6982, "step": 23509 }, { "epoch": 0.7205467696457031, "grad_norm": 1.2661112108306878, "learning_rate": 1.9119071684966065e-06, "loss": 0.4912, "step": 23510 }, { "epoch": 0.7205774181684442, "grad_norm": 0.41818717551309614, "learning_rate": 1.911516840756839e-06, "loss": 0.3928, "step": 23511 }, { "epoch": 0.7206080666911855, "grad_norm": 1.3621746033780338, "learning_rate": 1.911126543448685e-06, "loss": 0.515, "step": 23512 }, { "epoch": 0.7206387152139266, "grad_norm": 1.301901391182583, "learning_rate": 1.910736276575992e-06, "loss": 0.5777, "step": 23513 }, { "epoch": 0.7206693637366679, "grad_norm": 1.4451296299970449, "learning_rate": 1.910346040142604e-06, "loss": 0.5585, "step": 23514 }, { "epoch": 0.720700012259409, "grad_norm": 1.2995478257420794, "learning_rate": 1.9099558341523664e-06, "loss": 0.6833, "step": 23515 }, { "epoch": 0.7207306607821503, "grad_norm": 0.43047573718594817, "learning_rate": 1.9095656586091273e-06, "loss": 0.3824, "step": 23516 }, { "epoch": 0.7207613093048915, "grad_norm": 0.46519375308115274, "learning_rate": 1.909175513516726e-06, "loss": 0.3911, "step": 23517 }, { "epoch": 0.7207919578276327, "grad_norm": 1.2876219803661109, "learning_rate": 1.908785398879009e-06, "loss": 0.6518, "step": 23518 }, { "epoch": 0.7208226063503739, "grad_norm": 0.4371615371891821, "learning_rate": 1.9083953146998224e-06, "loss": 0.3834, "step": 23519 }, { "epoch": 0.7208532548731151, "grad_norm": 1.3647804057866812, "learning_rate": 1.9080052609830065e-06, "loss": 0.6278, "step": 23520 }, { "epoch": 0.7208839033958563, "grad_norm": 1.330736491059146, "learning_rate": 1.907615237732405e-06, "loss": 0.6544, "step": 23521 }, { "epoch": 0.7209145519185975, "grad_norm": 1.3007299817698434, "learning_rate": 1.9072252449518647e-06, "loss": 0.6042, "step": 23522 }, { "epoch": 0.7209452004413387, "grad_norm": 1.2673203022295019, "learning_rate": 1.9068352826452225e-06, "loss": 0.5749, "step": 23523 }, { "epoch": 0.72097584896408, "grad_norm": 1.4059795023272743, "learning_rate": 1.9064453508163261e-06, "loss": 0.654, "step": 23524 }, { "epoch": 0.7210064974868211, "grad_norm": 1.187660482009502, "learning_rate": 1.906055449469013e-06, "loss": 0.5409, "step": 23525 }, { "epoch": 0.7210371460095624, "grad_norm": 1.3024690771621246, "learning_rate": 1.9056655786071277e-06, "loss": 0.6225, "step": 23526 }, { "epoch": 0.7210677945323035, "grad_norm": 0.4548056888853426, "learning_rate": 1.9052757382345128e-06, "loss": 0.4058, "step": 23527 }, { "epoch": 0.7210984430550448, "grad_norm": 1.4345665541111836, "learning_rate": 1.9048859283550059e-06, "loss": 0.5985, "step": 23528 }, { "epoch": 0.7211290915777859, "grad_norm": 1.2753977146448134, "learning_rate": 1.9044961489724495e-06, "loss": 0.5253, "step": 23529 }, { "epoch": 0.7211597401005272, "grad_norm": 1.3051292797798713, "learning_rate": 1.9041064000906868e-06, "loss": 0.6155, "step": 23530 }, { "epoch": 0.7211903886232683, "grad_norm": 1.1760351374045186, "learning_rate": 1.9037166817135538e-06, "loss": 0.5802, "step": 23531 }, { "epoch": 0.7212210371460096, "grad_norm": 0.42805562181743306, "learning_rate": 1.9033269938448927e-06, "loss": 0.3908, "step": 23532 }, { "epoch": 0.7212516856687508, "grad_norm": 1.36967538456969, "learning_rate": 1.902937336488545e-06, "loss": 0.6732, "step": 23533 }, { "epoch": 0.721282334191492, "grad_norm": 0.44566790116367555, "learning_rate": 1.9025477096483463e-06, "loss": 0.3739, "step": 23534 }, { "epoch": 0.7213129827142332, "grad_norm": 1.304253365445034, "learning_rate": 1.9021581133281397e-06, "loss": 0.7426, "step": 23535 }, { "epoch": 0.7213436312369744, "grad_norm": 1.538601878659378, "learning_rate": 1.9017685475317576e-06, "loss": 0.7188, "step": 23536 }, { "epoch": 0.7213742797597156, "grad_norm": 1.3226026842138614, "learning_rate": 1.9013790122630465e-06, "loss": 0.7111, "step": 23537 }, { "epoch": 0.7214049282824568, "grad_norm": 0.440739657278173, "learning_rate": 1.900989507525841e-06, "loss": 0.3828, "step": 23538 }, { "epoch": 0.721435576805198, "grad_norm": 1.1759656101274396, "learning_rate": 1.9006000333239766e-06, "loss": 0.5697, "step": 23539 }, { "epoch": 0.7214662253279392, "grad_norm": 1.3933446776337226, "learning_rate": 1.9002105896612932e-06, "loss": 0.5673, "step": 23540 }, { "epoch": 0.7214968738506804, "grad_norm": 1.261370421282139, "learning_rate": 1.8998211765416303e-06, "loss": 0.5498, "step": 23541 }, { "epoch": 0.7215275223734215, "grad_norm": 1.2998028810234972, "learning_rate": 1.89943179396882e-06, "loss": 0.6398, "step": 23542 }, { "epoch": 0.7215581708961628, "grad_norm": 1.486748002404484, "learning_rate": 1.8990424419467019e-06, "loss": 0.5537, "step": 23543 }, { "epoch": 0.721588819418904, "grad_norm": 1.2364686092069632, "learning_rate": 1.8986531204791124e-06, "loss": 0.4884, "step": 23544 }, { "epoch": 0.7216194679416452, "grad_norm": 0.44429414327431843, "learning_rate": 1.8982638295698885e-06, "loss": 0.419, "step": 23545 }, { "epoch": 0.7216501164643864, "grad_norm": 1.5034581492514072, "learning_rate": 1.897874569222865e-06, "loss": 0.5883, "step": 23546 }, { "epoch": 0.7216807649871276, "grad_norm": 1.3093553055073441, "learning_rate": 1.8974853394418752e-06, "loss": 0.6337, "step": 23547 }, { "epoch": 0.7217114135098688, "grad_norm": 1.35161095716818, "learning_rate": 1.897096140230756e-06, "loss": 0.5865, "step": 23548 }, { "epoch": 0.72174206203261, "grad_norm": 1.3410295205516447, "learning_rate": 1.8967069715933444e-06, "loss": 0.5673, "step": 23549 }, { "epoch": 0.7217727105553512, "grad_norm": 1.3668947023124487, "learning_rate": 1.8963178335334708e-06, "loss": 0.6225, "step": 23550 }, { "epoch": 0.7218033590780925, "grad_norm": 0.4388776765299445, "learning_rate": 1.8959287260549724e-06, "loss": 0.378, "step": 23551 }, { "epoch": 0.7218340076008336, "grad_norm": 1.326353064312466, "learning_rate": 1.895539649161684e-06, "loss": 0.6204, "step": 23552 }, { "epoch": 0.7218646561235749, "grad_norm": 1.11999493576185, "learning_rate": 1.8951506028574356e-06, "loss": 0.5033, "step": 23553 }, { "epoch": 0.721895304646316, "grad_norm": 1.2453714410094, "learning_rate": 1.894761587146065e-06, "loss": 0.6297, "step": 23554 }, { "epoch": 0.7219259531690573, "grad_norm": 1.2006033478325266, "learning_rate": 1.894372602031399e-06, "loss": 0.5828, "step": 23555 }, { "epoch": 0.7219566016917984, "grad_norm": 1.4230930457945874, "learning_rate": 1.8939836475172784e-06, "loss": 0.6525, "step": 23556 }, { "epoch": 0.7219872502145397, "grad_norm": 1.4227463898962838, "learning_rate": 1.8935947236075314e-06, "loss": 0.7123, "step": 23557 }, { "epoch": 0.7220178987372808, "grad_norm": 1.2513756384965549, "learning_rate": 1.8932058303059887e-06, "loss": 0.5924, "step": 23558 }, { "epoch": 0.7220485472600221, "grad_norm": 0.439962393453018, "learning_rate": 1.8928169676164836e-06, "loss": 0.3973, "step": 23559 }, { "epoch": 0.7220791957827633, "grad_norm": 0.4405081437236182, "learning_rate": 1.8924281355428498e-06, "loss": 0.3809, "step": 23560 }, { "epoch": 0.7221098443055045, "grad_norm": 1.4834315112851029, "learning_rate": 1.8920393340889148e-06, "loss": 0.5513, "step": 23561 }, { "epoch": 0.7221404928282457, "grad_norm": 1.2795420630484884, "learning_rate": 1.8916505632585119e-06, "loss": 0.6324, "step": 23562 }, { "epoch": 0.7221711413509869, "grad_norm": 0.44296381984154937, "learning_rate": 1.8912618230554708e-06, "loss": 0.369, "step": 23563 }, { "epoch": 0.7222017898737281, "grad_norm": 1.3351041464723012, "learning_rate": 1.8908731134836244e-06, "loss": 0.6551, "step": 23564 }, { "epoch": 0.7222324383964693, "grad_norm": 1.2190077380960929, "learning_rate": 1.8904844345468004e-06, "loss": 0.5274, "step": 23565 }, { "epoch": 0.7222630869192105, "grad_norm": 1.3769980985446142, "learning_rate": 1.8900957862488262e-06, "loss": 0.6174, "step": 23566 }, { "epoch": 0.7222937354419517, "grad_norm": 1.3514404491073602, "learning_rate": 1.8897071685935364e-06, "loss": 0.6349, "step": 23567 }, { "epoch": 0.7223243839646929, "grad_norm": 1.399485424262945, "learning_rate": 1.8893185815847581e-06, "loss": 0.6343, "step": 23568 }, { "epoch": 0.7223550324874342, "grad_norm": 1.3443586250132205, "learning_rate": 1.888930025226318e-06, "loss": 0.6718, "step": 23569 }, { "epoch": 0.7223856810101753, "grad_norm": 1.5134980636109352, "learning_rate": 1.8885414995220464e-06, "loss": 0.6534, "step": 23570 }, { "epoch": 0.7224163295329166, "grad_norm": 1.229133169608197, "learning_rate": 1.8881530044757718e-06, "loss": 0.6108, "step": 23571 }, { "epoch": 0.7224469780556577, "grad_norm": 1.2744182793335015, "learning_rate": 1.8877645400913231e-06, "loss": 0.5623, "step": 23572 }, { "epoch": 0.7224776265783989, "grad_norm": 1.4223907715459276, "learning_rate": 1.887376106372525e-06, "loss": 0.5819, "step": 23573 }, { "epoch": 0.7225082751011401, "grad_norm": 0.4563418257024737, "learning_rate": 1.8869877033232065e-06, "loss": 0.4, "step": 23574 }, { "epoch": 0.7225389236238813, "grad_norm": 1.1508943454453187, "learning_rate": 1.8865993309471965e-06, "loss": 0.6159, "step": 23575 }, { "epoch": 0.7225695721466225, "grad_norm": 1.669290243672067, "learning_rate": 1.88621098924832e-06, "loss": 0.5865, "step": 23576 }, { "epoch": 0.7226002206693637, "grad_norm": 0.4428493747298311, "learning_rate": 1.8858226782303996e-06, "loss": 0.3851, "step": 23577 }, { "epoch": 0.722630869192105, "grad_norm": 1.308426189135173, "learning_rate": 1.885434397897269e-06, "loss": 0.6617, "step": 23578 }, { "epoch": 0.7226615177148461, "grad_norm": 1.3190704944639131, "learning_rate": 1.8850461482527498e-06, "loss": 0.6294, "step": 23579 }, { "epoch": 0.7226921662375874, "grad_norm": 1.6373295515938688, "learning_rate": 1.884657929300666e-06, "loss": 0.676, "step": 23580 }, { "epoch": 0.7227228147603285, "grad_norm": 1.3674076241852413, "learning_rate": 1.8842697410448457e-06, "loss": 0.593, "step": 23581 }, { "epoch": 0.7227534632830698, "grad_norm": 1.2410822768814056, "learning_rate": 1.8838815834891116e-06, "loss": 0.5778, "step": 23582 }, { "epoch": 0.7227841118058109, "grad_norm": 1.489107872881538, "learning_rate": 1.883493456637292e-06, "loss": 0.5875, "step": 23583 }, { "epoch": 0.7228147603285522, "grad_norm": 1.3811682749293548, "learning_rate": 1.883105360493207e-06, "loss": 0.5669, "step": 23584 }, { "epoch": 0.7228454088512933, "grad_norm": 1.2201946665792758, "learning_rate": 1.882717295060682e-06, "loss": 0.6282, "step": 23585 }, { "epoch": 0.7228760573740346, "grad_norm": 1.153602069472241, "learning_rate": 1.8823292603435434e-06, "loss": 0.6096, "step": 23586 }, { "epoch": 0.7229067058967757, "grad_norm": 1.3738234846598254, "learning_rate": 1.8819412563456118e-06, "loss": 0.6229, "step": 23587 }, { "epoch": 0.722937354419517, "grad_norm": 1.1875818597038506, "learning_rate": 1.881553283070708e-06, "loss": 0.5024, "step": 23588 }, { "epoch": 0.7229680029422582, "grad_norm": 1.2563973438218718, "learning_rate": 1.88116534052266e-06, "loss": 0.6374, "step": 23589 }, { "epoch": 0.7229986514649994, "grad_norm": 1.573962357488227, "learning_rate": 1.8807774287052866e-06, "loss": 0.6093, "step": 23590 }, { "epoch": 0.7230292999877406, "grad_norm": 1.4248369157100018, "learning_rate": 1.8803895476224133e-06, "loss": 0.6247, "step": 23591 }, { "epoch": 0.7230599485104818, "grad_norm": 1.29381727277967, "learning_rate": 1.8800016972778578e-06, "loss": 0.5324, "step": 23592 }, { "epoch": 0.723090597033223, "grad_norm": 1.5317403571012076, "learning_rate": 1.8796138776754442e-06, "loss": 0.5797, "step": 23593 }, { "epoch": 0.7231212455559642, "grad_norm": 1.3746884165129396, "learning_rate": 1.879226088818995e-06, "loss": 0.5605, "step": 23594 }, { "epoch": 0.7231518940787054, "grad_norm": 1.3451826746069964, "learning_rate": 1.878838330712328e-06, "loss": 0.5348, "step": 23595 }, { "epoch": 0.7231825426014467, "grad_norm": 1.4761991169234265, "learning_rate": 1.8784506033592658e-06, "loss": 0.5455, "step": 23596 }, { "epoch": 0.7232131911241878, "grad_norm": 1.3107185858909463, "learning_rate": 1.8780629067636296e-06, "loss": 0.5231, "step": 23597 }, { "epoch": 0.7232438396469291, "grad_norm": 1.1163434170296056, "learning_rate": 1.877675240929237e-06, "loss": 0.5956, "step": 23598 }, { "epoch": 0.7232744881696702, "grad_norm": 1.3196015636183136, "learning_rate": 1.877287605859911e-06, "loss": 0.5773, "step": 23599 }, { "epoch": 0.7233051366924115, "grad_norm": 1.3406468873909572, "learning_rate": 1.8769000015594675e-06, "loss": 0.5054, "step": 23600 }, { "epoch": 0.7233357852151526, "grad_norm": 1.411064497200526, "learning_rate": 1.876512428031727e-06, "loss": 0.6142, "step": 23601 }, { "epoch": 0.7233664337378939, "grad_norm": 1.4445432216393421, "learning_rate": 1.8761248852805103e-06, "loss": 0.6546, "step": 23602 }, { "epoch": 0.723397082260635, "grad_norm": 1.252859653516454, "learning_rate": 1.8757373733096334e-06, "loss": 0.5735, "step": 23603 }, { "epoch": 0.7234277307833762, "grad_norm": 1.354513755695535, "learning_rate": 1.8753498921229146e-06, "loss": 0.6497, "step": 23604 }, { "epoch": 0.7234583793061174, "grad_norm": 1.2024312558698176, "learning_rate": 1.8749624417241751e-06, "loss": 0.6304, "step": 23605 }, { "epoch": 0.7234890278288586, "grad_norm": 1.2973130734417364, "learning_rate": 1.8745750221172305e-06, "loss": 0.5468, "step": 23606 }, { "epoch": 0.7235196763515999, "grad_norm": 1.459384429036048, "learning_rate": 1.874187633305894e-06, "loss": 0.6835, "step": 23607 }, { "epoch": 0.723550324874341, "grad_norm": 1.2806408766586004, "learning_rate": 1.8738002752939899e-06, "loss": 0.6184, "step": 23608 }, { "epoch": 0.7235809733970823, "grad_norm": 1.2974995943402974, "learning_rate": 1.8734129480853302e-06, "loss": 0.5672, "step": 23609 }, { "epoch": 0.7236116219198234, "grad_norm": 0.4522866552877792, "learning_rate": 1.8730256516837342e-06, "loss": 0.4017, "step": 23610 }, { "epoch": 0.7236422704425647, "grad_norm": 0.443189387123694, "learning_rate": 1.8726383860930148e-06, "loss": 0.4116, "step": 23611 }, { "epoch": 0.7236729189653058, "grad_norm": 1.3580115301028486, "learning_rate": 1.8722511513169894e-06, "loss": 0.6054, "step": 23612 }, { "epoch": 0.7237035674880471, "grad_norm": 1.2482275609832298, "learning_rate": 1.8718639473594757e-06, "loss": 0.5605, "step": 23613 }, { "epoch": 0.7237342160107882, "grad_norm": 1.3875355303636423, "learning_rate": 1.871476774224285e-06, "loss": 0.6512, "step": 23614 }, { "epoch": 0.7237648645335295, "grad_norm": 1.5026357870109488, "learning_rate": 1.8710896319152339e-06, "loss": 0.6802, "step": 23615 }, { "epoch": 0.7237955130562707, "grad_norm": 1.3352682578672317, "learning_rate": 1.8707025204361396e-06, "loss": 0.6015, "step": 23616 }, { "epoch": 0.7238261615790119, "grad_norm": 1.410706237953787, "learning_rate": 1.8703154397908119e-06, "loss": 0.5011, "step": 23617 }, { "epoch": 0.7238568101017531, "grad_norm": 1.2179783400811452, "learning_rate": 1.8699283899830667e-06, "loss": 0.6047, "step": 23618 }, { "epoch": 0.7238874586244943, "grad_norm": 1.4499005328893677, "learning_rate": 1.86954137101672e-06, "loss": 0.6195, "step": 23619 }, { "epoch": 0.7239181071472355, "grad_norm": 1.377678128604584, "learning_rate": 1.869154382895581e-06, "loss": 0.687, "step": 23620 }, { "epoch": 0.7239487556699767, "grad_norm": 1.3299087570773118, "learning_rate": 1.8687674256234672e-06, "loss": 0.5961, "step": 23621 }, { "epoch": 0.7239794041927179, "grad_norm": 1.4567335833808033, "learning_rate": 1.868380499204187e-06, "loss": 0.6635, "step": 23622 }, { "epoch": 0.7240100527154592, "grad_norm": 1.3047559205316637, "learning_rate": 1.8679936036415552e-06, "loss": 0.5966, "step": 23623 }, { "epoch": 0.7240407012382003, "grad_norm": 1.344800648049005, "learning_rate": 1.8676067389393854e-06, "loss": 0.5902, "step": 23624 }, { "epoch": 0.7240713497609416, "grad_norm": 0.43964749444163104, "learning_rate": 1.8672199051014862e-06, "loss": 0.4057, "step": 23625 }, { "epoch": 0.7241019982836827, "grad_norm": 1.327127884234886, "learning_rate": 1.866833102131671e-06, "loss": 0.6011, "step": 23626 }, { "epoch": 0.724132646806424, "grad_norm": 1.369975399899378, "learning_rate": 1.8664463300337521e-06, "loss": 0.666, "step": 23627 }, { "epoch": 0.7241632953291651, "grad_norm": 1.337281337321686, "learning_rate": 1.866059588811538e-06, "loss": 0.5995, "step": 23628 }, { "epoch": 0.7241939438519064, "grad_norm": 1.2951426337114256, "learning_rate": 1.8656728784688404e-06, "loss": 0.5532, "step": 23629 }, { "epoch": 0.7242245923746475, "grad_norm": 1.2401334972314626, "learning_rate": 1.8652861990094717e-06, "loss": 0.6791, "step": 23630 }, { "epoch": 0.7242552408973888, "grad_norm": 1.5486982209253475, "learning_rate": 1.8648995504372386e-06, "loss": 0.6235, "step": 23631 }, { "epoch": 0.72428588942013, "grad_norm": 1.5253982576339025, "learning_rate": 1.8645129327559536e-06, "loss": 0.67, "step": 23632 }, { "epoch": 0.7243165379428712, "grad_norm": 1.2464983251363624, "learning_rate": 1.8641263459694237e-06, "loss": 0.5951, "step": 23633 }, { "epoch": 0.7243471864656124, "grad_norm": 1.3065229753214904, "learning_rate": 1.8637397900814591e-06, "loss": 0.5759, "step": 23634 }, { "epoch": 0.7243778349883535, "grad_norm": 1.4839197074071802, "learning_rate": 1.8633532650958702e-06, "loss": 0.6159, "step": 23635 }, { "epoch": 0.7244084835110948, "grad_norm": 1.342044062910098, "learning_rate": 1.8629667710164628e-06, "loss": 0.6566, "step": 23636 }, { "epoch": 0.7244391320338359, "grad_norm": 1.6698525275719505, "learning_rate": 1.8625803078470467e-06, "loss": 0.6017, "step": 23637 }, { "epoch": 0.7244697805565772, "grad_norm": 1.1603254841424424, "learning_rate": 1.8621938755914309e-06, "loss": 0.5523, "step": 23638 }, { "epoch": 0.7245004290793183, "grad_norm": 1.3552538206765734, "learning_rate": 1.8618074742534199e-06, "loss": 0.5952, "step": 23639 }, { "epoch": 0.7245310776020596, "grad_norm": 1.4228268511365234, "learning_rate": 1.861421103836823e-06, "loss": 0.6122, "step": 23640 }, { "epoch": 0.7245617261248007, "grad_norm": 0.4614721641689881, "learning_rate": 1.8610347643454486e-06, "loss": 0.3881, "step": 23641 }, { "epoch": 0.724592374647542, "grad_norm": 1.4631945182187132, "learning_rate": 1.8606484557831e-06, "loss": 0.6671, "step": 23642 }, { "epoch": 0.7246230231702832, "grad_norm": 1.2048864246099567, "learning_rate": 1.8602621781535874e-06, "loss": 0.5168, "step": 23643 }, { "epoch": 0.7246536716930244, "grad_norm": 1.2897461265224224, "learning_rate": 1.8598759314607129e-06, "loss": 0.5582, "step": 23644 }, { "epoch": 0.7246843202157656, "grad_norm": 1.33640574716802, "learning_rate": 1.859489715708284e-06, "loss": 0.6713, "step": 23645 }, { "epoch": 0.7247149687385068, "grad_norm": 1.3982865968511453, "learning_rate": 1.8591035309001087e-06, "loss": 0.5471, "step": 23646 }, { "epoch": 0.724745617261248, "grad_norm": 1.3252701518726828, "learning_rate": 1.8587173770399886e-06, "loss": 0.5967, "step": 23647 }, { "epoch": 0.7247762657839892, "grad_norm": 1.4044496977694356, "learning_rate": 1.8583312541317288e-06, "loss": 0.664, "step": 23648 }, { "epoch": 0.7248069143067304, "grad_norm": 0.4531745323648328, "learning_rate": 1.8579451621791377e-06, "loss": 0.4164, "step": 23649 }, { "epoch": 0.7248375628294716, "grad_norm": 0.4396538608855096, "learning_rate": 1.8575591011860146e-06, "loss": 0.3945, "step": 23650 }, { "epoch": 0.7248682113522128, "grad_norm": 1.3158906729719704, "learning_rate": 1.8571730711561675e-06, "loss": 0.6552, "step": 23651 }, { "epoch": 0.7248988598749541, "grad_norm": 0.44537053000739707, "learning_rate": 1.8567870720933967e-06, "loss": 0.3798, "step": 23652 }, { "epoch": 0.7249295083976952, "grad_norm": 1.3180481967464006, "learning_rate": 1.8564011040015074e-06, "loss": 0.6087, "step": 23653 }, { "epoch": 0.7249601569204365, "grad_norm": 1.2191607125374218, "learning_rate": 1.8560151668843035e-06, "loss": 0.5521, "step": 23654 }, { "epoch": 0.7249908054431776, "grad_norm": 0.45443509488723843, "learning_rate": 1.855629260745585e-06, "loss": 0.3997, "step": 23655 }, { "epoch": 0.7250214539659189, "grad_norm": 1.5322256623468502, "learning_rate": 1.855243385589156e-06, "loss": 0.7108, "step": 23656 }, { "epoch": 0.72505210248866, "grad_norm": 1.4849170531617384, "learning_rate": 1.8548575414188202e-06, "loss": 0.647, "step": 23657 }, { "epoch": 0.7250827510114013, "grad_norm": 1.2446313451058375, "learning_rate": 1.8544717282383755e-06, "loss": 0.603, "step": 23658 }, { "epoch": 0.7251133995341424, "grad_norm": 0.46566668503998276, "learning_rate": 1.8540859460516265e-06, "loss": 0.3978, "step": 23659 }, { "epoch": 0.7251440480568837, "grad_norm": 1.3621423577279461, "learning_rate": 1.853700194862375e-06, "loss": 0.5731, "step": 23660 }, { "epoch": 0.7251746965796249, "grad_norm": 1.1855725522058789, "learning_rate": 1.8533144746744192e-06, "loss": 0.5631, "step": 23661 }, { "epoch": 0.7252053451023661, "grad_norm": 0.4449902714749347, "learning_rate": 1.8529287854915622e-06, "loss": 0.3942, "step": 23662 }, { "epoch": 0.7252359936251073, "grad_norm": 1.6185045780155383, "learning_rate": 1.8525431273175998e-06, "loss": 0.5982, "step": 23663 }, { "epoch": 0.7252666421478485, "grad_norm": 1.2243441433543332, "learning_rate": 1.8521575001563386e-06, "loss": 0.6701, "step": 23664 }, { "epoch": 0.7252972906705897, "grad_norm": 1.3862955123888587, "learning_rate": 1.851771904011575e-06, "loss": 0.5941, "step": 23665 }, { "epoch": 0.7253279391933308, "grad_norm": 1.3568999839092322, "learning_rate": 1.8513863388871067e-06, "loss": 0.6613, "step": 23666 }, { "epoch": 0.7253585877160721, "grad_norm": 1.4207389479165748, "learning_rate": 1.8510008047867345e-06, "loss": 0.6316, "step": 23667 }, { "epoch": 0.7253892362388132, "grad_norm": 1.2067742809082382, "learning_rate": 1.8506153017142587e-06, "loss": 0.5683, "step": 23668 }, { "epoch": 0.7254198847615545, "grad_norm": 0.4474470361915144, "learning_rate": 1.8502298296734744e-06, "loss": 0.3956, "step": 23669 }, { "epoch": 0.7254505332842957, "grad_norm": 1.144367397016092, "learning_rate": 1.8498443886681817e-06, "loss": 0.549, "step": 23670 }, { "epoch": 0.7254811818070369, "grad_norm": 1.3495954747546755, "learning_rate": 1.8494589787021777e-06, "loss": 0.6582, "step": 23671 }, { "epoch": 0.7255118303297781, "grad_norm": 1.2552538822056973, "learning_rate": 1.8490735997792625e-06, "loss": 0.6172, "step": 23672 }, { "epoch": 0.7255424788525193, "grad_norm": 1.1400330921307626, "learning_rate": 1.8486882519032317e-06, "loss": 0.636, "step": 23673 }, { "epoch": 0.7255731273752605, "grad_norm": 1.278200954607798, "learning_rate": 1.848302935077878e-06, "loss": 0.6141, "step": 23674 }, { "epoch": 0.7256037758980017, "grad_norm": 1.3072153358786, "learning_rate": 1.8479176493070055e-06, "loss": 0.5376, "step": 23675 }, { "epoch": 0.7256344244207429, "grad_norm": 0.44396783612188795, "learning_rate": 1.8475323945944067e-06, "loss": 0.3825, "step": 23676 }, { "epoch": 0.7256650729434841, "grad_norm": 1.5473850444538533, "learning_rate": 1.847147170943876e-06, "loss": 0.5855, "step": 23677 }, { "epoch": 0.7256957214662253, "grad_norm": 1.207433905917257, "learning_rate": 1.8467619783592112e-06, "loss": 0.5691, "step": 23678 }, { "epoch": 0.7257263699889666, "grad_norm": 1.3485954561356712, "learning_rate": 1.8463768168442091e-06, "loss": 0.5956, "step": 23679 }, { "epoch": 0.7257570185117077, "grad_norm": 1.5199518338386495, "learning_rate": 1.8459916864026611e-06, "loss": 0.5836, "step": 23680 }, { "epoch": 0.725787667034449, "grad_norm": 1.3740169652318475, "learning_rate": 1.8456065870383643e-06, "loss": 0.6274, "step": 23681 }, { "epoch": 0.7258183155571901, "grad_norm": 1.2529509222353745, "learning_rate": 1.8452215187551132e-06, "loss": 0.5819, "step": 23682 }, { "epoch": 0.7258489640799314, "grad_norm": 1.3617944618583024, "learning_rate": 1.8448364815567027e-06, "loss": 0.6853, "step": 23683 }, { "epoch": 0.7258796126026725, "grad_norm": 1.3289428493543722, "learning_rate": 1.844451475446926e-06, "loss": 0.5766, "step": 23684 }, { "epoch": 0.7259102611254138, "grad_norm": 1.3781909898702853, "learning_rate": 1.8440665004295743e-06, "loss": 0.6129, "step": 23685 }, { "epoch": 0.7259409096481549, "grad_norm": 1.3003334539014584, "learning_rate": 1.8436815565084432e-06, "loss": 0.5991, "step": 23686 }, { "epoch": 0.7259715581708962, "grad_norm": 1.3958659860388596, "learning_rate": 1.8432966436873268e-06, "loss": 0.5985, "step": 23687 }, { "epoch": 0.7260022066936374, "grad_norm": 1.3258288580643176, "learning_rate": 1.8429117619700149e-06, "loss": 0.6203, "step": 23688 }, { "epoch": 0.7260328552163786, "grad_norm": 1.266285557954684, "learning_rate": 1.8425269113603005e-06, "loss": 0.5704, "step": 23689 }, { "epoch": 0.7260635037391198, "grad_norm": 1.4155031319685683, "learning_rate": 1.842142091861977e-06, "loss": 0.5438, "step": 23690 }, { "epoch": 0.726094152261861, "grad_norm": 1.2556043098747127, "learning_rate": 1.8417573034788367e-06, "loss": 0.6301, "step": 23691 }, { "epoch": 0.7261248007846022, "grad_norm": 1.4182308129618453, "learning_rate": 1.841372546214668e-06, "loss": 0.5779, "step": 23692 }, { "epoch": 0.7261554493073434, "grad_norm": 1.189125399686553, "learning_rate": 1.8409878200732644e-06, "loss": 0.6243, "step": 23693 }, { "epoch": 0.7261860978300846, "grad_norm": 0.4492513931026815, "learning_rate": 1.8406031250584171e-06, "loss": 0.384, "step": 23694 }, { "epoch": 0.7262167463528258, "grad_norm": 1.537134826792286, "learning_rate": 1.840218461173916e-06, "loss": 0.5771, "step": 23695 }, { "epoch": 0.726247394875567, "grad_norm": 1.511274793754665, "learning_rate": 1.839833828423549e-06, "loss": 0.6454, "step": 23696 }, { "epoch": 0.7262780433983081, "grad_norm": 1.3499137779661965, "learning_rate": 1.8394492268111081e-06, "loss": 0.5598, "step": 23697 }, { "epoch": 0.7263086919210494, "grad_norm": 1.170074436498821, "learning_rate": 1.8390646563403819e-06, "loss": 0.5037, "step": 23698 }, { "epoch": 0.7263393404437906, "grad_norm": 1.4913615252656727, "learning_rate": 1.838680117015163e-06, "loss": 0.6037, "step": 23699 }, { "epoch": 0.7263699889665318, "grad_norm": 1.37457726825483, "learning_rate": 1.8382956088392356e-06, "loss": 0.6166, "step": 23700 }, { "epoch": 0.726400637489273, "grad_norm": 1.2529052775191214, "learning_rate": 1.8379111318163905e-06, "loss": 0.5267, "step": 23701 }, { "epoch": 0.7264312860120142, "grad_norm": 0.46048477722535175, "learning_rate": 1.8375266859504177e-06, "loss": 0.4038, "step": 23702 }, { "epoch": 0.7264619345347554, "grad_norm": 1.4012717315696812, "learning_rate": 1.8371422712451037e-06, "loss": 0.6184, "step": 23703 }, { "epoch": 0.7264925830574966, "grad_norm": 1.3797641719391849, "learning_rate": 1.836757887704233e-06, "loss": 0.6304, "step": 23704 }, { "epoch": 0.7265232315802378, "grad_norm": 1.4344875832874542, "learning_rate": 1.8363735353315991e-06, "loss": 0.6121, "step": 23705 }, { "epoch": 0.726553880102979, "grad_norm": 1.2802577943218791, "learning_rate": 1.835989214130987e-06, "loss": 0.5333, "step": 23706 }, { "epoch": 0.7265845286257202, "grad_norm": 1.1316876363776125, "learning_rate": 1.8356049241061802e-06, "loss": 0.5062, "step": 23707 }, { "epoch": 0.7266151771484615, "grad_norm": 1.2530596607651445, "learning_rate": 1.8352206652609682e-06, "loss": 0.5793, "step": 23708 }, { "epoch": 0.7266458256712026, "grad_norm": 1.3027975754764973, "learning_rate": 1.8348364375991368e-06, "loss": 0.5731, "step": 23709 }, { "epoch": 0.7266764741939439, "grad_norm": 1.3656402402359884, "learning_rate": 1.834452241124473e-06, "loss": 0.6067, "step": 23710 }, { "epoch": 0.726707122716685, "grad_norm": 0.44790509139949763, "learning_rate": 1.8340680758407597e-06, "loss": 0.3835, "step": 23711 }, { "epoch": 0.7267377712394263, "grad_norm": 1.2251673669510081, "learning_rate": 1.8336839417517837e-06, "loss": 0.5729, "step": 23712 }, { "epoch": 0.7267684197621674, "grad_norm": 1.3405203239142514, "learning_rate": 1.833299838861332e-06, "loss": 0.6436, "step": 23713 }, { "epoch": 0.7267990682849087, "grad_norm": 1.441148739166679, "learning_rate": 1.8329157671731873e-06, "loss": 0.6712, "step": 23714 }, { "epoch": 0.7268297168076499, "grad_norm": 1.3518628644237334, "learning_rate": 1.8325317266911297e-06, "loss": 0.6134, "step": 23715 }, { "epoch": 0.7268603653303911, "grad_norm": 0.4374194764863506, "learning_rate": 1.8321477174189518e-06, "loss": 0.3841, "step": 23716 }, { "epoch": 0.7268910138531323, "grad_norm": 1.4600836847127665, "learning_rate": 1.8317637393604304e-06, "loss": 0.5534, "step": 23717 }, { "epoch": 0.7269216623758735, "grad_norm": 1.2259467250935667, "learning_rate": 1.831379792519354e-06, "loss": 0.6305, "step": 23718 }, { "epoch": 0.7269523108986147, "grad_norm": 1.224957327500885, "learning_rate": 1.8309958768995007e-06, "loss": 0.6015, "step": 23719 }, { "epoch": 0.7269829594213559, "grad_norm": 1.4073555351302074, "learning_rate": 1.830611992504656e-06, "loss": 0.696, "step": 23720 }, { "epoch": 0.7270136079440971, "grad_norm": 1.1270954477459227, "learning_rate": 1.8302281393386046e-06, "loss": 0.7161, "step": 23721 }, { "epoch": 0.7270442564668383, "grad_norm": 1.4604910958841315, "learning_rate": 1.829844317405124e-06, "loss": 0.6627, "step": 23722 }, { "epoch": 0.7270749049895795, "grad_norm": 1.2404600524897575, "learning_rate": 1.829460526707999e-06, "loss": 0.5558, "step": 23723 }, { "epoch": 0.7271055535123208, "grad_norm": 1.4220972652098054, "learning_rate": 1.8290767672510117e-06, "loss": 0.6224, "step": 23724 }, { "epoch": 0.7271362020350619, "grad_norm": 0.4433100641416153, "learning_rate": 1.8286930390379416e-06, "loss": 0.3859, "step": 23725 }, { "epoch": 0.7271668505578032, "grad_norm": 1.260455391079721, "learning_rate": 1.8283093420725695e-06, "loss": 0.592, "step": 23726 }, { "epoch": 0.7271974990805443, "grad_norm": 1.2289432624203651, "learning_rate": 1.827925676358679e-06, "loss": 0.7217, "step": 23727 }, { "epoch": 0.7272281476032855, "grad_norm": 1.2791323464919424, "learning_rate": 1.8275420419000466e-06, "loss": 0.593, "step": 23728 }, { "epoch": 0.7272587961260267, "grad_norm": 1.2989463241195698, "learning_rate": 1.8271584387004559e-06, "loss": 0.6459, "step": 23729 }, { "epoch": 0.7272894446487679, "grad_norm": 1.743177112390859, "learning_rate": 1.8267748667636831e-06, "loss": 0.5227, "step": 23730 }, { "epoch": 0.7273200931715091, "grad_norm": 1.490531727579782, "learning_rate": 1.8263913260935102e-06, "loss": 0.6513, "step": 23731 }, { "epoch": 0.7273507416942503, "grad_norm": 1.4462319488158224, "learning_rate": 1.8260078166937161e-06, "loss": 0.6574, "step": 23732 }, { "epoch": 0.7273813902169916, "grad_norm": 1.3420060408252428, "learning_rate": 1.8256243385680782e-06, "loss": 0.5716, "step": 23733 }, { "epoch": 0.7274120387397327, "grad_norm": 1.3092409661430793, "learning_rate": 1.8252408917203756e-06, "loss": 0.6895, "step": 23734 }, { "epoch": 0.727442687262474, "grad_norm": 1.2001070974335168, "learning_rate": 1.8248574761543885e-06, "loss": 0.5185, "step": 23735 }, { "epoch": 0.7274733357852151, "grad_norm": 1.384276818646398, "learning_rate": 1.8244740918738917e-06, "loss": 0.5932, "step": 23736 }, { "epoch": 0.7275039843079564, "grad_norm": 1.3422040192507558, "learning_rate": 1.8240907388826656e-06, "loss": 0.5931, "step": 23737 }, { "epoch": 0.7275346328306975, "grad_norm": 1.3749195535536034, "learning_rate": 1.8237074171844843e-06, "loss": 0.6278, "step": 23738 }, { "epoch": 0.7275652813534388, "grad_norm": 0.47056313397075283, "learning_rate": 1.8233241267831265e-06, "loss": 0.409, "step": 23739 }, { "epoch": 0.7275959298761799, "grad_norm": 1.5168099377504372, "learning_rate": 1.8229408676823707e-06, "loss": 0.5642, "step": 23740 }, { "epoch": 0.7276265783989212, "grad_norm": 0.4421657726938261, "learning_rate": 1.8225576398859896e-06, "loss": 0.3875, "step": 23741 }, { "epoch": 0.7276572269216623, "grad_norm": 1.3224515507455916, "learning_rate": 1.8221744433977612e-06, "loss": 0.5675, "step": 23742 }, { "epoch": 0.7276878754444036, "grad_norm": 1.5278883709853772, "learning_rate": 1.8217912782214625e-06, "loss": 0.6557, "step": 23743 }, { "epoch": 0.7277185239671448, "grad_norm": 1.3647698647931976, "learning_rate": 1.821408144360866e-06, "loss": 0.5769, "step": 23744 }, { "epoch": 0.727749172489886, "grad_norm": 1.2737026778781986, "learning_rate": 1.8210250418197484e-06, "loss": 0.5172, "step": 23745 }, { "epoch": 0.7277798210126272, "grad_norm": 1.287488664137442, "learning_rate": 1.8206419706018857e-06, "loss": 0.6495, "step": 23746 }, { "epoch": 0.7278104695353684, "grad_norm": 2.90183470396849, "learning_rate": 1.8202589307110501e-06, "loss": 0.5723, "step": 23747 }, { "epoch": 0.7278411180581096, "grad_norm": 1.0450579239661488, "learning_rate": 1.8198759221510182e-06, "loss": 0.5338, "step": 23748 }, { "epoch": 0.7278717665808508, "grad_norm": 1.3000671121127176, "learning_rate": 1.8194929449255605e-06, "loss": 0.5827, "step": 23749 }, { "epoch": 0.727902415103592, "grad_norm": 1.2121043270997685, "learning_rate": 1.819109999038453e-06, "loss": 0.6735, "step": 23750 }, { "epoch": 0.7279330636263333, "grad_norm": 1.3818544413090388, "learning_rate": 1.8187270844934702e-06, "loss": 0.6018, "step": 23751 }, { "epoch": 0.7279637121490744, "grad_norm": 0.44904824471923654, "learning_rate": 1.818344201294382e-06, "loss": 0.3934, "step": 23752 }, { "epoch": 0.7279943606718157, "grad_norm": 1.1826741201725561, "learning_rate": 1.8179613494449615e-06, "loss": 0.5625, "step": 23753 }, { "epoch": 0.7280250091945568, "grad_norm": 1.210060311083726, "learning_rate": 1.8175785289489844e-06, "loss": 0.5911, "step": 23754 }, { "epoch": 0.7280556577172981, "grad_norm": 0.43822858446897395, "learning_rate": 1.8171957398102186e-06, "loss": 0.3922, "step": 23755 }, { "epoch": 0.7280863062400392, "grad_norm": 1.3381356701807323, "learning_rate": 1.8168129820324375e-06, "loss": 0.6278, "step": 23756 }, { "epoch": 0.7281169547627805, "grad_norm": 1.0537805688850128, "learning_rate": 1.816430255619414e-06, "loss": 0.5496, "step": 23757 }, { "epoch": 0.7281476032855216, "grad_norm": 0.45832777513190326, "learning_rate": 1.8160475605749166e-06, "loss": 0.3942, "step": 23758 }, { "epoch": 0.7281782518082628, "grad_norm": 1.1899211148262065, "learning_rate": 1.8156648969027186e-06, "loss": 0.55, "step": 23759 }, { "epoch": 0.728208900331004, "grad_norm": 1.4147292225235406, "learning_rate": 1.815282264606587e-06, "loss": 0.6529, "step": 23760 }, { "epoch": 0.7282395488537452, "grad_norm": 1.425734302361413, "learning_rate": 1.814899663690295e-06, "loss": 0.7126, "step": 23761 }, { "epoch": 0.7282701973764865, "grad_norm": 0.44311969774682214, "learning_rate": 1.8145170941576124e-06, "loss": 0.3977, "step": 23762 }, { "epoch": 0.7283008458992276, "grad_norm": 1.454159140787077, "learning_rate": 1.8141345560123065e-06, "loss": 0.6236, "step": 23763 }, { "epoch": 0.7283314944219689, "grad_norm": 1.3042644665141294, "learning_rate": 1.8137520492581478e-06, "loss": 0.7269, "step": 23764 }, { "epoch": 0.72836214294471, "grad_norm": 1.2945573358422908, "learning_rate": 1.8133695738989077e-06, "loss": 0.7181, "step": 23765 }, { "epoch": 0.7283927914674513, "grad_norm": 1.3725625356988105, "learning_rate": 1.81298712993835e-06, "loss": 0.6717, "step": 23766 }, { "epoch": 0.7284234399901924, "grad_norm": 1.2556512579181065, "learning_rate": 1.8126047173802463e-06, "loss": 0.5882, "step": 23767 }, { "epoch": 0.7284540885129337, "grad_norm": 1.3026049345166357, "learning_rate": 1.8122223362283653e-06, "loss": 0.613, "step": 23768 }, { "epoch": 0.7284847370356748, "grad_norm": 1.2573437525682591, "learning_rate": 1.811839986486471e-06, "loss": 0.5178, "step": 23769 }, { "epoch": 0.7285153855584161, "grad_norm": 1.1861496396244389, "learning_rate": 1.8114576681583351e-06, "loss": 0.5978, "step": 23770 }, { "epoch": 0.7285460340811573, "grad_norm": 1.3169841568930947, "learning_rate": 1.8110753812477195e-06, "loss": 0.6037, "step": 23771 }, { "epoch": 0.7285766826038985, "grad_norm": 1.2109552405249446, "learning_rate": 1.8106931257583975e-06, "loss": 0.6339, "step": 23772 }, { "epoch": 0.7286073311266397, "grad_norm": 0.46721556903220424, "learning_rate": 1.8103109016941317e-06, "loss": 0.3965, "step": 23773 }, { "epoch": 0.7286379796493809, "grad_norm": 1.2622663335076856, "learning_rate": 1.8099287090586876e-06, "loss": 0.5601, "step": 23774 }, { "epoch": 0.7286686281721221, "grad_norm": 1.147304486240526, "learning_rate": 1.8095465478558317e-06, "loss": 0.5878, "step": 23775 }, { "epoch": 0.7286992766948633, "grad_norm": 0.4346940642544407, "learning_rate": 1.8091644180893313e-06, "loss": 0.394, "step": 23776 }, { "epoch": 0.7287299252176045, "grad_norm": 1.3223677311978423, "learning_rate": 1.8087823197629495e-06, "loss": 0.5548, "step": 23777 }, { "epoch": 0.7287605737403458, "grad_norm": 1.3070532822463523, "learning_rate": 1.8084002528804518e-06, "loss": 0.5387, "step": 23778 }, { "epoch": 0.7287912222630869, "grad_norm": 1.1849494786755108, "learning_rate": 1.8080182174456024e-06, "loss": 0.565, "step": 23779 }, { "epoch": 0.7288218707858282, "grad_norm": 1.2833852642436152, "learning_rate": 1.8076362134621683e-06, "loss": 0.5329, "step": 23780 }, { "epoch": 0.7288525193085693, "grad_norm": 1.283589711940046, "learning_rate": 1.8072542409339117e-06, "loss": 0.6128, "step": 23781 }, { "epoch": 0.7288831678313106, "grad_norm": 1.2953432188073322, "learning_rate": 1.8068722998645939e-06, "loss": 0.5875, "step": 23782 }, { "epoch": 0.7289138163540517, "grad_norm": 1.4386900715741233, "learning_rate": 1.80649039025798e-06, "loss": 0.7101, "step": 23783 }, { "epoch": 0.728944464876793, "grad_norm": 1.2898774705324831, "learning_rate": 1.8061085121178357e-06, "loss": 0.5729, "step": 23784 }, { "epoch": 0.7289751133995341, "grad_norm": 1.2140353106086088, "learning_rate": 1.8057266654479195e-06, "loss": 0.5939, "step": 23785 }, { "epoch": 0.7290057619222754, "grad_norm": 1.3361177810576599, "learning_rate": 1.8053448502519954e-06, "loss": 0.6343, "step": 23786 }, { "epoch": 0.7290364104450165, "grad_norm": 0.4591619821688812, "learning_rate": 1.804963066533828e-06, "loss": 0.4163, "step": 23787 }, { "epoch": 0.7290670589677578, "grad_norm": 1.2701263407997536, "learning_rate": 1.8045813142971752e-06, "loss": 0.6193, "step": 23788 }, { "epoch": 0.729097707490499, "grad_norm": 1.3409962961510848, "learning_rate": 1.8041995935458023e-06, "loss": 0.5816, "step": 23789 }, { "epoch": 0.7291283560132401, "grad_norm": 1.5779098105458658, "learning_rate": 1.8038179042834648e-06, "loss": 0.6269, "step": 23790 }, { "epoch": 0.7291590045359814, "grad_norm": 1.4649858146774404, "learning_rate": 1.8034362465139304e-06, "loss": 0.6402, "step": 23791 }, { "epoch": 0.7291896530587225, "grad_norm": 1.450627286488387, "learning_rate": 1.803054620240957e-06, "loss": 0.5492, "step": 23792 }, { "epoch": 0.7292203015814638, "grad_norm": 1.2858037744870008, "learning_rate": 1.8026730254683023e-06, "loss": 0.5823, "step": 23793 }, { "epoch": 0.7292509501042049, "grad_norm": 1.4467986204679777, "learning_rate": 1.802291462199729e-06, "loss": 0.6441, "step": 23794 }, { "epoch": 0.7292815986269462, "grad_norm": 1.2720588061112803, "learning_rate": 1.801909930438997e-06, "loss": 0.5495, "step": 23795 }, { "epoch": 0.7293122471496873, "grad_norm": 1.3928457173856772, "learning_rate": 1.8015284301898633e-06, "loss": 0.6606, "step": 23796 }, { "epoch": 0.7293428956724286, "grad_norm": 1.3154726714322678, "learning_rate": 1.8011469614560883e-06, "loss": 0.6016, "step": 23797 }, { "epoch": 0.7293735441951698, "grad_norm": 1.3901993881849788, "learning_rate": 1.8007655242414313e-06, "loss": 0.5697, "step": 23798 }, { "epoch": 0.729404192717911, "grad_norm": 1.4445332407831615, "learning_rate": 1.8003841185496513e-06, "loss": 0.5855, "step": 23799 }, { "epoch": 0.7294348412406522, "grad_norm": 1.2734124526946629, "learning_rate": 1.8000027443845052e-06, "loss": 0.6325, "step": 23800 }, { "epoch": 0.7294654897633934, "grad_norm": 1.439509725668293, "learning_rate": 1.7996214017497477e-06, "loss": 0.6612, "step": 23801 }, { "epoch": 0.7294961382861346, "grad_norm": 1.4591478475556239, "learning_rate": 1.7992400906491426e-06, "loss": 0.6138, "step": 23802 }, { "epoch": 0.7295267868088758, "grad_norm": 1.3924355228246967, "learning_rate": 1.7988588110864436e-06, "loss": 0.6053, "step": 23803 }, { "epoch": 0.729557435331617, "grad_norm": 1.240235962667947, "learning_rate": 1.7984775630654067e-06, "loss": 0.5872, "step": 23804 }, { "epoch": 0.7295880838543582, "grad_norm": 1.3242405242109525, "learning_rate": 1.798096346589789e-06, "loss": 0.6998, "step": 23805 }, { "epoch": 0.7296187323770994, "grad_norm": 1.283136104945478, "learning_rate": 1.7977151616633475e-06, "loss": 0.6561, "step": 23806 }, { "epoch": 0.7296493808998407, "grad_norm": 1.3989255361540507, "learning_rate": 1.7973340082898395e-06, "loss": 0.6308, "step": 23807 }, { "epoch": 0.7296800294225818, "grad_norm": 1.3572688532750283, "learning_rate": 1.7969528864730168e-06, "loss": 0.6019, "step": 23808 }, { "epoch": 0.7297106779453231, "grad_norm": 1.4426238194112335, "learning_rate": 1.7965717962166374e-06, "loss": 0.6169, "step": 23809 }, { "epoch": 0.7297413264680642, "grad_norm": 1.3146585645037476, "learning_rate": 1.7961907375244574e-06, "loss": 0.6268, "step": 23810 }, { "epoch": 0.7297719749908055, "grad_norm": 1.5418313456706023, "learning_rate": 1.7958097104002297e-06, "loss": 0.6291, "step": 23811 }, { "epoch": 0.7298026235135466, "grad_norm": 1.3185990513325447, "learning_rate": 1.795428714847705e-06, "loss": 0.6368, "step": 23812 }, { "epoch": 0.7298332720362879, "grad_norm": 1.263957882756217, "learning_rate": 1.7950477508706448e-06, "loss": 0.5601, "step": 23813 }, { "epoch": 0.729863920559029, "grad_norm": 1.2679260524755585, "learning_rate": 1.7946668184727995e-06, "loss": 0.5934, "step": 23814 }, { "epoch": 0.7298945690817703, "grad_norm": 1.2883944587353846, "learning_rate": 1.7942859176579203e-06, "loss": 0.5863, "step": 23815 }, { "epoch": 0.7299252176045115, "grad_norm": 1.2743499123831357, "learning_rate": 1.7939050484297616e-06, "loss": 0.6021, "step": 23816 }, { "epoch": 0.7299558661272527, "grad_norm": 1.3165936554856998, "learning_rate": 1.7935242107920775e-06, "loss": 0.5237, "step": 23817 }, { "epoch": 0.7299865146499939, "grad_norm": 1.3079977699845502, "learning_rate": 1.7931434047486208e-06, "loss": 0.6871, "step": 23818 }, { "epoch": 0.7300171631727351, "grad_norm": 1.3032085385975853, "learning_rate": 1.7927626303031414e-06, "loss": 0.6178, "step": 23819 }, { "epoch": 0.7300478116954763, "grad_norm": 1.2779530390738982, "learning_rate": 1.7923818874593924e-06, "loss": 0.5437, "step": 23820 }, { "epoch": 0.7300784602182174, "grad_norm": 1.4066287924478202, "learning_rate": 1.792001176221127e-06, "loss": 0.5533, "step": 23821 }, { "epoch": 0.7301091087409587, "grad_norm": 1.3951530467681277, "learning_rate": 1.7916204965920946e-06, "loss": 0.575, "step": 23822 }, { "epoch": 0.7301397572636998, "grad_norm": 1.369579714827884, "learning_rate": 1.791239848576043e-06, "loss": 0.6699, "step": 23823 }, { "epoch": 0.7301704057864411, "grad_norm": 1.466572377201821, "learning_rate": 1.7908592321767298e-06, "loss": 0.586, "step": 23824 }, { "epoch": 0.7302010543091823, "grad_norm": 1.0329414823088006, "learning_rate": 1.7904786473978996e-06, "loss": 0.5234, "step": 23825 }, { "epoch": 0.7302317028319235, "grad_norm": 1.2195402944719818, "learning_rate": 1.7900980942433067e-06, "loss": 0.5502, "step": 23826 }, { "epoch": 0.7302623513546647, "grad_norm": 1.469192485592942, "learning_rate": 1.7897175727166966e-06, "loss": 0.5975, "step": 23827 }, { "epoch": 0.7302929998774059, "grad_norm": 1.3426316470154929, "learning_rate": 1.7893370828218204e-06, "loss": 0.6382, "step": 23828 }, { "epoch": 0.7303236484001471, "grad_norm": 0.4423607688790246, "learning_rate": 1.7889566245624296e-06, "loss": 0.3789, "step": 23829 }, { "epoch": 0.7303542969228883, "grad_norm": 1.4169768824923041, "learning_rate": 1.788576197942269e-06, "loss": 0.5665, "step": 23830 }, { "epoch": 0.7303849454456295, "grad_norm": 0.46001893627552015, "learning_rate": 1.788195802965088e-06, "loss": 0.3966, "step": 23831 }, { "epoch": 0.7304155939683707, "grad_norm": 0.4568187198751718, "learning_rate": 1.787815439634638e-06, "loss": 0.4047, "step": 23832 }, { "epoch": 0.7304462424911119, "grad_norm": 1.327237004368599, "learning_rate": 1.7874351079546642e-06, "loss": 0.6364, "step": 23833 }, { "epoch": 0.7304768910138532, "grad_norm": 1.3048539163665005, "learning_rate": 1.7870548079289123e-06, "loss": 0.6419, "step": 23834 }, { "epoch": 0.7305075395365943, "grad_norm": 1.3951414997780365, "learning_rate": 1.7866745395611318e-06, "loss": 0.6341, "step": 23835 }, { "epoch": 0.7305381880593356, "grad_norm": 1.414084707833192, "learning_rate": 1.7862943028550694e-06, "loss": 0.7, "step": 23836 }, { "epoch": 0.7305688365820767, "grad_norm": 1.2171184739681755, "learning_rate": 1.785914097814473e-06, "loss": 0.545, "step": 23837 }, { "epoch": 0.730599485104818, "grad_norm": 1.3312434679523981, "learning_rate": 1.7855339244430852e-06, "loss": 0.6505, "step": 23838 }, { "epoch": 0.7306301336275591, "grad_norm": 1.2755787008379904, "learning_rate": 1.7851537827446548e-06, "loss": 0.647, "step": 23839 }, { "epoch": 0.7306607821503004, "grad_norm": 1.3039368175874524, "learning_rate": 1.7847736727229276e-06, "loss": 0.6378, "step": 23840 }, { "epoch": 0.7306914306730415, "grad_norm": 1.1473567726006826, "learning_rate": 1.7843935943816488e-06, "loss": 0.5771, "step": 23841 }, { "epoch": 0.7307220791957828, "grad_norm": 1.256895469703077, "learning_rate": 1.784013547724559e-06, "loss": 0.5396, "step": 23842 }, { "epoch": 0.730752727718524, "grad_norm": 0.4532940308008411, "learning_rate": 1.7836335327554099e-06, "loss": 0.3956, "step": 23843 }, { "epoch": 0.7307833762412652, "grad_norm": 1.4873984811099248, "learning_rate": 1.7832535494779408e-06, "loss": 0.5679, "step": 23844 }, { "epoch": 0.7308140247640064, "grad_norm": 1.5021542209994263, "learning_rate": 1.7828735978958995e-06, "loss": 0.5645, "step": 23845 }, { "epoch": 0.7308446732867476, "grad_norm": 1.1512843420841874, "learning_rate": 1.782493678013026e-06, "loss": 0.5422, "step": 23846 }, { "epoch": 0.7308753218094888, "grad_norm": 1.2607821221983762, "learning_rate": 1.7821137898330654e-06, "loss": 0.5741, "step": 23847 }, { "epoch": 0.73090597033223, "grad_norm": 0.45129969209369575, "learning_rate": 1.7817339333597622e-06, "loss": 0.3849, "step": 23848 }, { "epoch": 0.7309366188549712, "grad_norm": 0.47633041978851476, "learning_rate": 1.7813541085968573e-06, "loss": 0.4118, "step": 23849 }, { "epoch": 0.7309672673777124, "grad_norm": 1.2923830221119879, "learning_rate": 1.7809743155480929e-06, "loss": 0.632, "step": 23850 }, { "epoch": 0.7309979159004536, "grad_norm": 1.3833958834220939, "learning_rate": 1.7805945542172143e-06, "loss": 0.6733, "step": 23851 }, { "epoch": 0.7310285644231947, "grad_norm": 1.1911201570288235, "learning_rate": 1.7802148246079597e-06, "loss": 0.5905, "step": 23852 }, { "epoch": 0.731059212945936, "grad_norm": 1.3189163009428437, "learning_rate": 1.7798351267240722e-06, "loss": 0.5254, "step": 23853 }, { "epoch": 0.7310898614686772, "grad_norm": 1.3707771632079593, "learning_rate": 1.779455460569295e-06, "loss": 0.6161, "step": 23854 }, { "epoch": 0.7311205099914184, "grad_norm": 1.3100435206743672, "learning_rate": 1.7790758261473651e-06, "loss": 0.6467, "step": 23855 }, { "epoch": 0.7311511585141596, "grad_norm": 1.444076563559779, "learning_rate": 1.778696223462027e-06, "loss": 0.632, "step": 23856 }, { "epoch": 0.7311818070369008, "grad_norm": 1.2789951849235834, "learning_rate": 1.7783166525170175e-06, "loss": 0.5599, "step": 23857 }, { "epoch": 0.731212455559642, "grad_norm": 1.2916303840664336, "learning_rate": 1.7779371133160784e-06, "loss": 0.5288, "step": 23858 }, { "epoch": 0.7312431040823832, "grad_norm": 0.4509166700633039, "learning_rate": 1.7775576058629512e-06, "loss": 0.3962, "step": 23859 }, { "epoch": 0.7312737526051244, "grad_norm": 1.3349541749935159, "learning_rate": 1.7771781301613716e-06, "loss": 0.6292, "step": 23860 }, { "epoch": 0.7313044011278657, "grad_norm": 1.4669058564911193, "learning_rate": 1.7767986862150805e-06, "loss": 0.5923, "step": 23861 }, { "epoch": 0.7313350496506068, "grad_norm": 1.4631937155180514, "learning_rate": 1.776419274027818e-06, "loss": 0.6072, "step": 23862 }, { "epoch": 0.7313656981733481, "grad_norm": 0.4730563907200288, "learning_rate": 1.7760398936033195e-06, "loss": 0.399, "step": 23863 }, { "epoch": 0.7313963466960892, "grad_norm": 1.2339172107713534, "learning_rate": 1.7756605449453252e-06, "loss": 0.6646, "step": 23864 }, { "epoch": 0.7314269952188305, "grad_norm": 1.3035581075473497, "learning_rate": 1.7752812280575737e-06, "loss": 0.5724, "step": 23865 }, { "epoch": 0.7314576437415716, "grad_norm": 1.6064843564780558, "learning_rate": 1.7749019429438003e-06, "loss": 0.5703, "step": 23866 }, { "epoch": 0.7314882922643129, "grad_norm": 1.4924718837669815, "learning_rate": 1.7745226896077444e-06, "loss": 0.6323, "step": 23867 }, { "epoch": 0.731518940787054, "grad_norm": 0.4396450701897857, "learning_rate": 1.7741434680531405e-06, "loss": 0.4094, "step": 23868 }, { "epoch": 0.7315495893097953, "grad_norm": 1.6222628878670853, "learning_rate": 1.773764278283726e-06, "loss": 0.5827, "step": 23869 }, { "epoch": 0.7315802378325365, "grad_norm": 1.3636031719787376, "learning_rate": 1.7733851203032393e-06, "loss": 0.6093, "step": 23870 }, { "epoch": 0.7316108863552777, "grad_norm": 1.4766059369293774, "learning_rate": 1.7730059941154133e-06, "loss": 0.668, "step": 23871 }, { "epoch": 0.7316415348780189, "grad_norm": 1.3075676651121761, "learning_rate": 1.7726268997239843e-06, "loss": 0.6421, "step": 23872 }, { "epoch": 0.7316721834007601, "grad_norm": 1.4700462849050753, "learning_rate": 1.7722478371326902e-06, "loss": 0.5849, "step": 23873 }, { "epoch": 0.7317028319235013, "grad_norm": 1.3292507780966105, "learning_rate": 1.7718688063452621e-06, "loss": 0.6531, "step": 23874 }, { "epoch": 0.7317334804462425, "grad_norm": 1.3573682389859423, "learning_rate": 1.7714898073654368e-06, "loss": 0.5697, "step": 23875 }, { "epoch": 0.7317641289689837, "grad_norm": 1.2168821449206917, "learning_rate": 1.7711108401969502e-06, "loss": 0.6705, "step": 23876 }, { "epoch": 0.731794777491725, "grad_norm": 0.43316510126939983, "learning_rate": 1.770731904843533e-06, "loss": 0.3963, "step": 23877 }, { "epoch": 0.7318254260144661, "grad_norm": 1.4781289096421797, "learning_rate": 1.7703530013089221e-06, "loss": 0.6456, "step": 23878 }, { "epoch": 0.7318560745372074, "grad_norm": 1.3808023403180198, "learning_rate": 1.7699741295968476e-06, "loss": 0.5223, "step": 23879 }, { "epoch": 0.7318867230599485, "grad_norm": 1.4629089927192056, "learning_rate": 1.7695952897110447e-06, "loss": 0.5763, "step": 23880 }, { "epoch": 0.7319173715826898, "grad_norm": 1.4188623673234935, "learning_rate": 1.7692164816552476e-06, "loss": 0.6182, "step": 23881 }, { "epoch": 0.7319480201054309, "grad_norm": 1.166898879090845, "learning_rate": 1.7688377054331858e-06, "loss": 0.5217, "step": 23882 }, { "epoch": 0.7319786686281721, "grad_norm": 1.466006623795969, "learning_rate": 1.768458961048592e-06, "loss": 0.5618, "step": 23883 }, { "epoch": 0.7320093171509133, "grad_norm": 1.397698683187213, "learning_rate": 1.7680802485052011e-06, "loss": 0.7314, "step": 23884 }, { "epoch": 0.7320399656736545, "grad_norm": 0.44048379913620644, "learning_rate": 1.7677015678067405e-06, "loss": 0.414, "step": 23885 }, { "epoch": 0.7320706141963957, "grad_norm": 1.2549023922383045, "learning_rate": 1.7673229189569451e-06, "loss": 0.6203, "step": 23886 }, { "epoch": 0.7321012627191369, "grad_norm": 1.4327318940517564, "learning_rate": 1.766944301959543e-06, "loss": 0.595, "step": 23887 }, { "epoch": 0.7321319112418782, "grad_norm": 1.3321121674188552, "learning_rate": 1.7665657168182655e-06, "loss": 0.5638, "step": 23888 }, { "epoch": 0.7321625597646193, "grad_norm": 1.2934428579654242, "learning_rate": 1.7661871635368444e-06, "loss": 0.617, "step": 23889 }, { "epoch": 0.7321932082873606, "grad_norm": 1.3334725507751575, "learning_rate": 1.7658086421190074e-06, "loss": 0.5849, "step": 23890 }, { "epoch": 0.7322238568101017, "grad_norm": 1.324844769533885, "learning_rate": 1.7654301525684853e-06, "loss": 0.6284, "step": 23891 }, { "epoch": 0.732254505332843, "grad_norm": 1.3120883995181079, "learning_rate": 1.7650516948890095e-06, "loss": 0.5415, "step": 23892 }, { "epoch": 0.7322851538555841, "grad_norm": 1.531186924847613, "learning_rate": 1.764673269084305e-06, "loss": 0.6446, "step": 23893 }, { "epoch": 0.7323158023783254, "grad_norm": 0.4552954550008431, "learning_rate": 1.7642948751581029e-06, "loss": 0.4026, "step": 23894 }, { "epoch": 0.7323464509010665, "grad_norm": 1.2340981364932913, "learning_rate": 1.7639165131141329e-06, "loss": 0.6678, "step": 23895 }, { "epoch": 0.7323770994238078, "grad_norm": 1.3544232168907033, "learning_rate": 1.7635381829561193e-06, "loss": 0.6652, "step": 23896 }, { "epoch": 0.732407747946549, "grad_norm": 1.4570365905511766, "learning_rate": 1.763159884687794e-06, "loss": 0.5888, "step": 23897 }, { "epoch": 0.7324383964692902, "grad_norm": 0.4314434402918364, "learning_rate": 1.7627816183128793e-06, "loss": 0.3971, "step": 23898 }, { "epoch": 0.7324690449920314, "grad_norm": 1.3459822966167825, "learning_rate": 1.762403383835109e-06, "loss": 0.5563, "step": 23899 }, { "epoch": 0.7324996935147726, "grad_norm": 0.41399833774948847, "learning_rate": 1.7620251812582068e-06, "loss": 0.4112, "step": 23900 }, { "epoch": 0.7325303420375138, "grad_norm": 1.5299968020901775, "learning_rate": 1.7616470105858968e-06, "loss": 0.6964, "step": 23901 }, { "epoch": 0.732560990560255, "grad_norm": 1.264095866165402, "learning_rate": 1.7612688718219072e-06, "loss": 0.6051, "step": 23902 }, { "epoch": 0.7325916390829962, "grad_norm": 1.2954965807920693, "learning_rate": 1.7608907649699663e-06, "loss": 0.6837, "step": 23903 }, { "epoch": 0.7326222876057374, "grad_norm": 0.4381513925001064, "learning_rate": 1.7605126900337953e-06, "loss": 0.3909, "step": 23904 }, { "epoch": 0.7326529361284786, "grad_norm": 1.3351571228358678, "learning_rate": 1.760134647017122e-06, "loss": 0.6282, "step": 23905 }, { "epoch": 0.7326835846512199, "grad_norm": 1.1741703215675783, "learning_rate": 1.7597566359236712e-06, "loss": 0.6537, "step": 23906 }, { "epoch": 0.732714233173961, "grad_norm": 1.4018148060810387, "learning_rate": 1.7593786567571686e-06, "loss": 0.5742, "step": 23907 }, { "epoch": 0.7327448816967023, "grad_norm": 0.4556707687626654, "learning_rate": 1.7590007095213369e-06, "loss": 0.3815, "step": 23908 }, { "epoch": 0.7327755302194434, "grad_norm": 0.4285886273656457, "learning_rate": 1.7586227942198975e-06, "loss": 0.3996, "step": 23909 }, { "epoch": 0.7328061787421847, "grad_norm": 0.45037166387011845, "learning_rate": 1.7582449108565807e-06, "loss": 0.4324, "step": 23910 }, { "epoch": 0.7328368272649258, "grad_norm": 1.428513000893167, "learning_rate": 1.757867059435106e-06, "loss": 0.6148, "step": 23911 }, { "epoch": 0.7328674757876671, "grad_norm": 1.2503280831567938, "learning_rate": 1.7574892399591947e-06, "loss": 0.6036, "step": 23912 }, { "epoch": 0.7328981243104082, "grad_norm": 1.43654088876557, "learning_rate": 1.7571114524325716e-06, "loss": 0.5636, "step": 23913 }, { "epoch": 0.7329287728331494, "grad_norm": 1.414067769168348, "learning_rate": 1.756733696858961e-06, "loss": 0.6296, "step": 23914 }, { "epoch": 0.7329594213558907, "grad_norm": 1.3460557426239192, "learning_rate": 1.7563559732420815e-06, "loss": 0.6028, "step": 23915 }, { "epoch": 0.7329900698786318, "grad_norm": 1.1897093426529088, "learning_rate": 1.7559782815856563e-06, "loss": 0.5364, "step": 23916 }, { "epoch": 0.7330207184013731, "grad_norm": 0.47344009718959246, "learning_rate": 1.7556006218934074e-06, "loss": 0.388, "step": 23917 }, { "epoch": 0.7330513669241142, "grad_norm": 1.4550441585350766, "learning_rate": 1.7552229941690573e-06, "loss": 0.6904, "step": 23918 }, { "epoch": 0.7330820154468555, "grad_norm": 0.4416397761223414, "learning_rate": 1.754845398416325e-06, "loss": 0.3906, "step": 23919 }, { "epoch": 0.7331126639695966, "grad_norm": 1.3123787204626427, "learning_rate": 1.7544678346389283e-06, "loss": 0.5653, "step": 23920 }, { "epoch": 0.7331433124923379, "grad_norm": 0.4508096048935092, "learning_rate": 1.7540903028405936e-06, "loss": 0.4152, "step": 23921 }, { "epoch": 0.733173961015079, "grad_norm": 1.3040554697112423, "learning_rate": 1.7537128030250372e-06, "loss": 0.5601, "step": 23922 }, { "epoch": 0.7332046095378203, "grad_norm": 1.4169280702810043, "learning_rate": 1.7533353351959782e-06, "loss": 0.6453, "step": 23923 }, { "epoch": 0.7332352580605614, "grad_norm": 1.7168915688308697, "learning_rate": 1.7529578993571367e-06, "loss": 0.6491, "step": 23924 }, { "epoch": 0.7332659065833027, "grad_norm": 1.21516774523119, "learning_rate": 1.7525804955122316e-06, "loss": 0.5916, "step": 23925 }, { "epoch": 0.7332965551060439, "grad_norm": 1.373619632648683, "learning_rate": 1.7522031236649833e-06, "loss": 0.6597, "step": 23926 }, { "epoch": 0.7333272036287851, "grad_norm": 1.3419894694963035, "learning_rate": 1.7518257838191073e-06, "loss": 0.633, "step": 23927 }, { "epoch": 0.7333578521515263, "grad_norm": 1.3006053086224003, "learning_rate": 1.7514484759783223e-06, "loss": 0.5716, "step": 23928 }, { "epoch": 0.7333885006742675, "grad_norm": 0.4633548192736223, "learning_rate": 1.7510712001463493e-06, "loss": 0.405, "step": 23929 }, { "epoch": 0.7334191491970087, "grad_norm": 0.44351249353937044, "learning_rate": 1.7506939563269021e-06, "loss": 0.3933, "step": 23930 }, { "epoch": 0.7334497977197499, "grad_norm": 1.2755476445341392, "learning_rate": 1.7503167445236974e-06, "loss": 0.5651, "step": 23931 }, { "epoch": 0.7334804462424911, "grad_norm": 0.4609260568072437, "learning_rate": 1.7499395647404532e-06, "loss": 0.4097, "step": 23932 }, { "epoch": 0.7335110947652324, "grad_norm": 1.2541325086883581, "learning_rate": 1.7495624169808862e-06, "loss": 0.648, "step": 23933 }, { "epoch": 0.7335417432879735, "grad_norm": 1.3339209301294876, "learning_rate": 1.7491853012487141e-06, "loss": 0.5743, "step": 23934 }, { "epoch": 0.7335723918107148, "grad_norm": 1.280826023972813, "learning_rate": 1.7488082175476495e-06, "loss": 0.6043, "step": 23935 }, { "epoch": 0.7336030403334559, "grad_norm": 0.4490062378276497, "learning_rate": 1.748431165881409e-06, "loss": 0.4199, "step": 23936 }, { "epoch": 0.7336336888561972, "grad_norm": 0.4487745468920408, "learning_rate": 1.7480541462537098e-06, "loss": 0.4061, "step": 23937 }, { "epoch": 0.7336643373789383, "grad_norm": 1.2451910257486833, "learning_rate": 1.7476771586682655e-06, "loss": 0.5327, "step": 23938 }, { "epoch": 0.7336949859016796, "grad_norm": 0.44566046472077075, "learning_rate": 1.7473002031287867e-06, "loss": 0.4107, "step": 23939 }, { "epoch": 0.7337256344244207, "grad_norm": 1.3384076821674382, "learning_rate": 1.7469232796389945e-06, "loss": 0.5472, "step": 23940 }, { "epoch": 0.733756282947162, "grad_norm": 1.4083105461273002, "learning_rate": 1.7465463882025995e-06, "loss": 0.629, "step": 23941 }, { "epoch": 0.7337869314699031, "grad_norm": 0.42706364927178025, "learning_rate": 1.7461695288233138e-06, "loss": 0.3798, "step": 23942 }, { "epoch": 0.7338175799926444, "grad_norm": 0.4263440396833441, "learning_rate": 1.7457927015048526e-06, "loss": 0.397, "step": 23943 }, { "epoch": 0.7338482285153856, "grad_norm": 1.3830631977402994, "learning_rate": 1.7454159062509286e-06, "loss": 0.5707, "step": 23944 }, { "epoch": 0.7338788770381267, "grad_norm": 1.4191904793892043, "learning_rate": 1.7450391430652552e-06, "loss": 0.6671, "step": 23945 }, { "epoch": 0.733909525560868, "grad_norm": 1.423007257130298, "learning_rate": 1.7446624119515432e-06, "loss": 0.6907, "step": 23946 }, { "epoch": 0.7339401740836091, "grad_norm": 1.2158757961888222, "learning_rate": 1.744285712913505e-06, "loss": 0.4959, "step": 23947 }, { "epoch": 0.7339708226063504, "grad_norm": 1.268221886048921, "learning_rate": 1.7439090459548541e-06, "loss": 0.5344, "step": 23948 }, { "epoch": 0.7340014711290915, "grad_norm": 1.3896114394094807, "learning_rate": 1.7435324110793006e-06, "loss": 0.6337, "step": 23949 }, { "epoch": 0.7340321196518328, "grad_norm": 1.3725267433797594, "learning_rate": 1.7431558082905525e-06, "loss": 0.6366, "step": 23950 }, { "epoch": 0.7340627681745739, "grad_norm": 1.3391325918181949, "learning_rate": 1.7427792375923264e-06, "loss": 0.6489, "step": 23951 }, { "epoch": 0.7340934166973152, "grad_norm": 1.4091669671047689, "learning_rate": 1.7424026989883285e-06, "loss": 0.6356, "step": 23952 }, { "epoch": 0.7341240652200564, "grad_norm": 0.4617562303461587, "learning_rate": 1.7420261924822717e-06, "loss": 0.3969, "step": 23953 }, { "epoch": 0.7341547137427976, "grad_norm": 1.4566573679975308, "learning_rate": 1.741649718077863e-06, "loss": 0.6381, "step": 23954 }, { "epoch": 0.7341853622655388, "grad_norm": 1.4772449697610912, "learning_rate": 1.741273275778813e-06, "loss": 0.5717, "step": 23955 }, { "epoch": 0.73421601078828, "grad_norm": 1.2402259644987634, "learning_rate": 1.740896865588833e-06, "loss": 0.5353, "step": 23956 }, { "epoch": 0.7342466593110212, "grad_norm": 0.4753120914995345, "learning_rate": 1.7405204875116289e-06, "loss": 0.4238, "step": 23957 }, { "epoch": 0.7342773078337624, "grad_norm": 0.4350515321547527, "learning_rate": 1.7401441415509096e-06, "loss": 0.3975, "step": 23958 }, { "epoch": 0.7343079563565036, "grad_norm": 1.2487931409952486, "learning_rate": 1.7397678277103863e-06, "loss": 0.6609, "step": 23959 }, { "epoch": 0.7343386048792448, "grad_norm": 1.2867879772829216, "learning_rate": 1.7393915459937631e-06, "loss": 0.5301, "step": 23960 }, { "epoch": 0.734369253401986, "grad_norm": 1.1312830377557537, "learning_rate": 1.7390152964047492e-06, "loss": 0.4894, "step": 23961 }, { "epoch": 0.7343999019247273, "grad_norm": 1.367218596545985, "learning_rate": 1.7386390789470536e-06, "loss": 0.6039, "step": 23962 }, { "epoch": 0.7344305504474684, "grad_norm": 1.3547226802991554, "learning_rate": 1.73826289362438e-06, "loss": 0.5993, "step": 23963 }, { "epoch": 0.7344611989702097, "grad_norm": 1.5159562355807723, "learning_rate": 1.7378867404404382e-06, "loss": 0.6205, "step": 23964 }, { "epoch": 0.7344918474929508, "grad_norm": 1.6411697162795238, "learning_rate": 1.737510619398931e-06, "loss": 0.6555, "step": 23965 }, { "epoch": 0.7345224960156921, "grad_norm": 1.1864213240327846, "learning_rate": 1.737134530503567e-06, "loss": 0.5802, "step": 23966 }, { "epoch": 0.7345531445384332, "grad_norm": 1.4228426056330379, "learning_rate": 1.7367584737580528e-06, "loss": 0.6376, "step": 23967 }, { "epoch": 0.7345837930611745, "grad_norm": 1.4332162693522545, "learning_rate": 1.7363824491660902e-06, "loss": 0.622, "step": 23968 }, { "epoch": 0.7346144415839156, "grad_norm": 1.3621136917873078, "learning_rate": 1.7360064567313866e-06, "loss": 0.6023, "step": 23969 }, { "epoch": 0.7346450901066569, "grad_norm": 1.2940382476531567, "learning_rate": 1.7356304964576488e-06, "loss": 0.5297, "step": 23970 }, { "epoch": 0.7346757386293981, "grad_norm": 1.3017749745308305, "learning_rate": 1.7352545683485766e-06, "loss": 0.6015, "step": 23971 }, { "epoch": 0.7347063871521393, "grad_norm": 1.42896535446624, "learning_rate": 1.7348786724078765e-06, "loss": 0.6418, "step": 23972 }, { "epoch": 0.7347370356748805, "grad_norm": 1.414326498435274, "learning_rate": 1.734502808639254e-06, "loss": 0.5953, "step": 23973 }, { "epoch": 0.7347676841976217, "grad_norm": 1.3155022509710828, "learning_rate": 1.7341269770464091e-06, "loss": 0.6775, "step": 23974 }, { "epoch": 0.7347983327203629, "grad_norm": 0.4518501773483574, "learning_rate": 1.733751177633049e-06, "loss": 0.3907, "step": 23975 }, { "epoch": 0.734828981243104, "grad_norm": 0.443668881759025, "learning_rate": 1.7333754104028721e-06, "loss": 0.3832, "step": 23976 }, { "epoch": 0.7348596297658453, "grad_norm": 1.2744047152506472, "learning_rate": 1.732999675359583e-06, "loss": 0.5598, "step": 23977 }, { "epoch": 0.7348902782885864, "grad_norm": 1.3928539853507282, "learning_rate": 1.7326239725068856e-06, "loss": 0.6439, "step": 23978 }, { "epoch": 0.7349209268113277, "grad_norm": 0.4618069117036211, "learning_rate": 1.7322483018484787e-06, "loss": 0.3909, "step": 23979 }, { "epoch": 0.7349515753340689, "grad_norm": 1.2965702605931884, "learning_rate": 1.7318726633880655e-06, "loss": 0.6224, "step": 23980 }, { "epoch": 0.7349822238568101, "grad_norm": 1.3955229578236692, "learning_rate": 1.7314970571293488e-06, "loss": 0.6527, "step": 23981 }, { "epoch": 0.7350128723795513, "grad_norm": 0.45775846282369537, "learning_rate": 1.7311214830760258e-06, "loss": 0.3976, "step": 23982 }, { "epoch": 0.7350435209022925, "grad_norm": 1.1634434876518298, "learning_rate": 1.7307459412318013e-06, "loss": 0.5642, "step": 23983 }, { "epoch": 0.7350741694250337, "grad_norm": 1.147597814756986, "learning_rate": 1.7303704316003716e-06, "loss": 0.6702, "step": 23984 }, { "epoch": 0.7351048179477749, "grad_norm": 1.704180886513894, "learning_rate": 1.7299949541854382e-06, "loss": 0.7594, "step": 23985 }, { "epoch": 0.7351354664705161, "grad_norm": 1.2055642812020944, "learning_rate": 1.7296195089907037e-06, "loss": 0.531, "step": 23986 }, { "epoch": 0.7351661149932573, "grad_norm": 0.4299291111191444, "learning_rate": 1.7292440960198631e-06, "loss": 0.3752, "step": 23987 }, { "epoch": 0.7351967635159985, "grad_norm": 1.3217529882124568, "learning_rate": 1.728868715276617e-06, "loss": 0.7117, "step": 23988 }, { "epoch": 0.7352274120387398, "grad_norm": 1.312933933406547, "learning_rate": 1.728493366764666e-06, "loss": 0.5603, "step": 23989 }, { "epoch": 0.7352580605614809, "grad_norm": 1.5489057612600503, "learning_rate": 1.7281180504877053e-06, "loss": 0.646, "step": 23990 }, { "epoch": 0.7352887090842222, "grad_norm": 1.2877777864988855, "learning_rate": 1.7277427664494352e-06, "loss": 0.6187, "step": 23991 }, { "epoch": 0.7353193576069633, "grad_norm": 0.4567007223154566, "learning_rate": 1.7273675146535535e-06, "loss": 0.4069, "step": 23992 }, { "epoch": 0.7353500061297046, "grad_norm": 1.217565584552609, "learning_rate": 1.726992295103756e-06, "loss": 0.4912, "step": 23993 }, { "epoch": 0.7353806546524457, "grad_norm": 0.45287046944865367, "learning_rate": 1.7266171078037424e-06, "loss": 0.4026, "step": 23994 }, { "epoch": 0.735411303175187, "grad_norm": 1.3850276725089388, "learning_rate": 1.7262419527572062e-06, "loss": 0.5621, "step": 23995 }, { "epoch": 0.7354419516979281, "grad_norm": 0.43936828291258057, "learning_rate": 1.725866829967846e-06, "loss": 0.3984, "step": 23996 }, { "epoch": 0.7354726002206694, "grad_norm": 0.4647286967787241, "learning_rate": 1.7254917394393588e-06, "loss": 0.4058, "step": 23997 }, { "epoch": 0.7355032487434106, "grad_norm": 1.3042128852311776, "learning_rate": 1.7251166811754384e-06, "loss": 0.7014, "step": 23998 }, { "epoch": 0.7355338972661518, "grad_norm": 1.3136515360162797, "learning_rate": 1.7247416551797802e-06, "loss": 0.5996, "step": 23999 }, { "epoch": 0.735564545788893, "grad_norm": 1.4169201052854294, "learning_rate": 1.7243666614560828e-06, "loss": 0.6899, "step": 24000 }, { "epoch": 0.7355951943116342, "grad_norm": 1.2704972368448917, "learning_rate": 1.723991700008037e-06, "loss": 0.5318, "step": 24001 }, { "epoch": 0.7356258428343754, "grad_norm": 1.51804106880857, "learning_rate": 1.7236167708393393e-06, "loss": 0.6348, "step": 24002 }, { "epoch": 0.7356564913571166, "grad_norm": 0.44644828435005607, "learning_rate": 1.7232418739536854e-06, "loss": 0.3761, "step": 24003 }, { "epoch": 0.7356871398798578, "grad_norm": 0.4587545172205438, "learning_rate": 1.7228670093547661e-06, "loss": 0.375, "step": 24004 }, { "epoch": 0.735717788402599, "grad_norm": 1.4913834103934904, "learning_rate": 1.7224921770462782e-06, "loss": 0.599, "step": 24005 }, { "epoch": 0.7357484369253402, "grad_norm": 1.1962137750516604, "learning_rate": 1.7221173770319105e-06, "loss": 0.5109, "step": 24006 }, { "epoch": 0.7357790854480813, "grad_norm": 0.45717636733261613, "learning_rate": 1.7217426093153623e-06, "loss": 0.3738, "step": 24007 }, { "epoch": 0.7358097339708226, "grad_norm": 1.1622714207786495, "learning_rate": 1.7213678739003225e-06, "loss": 0.5339, "step": 24008 }, { "epoch": 0.7358403824935638, "grad_norm": 1.279281887691701, "learning_rate": 1.7209931707904826e-06, "loss": 0.5482, "step": 24009 }, { "epoch": 0.735871031016305, "grad_norm": 1.7446961131612013, "learning_rate": 1.7206184999895354e-06, "loss": 0.59, "step": 24010 }, { "epoch": 0.7359016795390462, "grad_norm": 1.2955806475097473, "learning_rate": 1.7202438615011757e-06, "loss": 0.5915, "step": 24011 }, { "epoch": 0.7359323280617874, "grad_norm": 1.4445729505557683, "learning_rate": 1.7198692553290903e-06, "loss": 0.5949, "step": 24012 }, { "epoch": 0.7359629765845286, "grad_norm": 1.5390244836260656, "learning_rate": 1.719494681476972e-06, "loss": 0.6632, "step": 24013 }, { "epoch": 0.7359936251072698, "grad_norm": 1.4666436603556876, "learning_rate": 1.7191201399485141e-06, "loss": 0.5334, "step": 24014 }, { "epoch": 0.736024273630011, "grad_norm": 1.358965295738611, "learning_rate": 1.7187456307474031e-06, "loss": 0.6659, "step": 24015 }, { "epoch": 0.7360549221527523, "grad_norm": 1.5792166815978501, "learning_rate": 1.7183711538773328e-06, "loss": 0.6289, "step": 24016 }, { "epoch": 0.7360855706754934, "grad_norm": 1.2571449811362756, "learning_rate": 1.7179967093419876e-06, "loss": 0.5783, "step": 24017 }, { "epoch": 0.7361162191982347, "grad_norm": 1.5190389355094878, "learning_rate": 1.717622297145064e-06, "loss": 0.6112, "step": 24018 }, { "epoch": 0.7361468677209758, "grad_norm": 1.2521126793217454, "learning_rate": 1.7172479172902474e-06, "loss": 0.557, "step": 24019 }, { "epoch": 0.7361775162437171, "grad_norm": 1.2677736257258219, "learning_rate": 1.7168735697812254e-06, "loss": 0.6468, "step": 24020 }, { "epoch": 0.7362081647664582, "grad_norm": 1.1668463825541042, "learning_rate": 1.7164992546216886e-06, "loss": 0.6204, "step": 24021 }, { "epoch": 0.7362388132891995, "grad_norm": 1.558690460072395, "learning_rate": 1.7161249718153266e-06, "loss": 0.596, "step": 24022 }, { "epoch": 0.7362694618119406, "grad_norm": 1.402141349978269, "learning_rate": 1.7157507213658232e-06, "loss": 0.5284, "step": 24023 }, { "epoch": 0.7363001103346819, "grad_norm": 1.1976799919663987, "learning_rate": 1.7153765032768683e-06, "loss": 0.608, "step": 24024 }, { "epoch": 0.736330758857423, "grad_norm": 1.4337180990374165, "learning_rate": 1.7150023175521496e-06, "loss": 0.6273, "step": 24025 }, { "epoch": 0.7363614073801643, "grad_norm": 0.4231557028288453, "learning_rate": 1.714628164195355e-06, "loss": 0.3917, "step": 24026 }, { "epoch": 0.7363920559029055, "grad_norm": 1.3433099758954334, "learning_rate": 1.7142540432101695e-06, "loss": 0.5962, "step": 24027 }, { "epoch": 0.7364227044256467, "grad_norm": 1.4122724538856557, "learning_rate": 1.7138799546002776e-06, "loss": 0.5824, "step": 24028 }, { "epoch": 0.7364533529483879, "grad_norm": 1.3981671342808506, "learning_rate": 1.7135058983693682e-06, "loss": 0.4765, "step": 24029 }, { "epoch": 0.7364840014711291, "grad_norm": 1.2409000187259214, "learning_rate": 1.7131318745211272e-06, "loss": 0.5566, "step": 24030 }, { "epoch": 0.7365146499938703, "grad_norm": 1.2624753823288644, "learning_rate": 1.7127578830592374e-06, "loss": 0.6108, "step": 24031 }, { "epoch": 0.7365452985166115, "grad_norm": 1.474824004027792, "learning_rate": 1.7123839239873845e-06, "loss": 0.6637, "step": 24032 }, { "epoch": 0.7365759470393527, "grad_norm": 1.438842222416612, "learning_rate": 1.7120099973092551e-06, "loss": 0.5633, "step": 24033 }, { "epoch": 0.736606595562094, "grad_norm": 1.3227751574612718, "learning_rate": 1.7116361030285334e-06, "loss": 0.6169, "step": 24034 }, { "epoch": 0.7366372440848351, "grad_norm": 0.45533516869811463, "learning_rate": 1.7112622411489026e-06, "loss": 0.3845, "step": 24035 }, { "epoch": 0.7366678926075764, "grad_norm": 1.463013848270938, "learning_rate": 1.7108884116740432e-06, "loss": 0.6334, "step": 24036 }, { "epoch": 0.7366985411303175, "grad_norm": 1.420700154049398, "learning_rate": 1.7105146146076452e-06, "loss": 0.5308, "step": 24037 }, { "epoch": 0.7367291896530587, "grad_norm": 1.2968863833952882, "learning_rate": 1.7101408499533883e-06, "loss": 0.5164, "step": 24038 }, { "epoch": 0.7367598381757999, "grad_norm": 1.4576200648984678, "learning_rate": 1.7097671177149538e-06, "loss": 0.5925, "step": 24039 }, { "epoch": 0.7367904866985411, "grad_norm": 1.375771568860517, "learning_rate": 1.7093934178960258e-06, "loss": 0.6249, "step": 24040 }, { "epoch": 0.7368211352212823, "grad_norm": 1.2904050832333602, "learning_rate": 1.7090197505002877e-06, "loss": 0.5998, "step": 24041 }, { "epoch": 0.7368517837440235, "grad_norm": 1.3707052354849745, "learning_rate": 1.7086461155314189e-06, "loss": 0.5904, "step": 24042 }, { "epoch": 0.7368824322667648, "grad_norm": 1.3808821231389343, "learning_rate": 1.7082725129931015e-06, "loss": 0.6295, "step": 24043 }, { "epoch": 0.7369130807895059, "grad_norm": 1.5026993012560392, "learning_rate": 1.7078989428890176e-06, "loss": 0.5483, "step": 24044 }, { "epoch": 0.7369437293122472, "grad_norm": 1.2568392053307704, "learning_rate": 1.707525405222849e-06, "loss": 0.5477, "step": 24045 }, { "epoch": 0.7369743778349883, "grad_norm": 1.5149694228227872, "learning_rate": 1.7071518999982756e-06, "loss": 0.6455, "step": 24046 }, { "epoch": 0.7370050263577296, "grad_norm": 1.4134104095178233, "learning_rate": 1.706778427218973e-06, "loss": 0.5968, "step": 24047 }, { "epoch": 0.7370356748804707, "grad_norm": 1.2855664568632, "learning_rate": 1.706404986888629e-06, "loss": 0.6261, "step": 24048 }, { "epoch": 0.737066323403212, "grad_norm": 1.3590122916150078, "learning_rate": 1.7060315790109195e-06, "loss": 0.5097, "step": 24049 }, { "epoch": 0.7370969719259531, "grad_norm": 1.3197868559074597, "learning_rate": 1.7056582035895213e-06, "loss": 0.573, "step": 24050 }, { "epoch": 0.7371276204486944, "grad_norm": 1.3137177681187169, "learning_rate": 1.7052848606281164e-06, "loss": 0.5905, "step": 24051 }, { "epoch": 0.7371582689714355, "grad_norm": 1.568442647485071, "learning_rate": 1.7049115501303827e-06, "loss": 0.7137, "step": 24052 }, { "epoch": 0.7371889174941768, "grad_norm": 1.2971524013482418, "learning_rate": 1.7045382720999997e-06, "loss": 0.5538, "step": 24053 }, { "epoch": 0.737219566016918, "grad_norm": 1.3652947708711136, "learning_rate": 1.7041650265406428e-06, "loss": 0.5966, "step": 24054 }, { "epoch": 0.7372502145396592, "grad_norm": 1.3390680896484015, "learning_rate": 1.7037918134559917e-06, "loss": 0.5378, "step": 24055 }, { "epoch": 0.7372808630624004, "grad_norm": 0.43545526935065876, "learning_rate": 1.7034186328497243e-06, "loss": 0.3898, "step": 24056 }, { "epoch": 0.7373115115851416, "grad_norm": 1.4027177713206629, "learning_rate": 1.7030454847255168e-06, "loss": 0.4442, "step": 24057 }, { "epoch": 0.7373421601078828, "grad_norm": 0.4462661161902056, "learning_rate": 1.7026723690870422e-06, "loss": 0.3798, "step": 24058 }, { "epoch": 0.737372808630624, "grad_norm": 1.1227835646432036, "learning_rate": 1.7022992859379844e-06, "loss": 0.4569, "step": 24059 }, { "epoch": 0.7374034571533652, "grad_norm": 0.42381136709883305, "learning_rate": 1.7019262352820132e-06, "loss": 0.383, "step": 24060 }, { "epoch": 0.7374341056761065, "grad_norm": 1.487038525847736, "learning_rate": 1.7015532171228083e-06, "loss": 0.7179, "step": 24061 }, { "epoch": 0.7374647541988476, "grad_norm": 1.227107729260864, "learning_rate": 1.7011802314640418e-06, "loss": 0.5655, "step": 24062 }, { "epoch": 0.7374954027215889, "grad_norm": 0.45247733473705737, "learning_rate": 1.7008072783093909e-06, "loss": 0.3662, "step": 24063 }, { "epoch": 0.73752605124433, "grad_norm": 1.4555327473171942, "learning_rate": 1.7004343576625315e-06, "loss": 0.6269, "step": 24064 }, { "epoch": 0.7375566997670713, "grad_norm": 1.1865614721276951, "learning_rate": 1.700061469527135e-06, "loss": 0.5477, "step": 24065 }, { "epoch": 0.7375873482898124, "grad_norm": 1.4180266672666237, "learning_rate": 1.699688613906877e-06, "loss": 0.6536, "step": 24066 }, { "epoch": 0.7376179968125537, "grad_norm": 1.4322943839644202, "learning_rate": 1.6993157908054335e-06, "loss": 0.5464, "step": 24067 }, { "epoch": 0.7376486453352948, "grad_norm": 0.44398683076754925, "learning_rate": 1.6989430002264757e-06, "loss": 0.4028, "step": 24068 }, { "epoch": 0.737679293858036, "grad_norm": 1.2226045124478304, "learning_rate": 1.698570242173674e-06, "loss": 0.5749, "step": 24069 }, { "epoch": 0.7377099423807773, "grad_norm": 1.3675716569628686, "learning_rate": 1.6981975166507076e-06, "loss": 0.5344, "step": 24070 }, { "epoch": 0.7377405909035184, "grad_norm": 1.3768289571379135, "learning_rate": 1.6978248236612443e-06, "loss": 0.6436, "step": 24071 }, { "epoch": 0.7377712394262597, "grad_norm": 0.42699811683555194, "learning_rate": 1.6974521632089597e-06, "loss": 0.3782, "step": 24072 }, { "epoch": 0.7378018879490008, "grad_norm": 1.2390291351377838, "learning_rate": 1.6970795352975216e-06, "loss": 0.6356, "step": 24073 }, { "epoch": 0.7378325364717421, "grad_norm": 1.4944212534162566, "learning_rate": 1.6967069399306047e-06, "loss": 0.6725, "step": 24074 }, { "epoch": 0.7378631849944832, "grad_norm": 0.43093402518435364, "learning_rate": 1.6963343771118806e-06, "loss": 0.4061, "step": 24075 }, { "epoch": 0.7378938335172245, "grad_norm": 1.264026875398148, "learning_rate": 1.6959618468450179e-06, "loss": 0.5742, "step": 24076 }, { "epoch": 0.7379244820399656, "grad_norm": 1.3863142558864758, "learning_rate": 1.6955893491336884e-06, "loss": 0.5989, "step": 24077 }, { "epoch": 0.7379551305627069, "grad_norm": 4.880046746329616, "learning_rate": 1.695216883981564e-06, "loss": 0.6558, "step": 24078 }, { "epoch": 0.737985779085448, "grad_norm": 1.258400764982674, "learning_rate": 1.6948444513923118e-06, "loss": 0.5975, "step": 24079 }, { "epoch": 0.7380164276081893, "grad_norm": 1.4673941293216062, "learning_rate": 1.6944720513696045e-06, "loss": 0.5189, "step": 24080 }, { "epoch": 0.7380470761309305, "grad_norm": 0.43811870736319997, "learning_rate": 1.694099683917108e-06, "loss": 0.374, "step": 24081 }, { "epoch": 0.7380777246536717, "grad_norm": 1.3109968887918244, "learning_rate": 1.6937273490384936e-06, "loss": 0.6078, "step": 24082 }, { "epoch": 0.7381083731764129, "grad_norm": 1.4284572347458286, "learning_rate": 1.693355046737431e-06, "loss": 0.6751, "step": 24083 }, { "epoch": 0.7381390216991541, "grad_norm": 1.2915520688737425, "learning_rate": 1.6929827770175849e-06, "loss": 0.527, "step": 24084 }, { "epoch": 0.7381696702218953, "grad_norm": 1.3430834460076018, "learning_rate": 1.6926105398826264e-06, "loss": 0.5824, "step": 24085 }, { "epoch": 0.7382003187446365, "grad_norm": 1.346906146381198, "learning_rate": 1.6922383353362237e-06, "loss": 0.5867, "step": 24086 }, { "epoch": 0.7382309672673777, "grad_norm": 1.3245599451669259, "learning_rate": 1.6918661633820415e-06, "loss": 0.5957, "step": 24087 }, { "epoch": 0.738261615790119, "grad_norm": 1.3960279320250581, "learning_rate": 1.6914940240237486e-06, "loss": 0.5619, "step": 24088 }, { "epoch": 0.7382922643128601, "grad_norm": 1.3622299231106922, "learning_rate": 1.6911219172650133e-06, "loss": 0.6126, "step": 24089 }, { "epoch": 0.7383229128356014, "grad_norm": 1.3047827177908111, "learning_rate": 1.690749843109498e-06, "loss": 0.6093, "step": 24090 }, { "epoch": 0.7383535613583425, "grad_norm": 1.3167647307733459, "learning_rate": 1.690377801560874e-06, "loss": 0.6515, "step": 24091 }, { "epoch": 0.7383842098810838, "grad_norm": 1.171391912495439, "learning_rate": 1.690005792622802e-06, "loss": 0.6215, "step": 24092 }, { "epoch": 0.7384148584038249, "grad_norm": 1.2642614953395837, "learning_rate": 1.6896338162989494e-06, "loss": 0.5698, "step": 24093 }, { "epoch": 0.7384455069265662, "grad_norm": 1.1546519065440752, "learning_rate": 1.6892618725929843e-06, "loss": 0.6786, "step": 24094 }, { "epoch": 0.7384761554493073, "grad_norm": 1.3025386988637941, "learning_rate": 1.6888899615085668e-06, "loss": 0.5574, "step": 24095 }, { "epoch": 0.7385068039720486, "grad_norm": 0.4427519711475595, "learning_rate": 1.688518083049364e-06, "loss": 0.4333, "step": 24096 }, { "epoch": 0.7385374524947897, "grad_norm": 1.2373026297184955, "learning_rate": 1.6881462372190415e-06, "loss": 0.5862, "step": 24097 }, { "epoch": 0.738568101017531, "grad_norm": 1.4402813147559228, "learning_rate": 1.6877744240212596e-06, "loss": 0.594, "step": 24098 }, { "epoch": 0.7385987495402722, "grad_norm": 1.6473468935367959, "learning_rate": 1.687402643459684e-06, "loss": 0.6739, "step": 24099 }, { "epoch": 0.7386293980630133, "grad_norm": 1.2412127969746016, "learning_rate": 1.6870308955379795e-06, "loss": 0.6253, "step": 24100 }, { "epoch": 0.7386600465857546, "grad_norm": 1.2761763166545403, "learning_rate": 1.6866591802598054e-06, "loss": 0.6801, "step": 24101 }, { "epoch": 0.7386906951084957, "grad_norm": 1.373010945909985, "learning_rate": 1.6862874976288274e-06, "loss": 0.5988, "step": 24102 }, { "epoch": 0.738721343631237, "grad_norm": 1.3938434589948716, "learning_rate": 1.6859158476487053e-06, "loss": 0.6444, "step": 24103 }, { "epoch": 0.7387519921539781, "grad_norm": 1.1459999514599124, "learning_rate": 1.6855442303231023e-06, "loss": 0.6641, "step": 24104 }, { "epoch": 0.7387826406767194, "grad_norm": 1.1919085836478005, "learning_rate": 1.6851726456556816e-06, "loss": 0.5877, "step": 24105 }, { "epoch": 0.7388132891994605, "grad_norm": 0.4280756145500042, "learning_rate": 1.6848010936501014e-06, "loss": 0.3889, "step": 24106 }, { "epoch": 0.7388439377222018, "grad_norm": 1.2591445953423022, "learning_rate": 1.6844295743100243e-06, "loss": 0.6299, "step": 24107 }, { "epoch": 0.738874586244943, "grad_norm": 1.3128549760991446, "learning_rate": 1.6840580876391126e-06, "loss": 0.6089, "step": 24108 }, { "epoch": 0.7389052347676842, "grad_norm": 1.406217418014713, "learning_rate": 1.6836866336410229e-06, "loss": 0.5577, "step": 24109 }, { "epoch": 0.7389358832904254, "grad_norm": 1.3381395528486253, "learning_rate": 1.683315212319418e-06, "loss": 0.6161, "step": 24110 }, { "epoch": 0.7389665318131666, "grad_norm": 1.2758899680389413, "learning_rate": 1.6829438236779582e-06, "loss": 0.5695, "step": 24111 }, { "epoch": 0.7389971803359078, "grad_norm": 0.48313361889993955, "learning_rate": 1.6825724677202998e-06, "loss": 0.4044, "step": 24112 }, { "epoch": 0.739027828858649, "grad_norm": 1.4049641293592416, "learning_rate": 1.6822011444501058e-06, "loss": 0.6244, "step": 24113 }, { "epoch": 0.7390584773813902, "grad_norm": 1.3362718496765367, "learning_rate": 1.6818298538710287e-06, "loss": 0.6142, "step": 24114 }, { "epoch": 0.7390891259041314, "grad_norm": 1.1933605243680694, "learning_rate": 1.6814585959867353e-06, "loss": 0.5446, "step": 24115 }, { "epoch": 0.7391197744268726, "grad_norm": 1.3276135607098123, "learning_rate": 1.681087370800879e-06, "loss": 0.6526, "step": 24116 }, { "epoch": 0.7391504229496139, "grad_norm": 1.283564541640912, "learning_rate": 1.680716178317116e-06, "loss": 0.637, "step": 24117 }, { "epoch": 0.739181071472355, "grad_norm": 1.2764914146874262, "learning_rate": 1.6803450185391063e-06, "loss": 0.5562, "step": 24118 }, { "epoch": 0.7392117199950963, "grad_norm": 1.2328530078063944, "learning_rate": 1.6799738914705078e-06, "loss": 0.5282, "step": 24119 }, { "epoch": 0.7392423685178374, "grad_norm": 1.3627775283381436, "learning_rate": 1.6796027971149748e-06, "loss": 0.6746, "step": 24120 }, { "epoch": 0.7392730170405787, "grad_norm": 1.2343718062291862, "learning_rate": 1.6792317354761644e-06, "loss": 0.5704, "step": 24121 }, { "epoch": 0.7393036655633198, "grad_norm": 1.2597910841022184, "learning_rate": 1.6788607065577355e-06, "loss": 0.6, "step": 24122 }, { "epoch": 0.7393343140860611, "grad_norm": 1.459873749057876, "learning_rate": 1.6784897103633401e-06, "loss": 0.7049, "step": 24123 }, { "epoch": 0.7393649626088022, "grad_norm": 1.1462742719093455, "learning_rate": 1.678118746896637e-06, "loss": 0.5162, "step": 24124 }, { "epoch": 0.7393956111315435, "grad_norm": 1.1858212534613006, "learning_rate": 1.6777478161612781e-06, "loss": 0.637, "step": 24125 }, { "epoch": 0.7394262596542847, "grad_norm": 1.165201536784827, "learning_rate": 1.6773769181609201e-06, "loss": 0.4666, "step": 24126 }, { "epoch": 0.7394569081770259, "grad_norm": 0.4900047292995065, "learning_rate": 1.6770060528992194e-06, "loss": 0.4079, "step": 24127 }, { "epoch": 0.7394875566997671, "grad_norm": 1.5075600837360439, "learning_rate": 1.676635220379826e-06, "loss": 0.5444, "step": 24128 }, { "epoch": 0.7395182052225083, "grad_norm": 1.283338512206741, "learning_rate": 1.6762644206063967e-06, "loss": 0.6135, "step": 24129 }, { "epoch": 0.7395488537452495, "grad_norm": 1.190448627618246, "learning_rate": 1.6758936535825853e-06, "loss": 0.528, "step": 24130 }, { "epoch": 0.7395795022679906, "grad_norm": 0.4510696723553066, "learning_rate": 1.6755229193120437e-06, "loss": 0.4174, "step": 24131 }, { "epoch": 0.7396101507907319, "grad_norm": 0.43576499321408874, "learning_rate": 1.6751522177984264e-06, "loss": 0.3845, "step": 24132 }, { "epoch": 0.739640799313473, "grad_norm": 1.3853490547722793, "learning_rate": 1.6747815490453816e-06, "loss": 0.6343, "step": 24133 }, { "epoch": 0.7396714478362143, "grad_norm": 1.3626509275224343, "learning_rate": 1.6744109130565684e-06, "loss": 0.6749, "step": 24134 }, { "epoch": 0.7397020963589555, "grad_norm": 0.4637123014888773, "learning_rate": 1.6740403098356357e-06, "loss": 0.387, "step": 24135 }, { "epoch": 0.7397327448816967, "grad_norm": 1.5396906449624537, "learning_rate": 1.6736697393862328e-06, "loss": 0.6006, "step": 24136 }, { "epoch": 0.7397633934044379, "grad_norm": 1.4689572763255387, "learning_rate": 1.673299201712013e-06, "loss": 0.5927, "step": 24137 }, { "epoch": 0.7397940419271791, "grad_norm": 1.4541569957231302, "learning_rate": 1.6729286968166291e-06, "loss": 0.5928, "step": 24138 }, { "epoch": 0.7398246904499203, "grad_norm": 1.2309587170726084, "learning_rate": 1.672558224703728e-06, "loss": 0.6464, "step": 24139 }, { "epoch": 0.7398553389726615, "grad_norm": 1.5117449430756043, "learning_rate": 1.6721877853769624e-06, "loss": 0.6825, "step": 24140 }, { "epoch": 0.7398859874954027, "grad_norm": 1.1869234232811823, "learning_rate": 1.6718173788399822e-06, "loss": 0.515, "step": 24141 }, { "epoch": 0.739916636018144, "grad_norm": 1.6221035979313099, "learning_rate": 1.6714470050964387e-06, "loss": 0.659, "step": 24142 }, { "epoch": 0.7399472845408851, "grad_norm": 1.2848215167606685, "learning_rate": 1.6710766641499793e-06, "loss": 0.5905, "step": 24143 }, { "epoch": 0.7399779330636264, "grad_norm": 1.366509573457136, "learning_rate": 1.6707063560042497e-06, "loss": 0.565, "step": 24144 }, { "epoch": 0.7400085815863675, "grad_norm": 0.46345791241999496, "learning_rate": 1.6703360806629055e-06, "loss": 0.3999, "step": 24145 }, { "epoch": 0.7400392301091088, "grad_norm": 1.2885622851606962, "learning_rate": 1.6699658381295919e-06, "loss": 0.5584, "step": 24146 }, { "epoch": 0.7400698786318499, "grad_norm": 1.5475239240231162, "learning_rate": 1.6695956284079557e-06, "loss": 0.6327, "step": 24147 }, { "epoch": 0.7401005271545912, "grad_norm": 0.44124989003523357, "learning_rate": 1.6692254515016455e-06, "loss": 0.3955, "step": 24148 }, { "epoch": 0.7401311756773323, "grad_norm": 1.3031880178121056, "learning_rate": 1.668855307414311e-06, "loss": 0.5276, "step": 24149 }, { "epoch": 0.7401618242000736, "grad_norm": 1.26710352769583, "learning_rate": 1.6684851961495956e-06, "loss": 0.6004, "step": 24150 }, { "epoch": 0.7401924727228147, "grad_norm": 1.3884548537275283, "learning_rate": 1.6681151177111482e-06, "loss": 0.5719, "step": 24151 }, { "epoch": 0.740223121245556, "grad_norm": 1.3685736688988963, "learning_rate": 1.667745072102615e-06, "loss": 0.5956, "step": 24152 }, { "epoch": 0.7402537697682972, "grad_norm": 1.3774935432813336, "learning_rate": 1.6673750593276433e-06, "loss": 0.6124, "step": 24153 }, { "epoch": 0.7402844182910384, "grad_norm": 1.3207345991054251, "learning_rate": 1.6670050793898785e-06, "loss": 0.6069, "step": 24154 }, { "epoch": 0.7403150668137796, "grad_norm": 1.4009988171847894, "learning_rate": 1.6666351322929618e-06, "loss": 0.5761, "step": 24155 }, { "epoch": 0.7403457153365208, "grad_norm": 1.3544545357832893, "learning_rate": 1.6662652180405458e-06, "loss": 0.5507, "step": 24156 }, { "epoch": 0.740376363859262, "grad_norm": 1.3124020084341408, "learning_rate": 1.6658953366362713e-06, "loss": 0.6597, "step": 24157 }, { "epoch": 0.7404070123820032, "grad_norm": 1.2170152489409016, "learning_rate": 1.6655254880837812e-06, "loss": 0.6132, "step": 24158 }, { "epoch": 0.7404376609047444, "grad_norm": 1.3050719517258176, "learning_rate": 1.6651556723867219e-06, "loss": 0.5858, "step": 24159 }, { "epoch": 0.7404683094274856, "grad_norm": 1.5135443799579777, "learning_rate": 1.6647858895487368e-06, "loss": 0.5705, "step": 24160 }, { "epoch": 0.7404989579502268, "grad_norm": 1.5043454957439666, "learning_rate": 1.6644161395734715e-06, "loss": 0.6682, "step": 24161 }, { "epoch": 0.740529606472968, "grad_norm": 0.42989685665684074, "learning_rate": 1.6640464224645657e-06, "loss": 0.391, "step": 24162 }, { "epoch": 0.7405602549957092, "grad_norm": 1.3314666903284813, "learning_rate": 1.6636767382256641e-06, "loss": 0.6599, "step": 24163 }, { "epoch": 0.7405909035184504, "grad_norm": 1.5268513773937522, "learning_rate": 1.6633070868604107e-06, "loss": 0.5923, "step": 24164 }, { "epoch": 0.7406215520411916, "grad_norm": 0.4807516713481974, "learning_rate": 1.6629374683724465e-06, "loss": 0.4026, "step": 24165 }, { "epoch": 0.7406522005639328, "grad_norm": 0.4459235435814599, "learning_rate": 1.6625678827654102e-06, "loss": 0.3811, "step": 24166 }, { "epoch": 0.740682849086674, "grad_norm": 1.2129801246112382, "learning_rate": 1.6621983300429495e-06, "loss": 0.5617, "step": 24167 }, { "epoch": 0.7407134976094152, "grad_norm": 1.377004810037642, "learning_rate": 1.6618288102087026e-06, "loss": 0.5834, "step": 24168 }, { "epoch": 0.7407441461321564, "grad_norm": 1.2217764695814892, "learning_rate": 1.6614593232663089e-06, "loss": 0.533, "step": 24169 }, { "epoch": 0.7407747946548976, "grad_norm": 1.1625374814617035, "learning_rate": 1.6610898692194106e-06, "loss": 0.5219, "step": 24170 }, { "epoch": 0.7408054431776389, "grad_norm": 1.337237794812855, "learning_rate": 1.6607204480716483e-06, "loss": 0.6279, "step": 24171 }, { "epoch": 0.74083609170038, "grad_norm": 1.3795214823695738, "learning_rate": 1.6603510598266631e-06, "loss": 0.6454, "step": 24172 }, { "epoch": 0.7408667402231213, "grad_norm": 1.384515439452503, "learning_rate": 1.6599817044880923e-06, "loss": 0.7136, "step": 24173 }, { "epoch": 0.7408973887458624, "grad_norm": 1.225635360583068, "learning_rate": 1.659612382059576e-06, "loss": 0.5708, "step": 24174 }, { "epoch": 0.7409280372686037, "grad_norm": 1.4915616456133352, "learning_rate": 1.6592430925447557e-06, "loss": 0.6848, "step": 24175 }, { "epoch": 0.7409586857913448, "grad_norm": 1.4330885922265884, "learning_rate": 1.6588738359472672e-06, "loss": 0.7091, "step": 24176 }, { "epoch": 0.7409893343140861, "grad_norm": 1.4759368470596521, "learning_rate": 1.6585046122707489e-06, "loss": 0.6831, "step": 24177 }, { "epoch": 0.7410199828368272, "grad_norm": 0.4782499881408257, "learning_rate": 1.658135421518839e-06, "loss": 0.4012, "step": 24178 }, { "epoch": 0.7410506313595685, "grad_norm": 1.358532473852233, "learning_rate": 1.6577662636951758e-06, "loss": 0.6864, "step": 24179 }, { "epoch": 0.7410812798823097, "grad_norm": 1.3220980747374649, "learning_rate": 1.6573971388033989e-06, "loss": 0.7077, "step": 24180 }, { "epoch": 0.7411119284050509, "grad_norm": 0.4735289359462096, "learning_rate": 1.6570280468471412e-06, "loss": 0.3926, "step": 24181 }, { "epoch": 0.7411425769277921, "grad_norm": 1.2213141020654674, "learning_rate": 1.6566589878300416e-06, "loss": 0.5988, "step": 24182 }, { "epoch": 0.7411732254505333, "grad_norm": 1.3120314211700723, "learning_rate": 1.6562899617557377e-06, "loss": 0.7048, "step": 24183 }, { "epoch": 0.7412038739732745, "grad_norm": 1.5728828672537167, "learning_rate": 1.6559209686278648e-06, "loss": 0.67, "step": 24184 }, { "epoch": 0.7412345224960157, "grad_norm": 1.3834130520834542, "learning_rate": 1.655552008450055e-06, "loss": 0.4963, "step": 24185 }, { "epoch": 0.7412651710187569, "grad_norm": 0.46232157233459875, "learning_rate": 1.6551830812259494e-06, "loss": 0.3999, "step": 24186 }, { "epoch": 0.7412958195414981, "grad_norm": 1.2500137868429362, "learning_rate": 1.65481418695918e-06, "loss": 0.5535, "step": 24187 }, { "epoch": 0.7413264680642393, "grad_norm": 1.375495959534244, "learning_rate": 1.6544453256533838e-06, "loss": 0.6737, "step": 24188 }, { "epoch": 0.7413571165869806, "grad_norm": 1.2309695835951449, "learning_rate": 1.654076497312192e-06, "loss": 0.6032, "step": 24189 }, { "epoch": 0.7413877651097217, "grad_norm": 1.3497699138293733, "learning_rate": 1.6537077019392406e-06, "loss": 0.5296, "step": 24190 }, { "epoch": 0.741418413632463, "grad_norm": 1.3155480420723256, "learning_rate": 1.653338939538165e-06, "loss": 0.6576, "step": 24191 }, { "epoch": 0.7414490621552041, "grad_norm": 0.45362981306808237, "learning_rate": 1.6529702101125955e-06, "loss": 0.3997, "step": 24192 }, { "epoch": 0.7414797106779453, "grad_norm": 1.2986875785320635, "learning_rate": 1.6526015136661666e-06, "loss": 0.6484, "step": 24193 }, { "epoch": 0.7415103592006865, "grad_norm": 1.3458999551574173, "learning_rate": 1.6522328502025137e-06, "loss": 0.6844, "step": 24194 }, { "epoch": 0.7415410077234277, "grad_norm": 1.2850422947090345, "learning_rate": 1.6518642197252666e-06, "loss": 0.6396, "step": 24195 }, { "epoch": 0.7415716562461689, "grad_norm": 0.4469384103532511, "learning_rate": 1.6514956222380552e-06, "loss": 0.3997, "step": 24196 }, { "epoch": 0.7416023047689101, "grad_norm": 1.2934254987501113, "learning_rate": 1.6511270577445171e-06, "loss": 0.5734, "step": 24197 }, { "epoch": 0.7416329532916514, "grad_norm": 1.3678871284832803, "learning_rate": 1.650758526248279e-06, "loss": 0.5482, "step": 24198 }, { "epoch": 0.7416636018143925, "grad_norm": 1.3011455313750426, "learning_rate": 1.6503900277529761e-06, "loss": 0.5241, "step": 24199 }, { "epoch": 0.7416942503371338, "grad_norm": 1.4233406935759862, "learning_rate": 1.6500215622622356e-06, "loss": 0.6317, "step": 24200 }, { "epoch": 0.7417248988598749, "grad_norm": 1.459279283698539, "learning_rate": 1.6496531297796902e-06, "loss": 0.6206, "step": 24201 }, { "epoch": 0.7417555473826162, "grad_norm": 0.45821974334783006, "learning_rate": 1.649284730308971e-06, "loss": 0.3863, "step": 24202 }, { "epoch": 0.7417861959053573, "grad_norm": 1.3219399276945054, "learning_rate": 1.6489163638537048e-06, "loss": 0.6507, "step": 24203 }, { "epoch": 0.7418168444280986, "grad_norm": 1.2650795760392115, "learning_rate": 1.6485480304175232e-06, "loss": 0.5288, "step": 24204 }, { "epoch": 0.7418474929508397, "grad_norm": 1.34101880535762, "learning_rate": 1.648179730004057e-06, "loss": 0.6346, "step": 24205 }, { "epoch": 0.741878141473581, "grad_norm": 0.44833317933402445, "learning_rate": 1.6478114626169322e-06, "loss": 0.4007, "step": 24206 }, { "epoch": 0.7419087899963221, "grad_norm": 0.4535346894512283, "learning_rate": 1.6474432282597784e-06, "loss": 0.392, "step": 24207 }, { "epoch": 0.7419394385190634, "grad_norm": 1.30526668211594, "learning_rate": 1.6470750269362263e-06, "loss": 0.6712, "step": 24208 }, { "epoch": 0.7419700870418046, "grad_norm": 1.220830119291529, "learning_rate": 1.6467068586498997e-06, "loss": 0.5073, "step": 24209 }, { "epoch": 0.7420007355645458, "grad_norm": 1.3648945938566608, "learning_rate": 1.6463387234044303e-06, "loss": 0.6085, "step": 24210 }, { "epoch": 0.742031384087287, "grad_norm": 1.360221138124019, "learning_rate": 1.6459706212034421e-06, "loss": 0.6029, "step": 24211 }, { "epoch": 0.7420620326100282, "grad_norm": 1.3208595054646255, "learning_rate": 1.6456025520505631e-06, "loss": 0.569, "step": 24212 }, { "epoch": 0.7420926811327694, "grad_norm": 1.5168110453195471, "learning_rate": 1.6452345159494222e-06, "loss": 0.6647, "step": 24213 }, { "epoch": 0.7421233296555106, "grad_norm": 0.46640496590718294, "learning_rate": 1.6448665129036423e-06, "loss": 0.385, "step": 24214 }, { "epoch": 0.7421539781782518, "grad_norm": 1.309688344615832, "learning_rate": 1.6444985429168514e-06, "loss": 0.6356, "step": 24215 }, { "epoch": 0.742184626700993, "grad_norm": 0.4610221288794272, "learning_rate": 1.6441306059926765e-06, "loss": 0.4157, "step": 24216 }, { "epoch": 0.7422152752237342, "grad_norm": 1.1827866554217206, "learning_rate": 1.643762702134739e-06, "loss": 0.634, "step": 24217 }, { "epoch": 0.7422459237464755, "grad_norm": 0.4351688432642816, "learning_rate": 1.6433948313466675e-06, "loss": 0.3972, "step": 24218 }, { "epoch": 0.7422765722692166, "grad_norm": 1.293314405987369, "learning_rate": 1.6430269936320864e-06, "loss": 0.595, "step": 24219 }, { "epoch": 0.7423072207919579, "grad_norm": 1.4140853921437888, "learning_rate": 1.6426591889946176e-06, "loss": 0.6442, "step": 24220 }, { "epoch": 0.742337869314699, "grad_norm": 1.6610290237671463, "learning_rate": 1.642291417437889e-06, "loss": 0.7154, "step": 24221 }, { "epoch": 0.7423685178374403, "grad_norm": 1.3723708379863597, "learning_rate": 1.6419236789655202e-06, "loss": 0.5975, "step": 24222 }, { "epoch": 0.7423991663601814, "grad_norm": 1.3085784305147088, "learning_rate": 1.6415559735811365e-06, "loss": 0.618, "step": 24223 }, { "epoch": 0.7424298148829226, "grad_norm": 1.395621565165697, "learning_rate": 1.641188301288363e-06, "loss": 0.6458, "step": 24224 }, { "epoch": 0.7424604634056639, "grad_norm": 1.3463192016582881, "learning_rate": 1.6408206620908185e-06, "loss": 0.5049, "step": 24225 }, { "epoch": 0.742491111928405, "grad_norm": 1.2124267515980003, "learning_rate": 1.6404530559921279e-06, "loss": 0.5727, "step": 24226 }, { "epoch": 0.7425217604511463, "grad_norm": 1.4490792113970197, "learning_rate": 1.640085482995914e-06, "loss": 0.6389, "step": 24227 }, { "epoch": 0.7425524089738874, "grad_norm": 1.1272257665083139, "learning_rate": 1.6397179431057965e-06, "loss": 0.6405, "step": 24228 }, { "epoch": 0.7425830574966287, "grad_norm": 1.4014230754957546, "learning_rate": 1.6393504363253986e-06, "loss": 0.5985, "step": 24229 }, { "epoch": 0.7426137060193698, "grad_norm": 1.4885846391569335, "learning_rate": 1.6389829626583404e-06, "loss": 0.6066, "step": 24230 }, { "epoch": 0.7426443545421111, "grad_norm": 1.3574378042759203, "learning_rate": 1.6386155221082422e-06, "loss": 0.6245, "step": 24231 }, { "epoch": 0.7426750030648522, "grad_norm": 1.2843867401774063, "learning_rate": 1.6382481146787272e-06, "loss": 0.6882, "step": 24232 }, { "epoch": 0.7427056515875935, "grad_norm": 1.4177079191183983, "learning_rate": 1.6378807403734115e-06, "loss": 0.6971, "step": 24233 }, { "epoch": 0.7427363001103346, "grad_norm": 1.388190886112617, "learning_rate": 1.6375133991959174e-06, "loss": 0.6335, "step": 24234 }, { "epoch": 0.7427669486330759, "grad_norm": 1.2171076849082332, "learning_rate": 1.637146091149866e-06, "loss": 0.585, "step": 24235 }, { "epoch": 0.7427975971558171, "grad_norm": 1.241339036219041, "learning_rate": 1.6367788162388732e-06, "loss": 0.5319, "step": 24236 }, { "epoch": 0.7428282456785583, "grad_norm": 1.3202912372575062, "learning_rate": 1.6364115744665588e-06, "loss": 0.6317, "step": 24237 }, { "epoch": 0.7428588942012995, "grad_norm": 1.353973863039807, "learning_rate": 1.6360443658365433e-06, "loss": 0.6124, "step": 24238 }, { "epoch": 0.7428895427240407, "grad_norm": 0.4285844690812854, "learning_rate": 1.6356771903524416e-06, "loss": 0.3881, "step": 24239 }, { "epoch": 0.7429201912467819, "grad_norm": 0.4513392969676777, "learning_rate": 1.6353100480178756e-06, "loss": 0.3929, "step": 24240 }, { "epoch": 0.7429508397695231, "grad_norm": 1.251373243944695, "learning_rate": 1.6349429388364568e-06, "loss": 0.6134, "step": 24241 }, { "epoch": 0.7429814882922643, "grad_norm": 1.36501355922874, "learning_rate": 1.6345758628118096e-06, "loss": 0.6145, "step": 24242 }, { "epoch": 0.7430121368150056, "grad_norm": 1.2570277712521407, "learning_rate": 1.6342088199475475e-06, "loss": 0.6599, "step": 24243 }, { "epoch": 0.7430427853377467, "grad_norm": 1.0175570212574812, "learning_rate": 1.6338418102472857e-06, "loss": 0.5359, "step": 24244 }, { "epoch": 0.743073433860488, "grad_norm": 1.2594034499413642, "learning_rate": 1.6334748337146417e-06, "loss": 0.5847, "step": 24245 }, { "epoch": 0.7431040823832291, "grad_norm": 1.3621491560683223, "learning_rate": 1.6331078903532332e-06, "loss": 0.6367, "step": 24246 }, { "epoch": 0.7431347309059704, "grad_norm": 0.44203152616011016, "learning_rate": 1.6327409801666722e-06, "loss": 0.3751, "step": 24247 }, { "epoch": 0.7431653794287115, "grad_norm": 1.2333567766144364, "learning_rate": 1.632374103158576e-06, "loss": 0.5883, "step": 24248 }, { "epoch": 0.7431960279514528, "grad_norm": 1.4041688107081376, "learning_rate": 1.6320072593325608e-06, "loss": 0.6108, "step": 24249 }, { "epoch": 0.7432266764741939, "grad_norm": 1.6377711345645283, "learning_rate": 1.631640448692239e-06, "loss": 0.6255, "step": 24250 }, { "epoch": 0.7432573249969352, "grad_norm": 1.3914686337662416, "learning_rate": 1.6312736712412264e-06, "loss": 0.6281, "step": 24251 }, { "epoch": 0.7432879735196763, "grad_norm": 0.43379266987814097, "learning_rate": 1.6309069269831334e-06, "loss": 0.3866, "step": 24252 }, { "epoch": 0.7433186220424176, "grad_norm": 1.4412363055641269, "learning_rate": 1.6305402159215799e-06, "loss": 0.6393, "step": 24253 }, { "epoch": 0.7433492705651588, "grad_norm": 1.2164002467639996, "learning_rate": 1.6301735380601751e-06, "loss": 0.6572, "step": 24254 }, { "epoch": 0.7433799190878999, "grad_norm": 1.1489909159758391, "learning_rate": 1.6298068934025318e-06, "loss": 0.5061, "step": 24255 }, { "epoch": 0.7434105676106412, "grad_norm": 1.2200549706452164, "learning_rate": 1.6294402819522632e-06, "loss": 0.6066, "step": 24256 }, { "epoch": 0.7434412161333823, "grad_norm": 1.4639254261018677, "learning_rate": 1.6290737037129834e-06, "loss": 0.6368, "step": 24257 }, { "epoch": 0.7434718646561236, "grad_norm": 1.3128621105580398, "learning_rate": 1.6287071586883014e-06, "loss": 0.5706, "step": 24258 }, { "epoch": 0.7435025131788647, "grad_norm": 1.3292965904572065, "learning_rate": 1.6283406468818303e-06, "loss": 0.4959, "step": 24259 }, { "epoch": 0.743533161701606, "grad_norm": 1.4168774265143038, "learning_rate": 1.627974168297181e-06, "loss": 0.5658, "step": 24260 }, { "epoch": 0.7435638102243471, "grad_norm": 1.4197244744091817, "learning_rate": 1.6276077229379672e-06, "loss": 0.5649, "step": 24261 }, { "epoch": 0.7435944587470884, "grad_norm": 1.3536397968588714, "learning_rate": 1.6272413108077973e-06, "loss": 0.6179, "step": 24262 }, { "epoch": 0.7436251072698296, "grad_norm": 1.468288594442599, "learning_rate": 1.6268749319102784e-06, "loss": 0.7079, "step": 24263 }, { "epoch": 0.7436557557925708, "grad_norm": 0.445124715217308, "learning_rate": 1.626508586249027e-06, "loss": 0.4189, "step": 24264 }, { "epoch": 0.743686404315312, "grad_norm": 1.308250236979864, "learning_rate": 1.62614227382765e-06, "loss": 0.6734, "step": 24265 }, { "epoch": 0.7437170528380532, "grad_norm": 1.143476559708098, "learning_rate": 1.6257759946497542e-06, "loss": 0.4649, "step": 24266 }, { "epoch": 0.7437477013607944, "grad_norm": 1.0936066324540676, "learning_rate": 1.6254097487189513e-06, "loss": 0.6283, "step": 24267 }, { "epoch": 0.7437783498835356, "grad_norm": 0.4851774280581948, "learning_rate": 1.6250435360388494e-06, "loss": 0.3994, "step": 24268 }, { "epoch": 0.7438089984062768, "grad_norm": 1.1320644923928027, "learning_rate": 1.624677356613059e-06, "loss": 0.4985, "step": 24269 }, { "epoch": 0.743839646929018, "grad_norm": 1.2394635264005662, "learning_rate": 1.624311210445184e-06, "loss": 0.5187, "step": 24270 }, { "epoch": 0.7438702954517592, "grad_norm": 1.4892916485197278, "learning_rate": 1.623945097538835e-06, "loss": 0.7122, "step": 24271 }, { "epoch": 0.7439009439745005, "grad_norm": 1.4199512379194212, "learning_rate": 1.62357901789762e-06, "loss": 0.5348, "step": 24272 }, { "epoch": 0.7439315924972416, "grad_norm": 1.5312152396834289, "learning_rate": 1.6232129715251449e-06, "loss": 0.5656, "step": 24273 }, { "epoch": 0.7439622410199829, "grad_norm": 1.2185488483957465, "learning_rate": 1.6228469584250151e-06, "loss": 0.5841, "step": 24274 }, { "epoch": 0.743992889542724, "grad_norm": 1.3068849232864321, "learning_rate": 1.6224809786008377e-06, "loss": 0.5811, "step": 24275 }, { "epoch": 0.7440235380654653, "grad_norm": 1.3688687215969089, "learning_rate": 1.6221150320562212e-06, "loss": 0.5798, "step": 24276 }, { "epoch": 0.7440541865882064, "grad_norm": 1.2800023331900592, "learning_rate": 1.6217491187947682e-06, "loss": 0.5684, "step": 24277 }, { "epoch": 0.7440848351109477, "grad_norm": 1.2974621153821249, "learning_rate": 1.621383238820085e-06, "loss": 0.6248, "step": 24278 }, { "epoch": 0.7441154836336888, "grad_norm": 1.2125674631391765, "learning_rate": 1.6210173921357775e-06, "loss": 0.533, "step": 24279 }, { "epoch": 0.7441461321564301, "grad_norm": 1.2593470647369767, "learning_rate": 1.6206515787454518e-06, "loss": 0.6007, "step": 24280 }, { "epoch": 0.7441767806791713, "grad_norm": 1.200837835846641, "learning_rate": 1.620285798652711e-06, "loss": 0.5142, "step": 24281 }, { "epoch": 0.7442074292019125, "grad_norm": 1.3609069424019073, "learning_rate": 1.6199200518611553e-06, "loss": 0.5848, "step": 24282 }, { "epoch": 0.7442380777246537, "grad_norm": 1.3135330149187674, "learning_rate": 1.6195543383743956e-06, "loss": 0.5605, "step": 24283 }, { "epoch": 0.7442687262473949, "grad_norm": 1.4696549046674237, "learning_rate": 1.619188658196032e-06, "loss": 0.5721, "step": 24284 }, { "epoch": 0.7442993747701361, "grad_norm": 1.1989526954902345, "learning_rate": 1.618823011329666e-06, "loss": 0.4995, "step": 24285 }, { "epoch": 0.7443300232928772, "grad_norm": 1.387585584643163, "learning_rate": 1.6184573977789014e-06, "loss": 0.6016, "step": 24286 }, { "epoch": 0.7443606718156185, "grad_norm": 0.45588821689032166, "learning_rate": 1.618091817547342e-06, "loss": 0.3646, "step": 24287 }, { "epoch": 0.7443913203383596, "grad_norm": 1.3088301973136771, "learning_rate": 1.6177262706385904e-06, "loss": 0.6912, "step": 24288 }, { "epoch": 0.7444219688611009, "grad_norm": 1.2709375242706413, "learning_rate": 1.617360757056246e-06, "loss": 0.4744, "step": 24289 }, { "epoch": 0.744452617383842, "grad_norm": 1.3211808543982257, "learning_rate": 1.616995276803911e-06, "loss": 0.5925, "step": 24290 }, { "epoch": 0.7444832659065833, "grad_norm": 1.264623752934023, "learning_rate": 1.616629829885189e-06, "loss": 0.5524, "step": 24291 }, { "epoch": 0.7445139144293245, "grad_norm": 1.5578651359801707, "learning_rate": 1.6162644163036795e-06, "loss": 0.6652, "step": 24292 }, { "epoch": 0.7445445629520657, "grad_norm": 0.44940050941513643, "learning_rate": 1.6158990360629783e-06, "loss": 0.3906, "step": 24293 }, { "epoch": 0.7445752114748069, "grad_norm": 1.4058147169735344, "learning_rate": 1.6155336891666935e-06, "loss": 0.5672, "step": 24294 }, { "epoch": 0.7446058599975481, "grad_norm": 1.3841202492413929, "learning_rate": 1.6151683756184193e-06, "loss": 0.7083, "step": 24295 }, { "epoch": 0.7446365085202893, "grad_norm": 0.4406082146465688, "learning_rate": 1.6148030954217592e-06, "loss": 0.3871, "step": 24296 }, { "epoch": 0.7446671570430305, "grad_norm": 0.44469177963589984, "learning_rate": 1.6144378485803086e-06, "loss": 0.3914, "step": 24297 }, { "epoch": 0.7446978055657717, "grad_norm": 1.27977894027731, "learning_rate": 1.6140726350976683e-06, "loss": 0.5984, "step": 24298 }, { "epoch": 0.744728454088513, "grad_norm": 1.4768029100207243, "learning_rate": 1.613707454977438e-06, "loss": 0.6377, "step": 24299 }, { "epoch": 0.7447591026112541, "grad_norm": 1.312439167514906, "learning_rate": 1.6133423082232131e-06, "loss": 0.668, "step": 24300 }, { "epoch": 0.7447897511339954, "grad_norm": 0.4258504738294503, "learning_rate": 1.6129771948385926e-06, "loss": 0.3863, "step": 24301 }, { "epoch": 0.7448203996567365, "grad_norm": 1.4150600030446758, "learning_rate": 1.612612114827176e-06, "loss": 0.6128, "step": 24302 }, { "epoch": 0.7448510481794778, "grad_norm": 1.3698458882771878, "learning_rate": 1.6122470681925594e-06, "loss": 0.6546, "step": 24303 }, { "epoch": 0.7448816967022189, "grad_norm": 1.3128578931134502, "learning_rate": 1.6118820549383358e-06, "loss": 0.6487, "step": 24304 }, { "epoch": 0.7449123452249602, "grad_norm": 1.2680099613055316, "learning_rate": 1.611517075068108e-06, "loss": 0.5798, "step": 24305 }, { "epoch": 0.7449429937477013, "grad_norm": 1.411955572746987, "learning_rate": 1.6111521285854687e-06, "loss": 0.603, "step": 24306 }, { "epoch": 0.7449736422704426, "grad_norm": 0.42493423825802673, "learning_rate": 1.6107872154940152e-06, "loss": 0.4057, "step": 24307 }, { "epoch": 0.7450042907931838, "grad_norm": 1.476083227957759, "learning_rate": 1.6104223357973414e-06, "loss": 0.6515, "step": 24308 }, { "epoch": 0.745034939315925, "grad_norm": 1.3804355937971127, "learning_rate": 1.6100574894990433e-06, "loss": 0.6112, "step": 24309 }, { "epoch": 0.7450655878386662, "grad_norm": 1.4183399803628356, "learning_rate": 1.6096926766027183e-06, "loss": 0.589, "step": 24310 }, { "epoch": 0.7450962363614074, "grad_norm": 1.1617011326849087, "learning_rate": 1.6093278971119569e-06, "loss": 0.5163, "step": 24311 }, { "epoch": 0.7451268848841486, "grad_norm": 1.1989041567072205, "learning_rate": 1.608963151030355e-06, "loss": 0.5451, "step": 24312 }, { "epoch": 0.7451575334068898, "grad_norm": 1.4579518276989367, "learning_rate": 1.6085984383615084e-06, "loss": 0.6, "step": 24313 }, { "epoch": 0.745188181929631, "grad_norm": 1.4354006045285541, "learning_rate": 1.608233759109008e-06, "loss": 0.6347, "step": 24314 }, { "epoch": 0.7452188304523722, "grad_norm": 1.1472826113113046, "learning_rate": 1.6078691132764478e-06, "loss": 0.5782, "step": 24315 }, { "epoch": 0.7452494789751134, "grad_norm": 1.3498220933687861, "learning_rate": 1.6075045008674228e-06, "loss": 0.678, "step": 24316 }, { "epoch": 0.7452801274978545, "grad_norm": 1.3801397355764042, "learning_rate": 1.6071399218855222e-06, "loss": 0.5401, "step": 24317 }, { "epoch": 0.7453107760205958, "grad_norm": 1.4142354878170535, "learning_rate": 1.606775376334342e-06, "loss": 0.712, "step": 24318 }, { "epoch": 0.745341424543337, "grad_norm": 1.2463216602578921, "learning_rate": 1.6064108642174702e-06, "loss": 0.5748, "step": 24319 }, { "epoch": 0.7453720730660782, "grad_norm": 0.4941682170583755, "learning_rate": 1.6060463855385005e-06, "loss": 0.3939, "step": 24320 }, { "epoch": 0.7454027215888194, "grad_norm": 1.2031044316535644, "learning_rate": 1.6056819403010265e-06, "loss": 0.6177, "step": 24321 }, { "epoch": 0.7454333701115606, "grad_norm": 1.3850209307475543, "learning_rate": 1.6053175285086341e-06, "loss": 0.6159, "step": 24322 }, { "epoch": 0.7454640186343018, "grad_norm": 1.3736965997794242, "learning_rate": 1.6049531501649173e-06, "loss": 0.6259, "step": 24323 }, { "epoch": 0.745494667157043, "grad_norm": 1.4120273502105622, "learning_rate": 1.6045888052734676e-06, "loss": 0.6385, "step": 24324 }, { "epoch": 0.7455253156797842, "grad_norm": 1.36834377138138, "learning_rate": 1.6042244938378709e-06, "loss": 0.6708, "step": 24325 }, { "epoch": 0.7455559642025255, "grad_norm": 1.4577426053666303, "learning_rate": 1.6038602158617211e-06, "loss": 0.659, "step": 24326 }, { "epoch": 0.7455866127252666, "grad_norm": 1.5088209849283336, "learning_rate": 1.6034959713486043e-06, "loss": 0.6844, "step": 24327 }, { "epoch": 0.7456172612480079, "grad_norm": 1.3646607336063061, "learning_rate": 1.6031317603021101e-06, "loss": 0.5663, "step": 24328 }, { "epoch": 0.745647909770749, "grad_norm": 1.3878122368683619, "learning_rate": 1.6027675827258294e-06, "loss": 0.5919, "step": 24329 }, { "epoch": 0.7456785582934903, "grad_norm": 1.5075223938740656, "learning_rate": 1.6024034386233477e-06, "loss": 0.6823, "step": 24330 }, { "epoch": 0.7457092068162314, "grad_norm": 1.3768889665888326, "learning_rate": 1.6020393279982539e-06, "loss": 0.5729, "step": 24331 }, { "epoch": 0.7457398553389727, "grad_norm": 1.2215961924729009, "learning_rate": 1.6016752508541377e-06, "loss": 0.6258, "step": 24332 }, { "epoch": 0.7457705038617138, "grad_norm": 1.2280190636592085, "learning_rate": 1.6013112071945835e-06, "loss": 0.6409, "step": 24333 }, { "epoch": 0.7458011523844551, "grad_norm": 1.6049095250803513, "learning_rate": 1.6009471970231793e-06, "loss": 0.5707, "step": 24334 }, { "epoch": 0.7458318009071963, "grad_norm": 1.4536297426577143, "learning_rate": 1.6005832203435135e-06, "loss": 0.6496, "step": 24335 }, { "epoch": 0.7458624494299375, "grad_norm": 1.3698078015215318, "learning_rate": 1.6002192771591697e-06, "loss": 0.6402, "step": 24336 }, { "epoch": 0.7458930979526787, "grad_norm": 0.43017984887043287, "learning_rate": 1.5998553674737365e-06, "loss": 0.3936, "step": 24337 }, { "epoch": 0.7459237464754199, "grad_norm": 1.2832953132054192, "learning_rate": 1.5994914912907973e-06, "loss": 0.6065, "step": 24338 }, { "epoch": 0.7459543949981611, "grad_norm": 1.2584245273300059, "learning_rate": 1.599127648613938e-06, "loss": 0.5255, "step": 24339 }, { "epoch": 0.7459850435209023, "grad_norm": 1.4689053295436512, "learning_rate": 1.5987638394467454e-06, "loss": 0.646, "step": 24340 }, { "epoch": 0.7460156920436435, "grad_norm": 1.091602241644695, "learning_rate": 1.598400063792802e-06, "loss": 0.5446, "step": 24341 }, { "epoch": 0.7460463405663847, "grad_norm": 1.2855536077722123, "learning_rate": 1.5980363216556926e-06, "loss": 0.6161, "step": 24342 }, { "epoch": 0.7460769890891259, "grad_norm": 1.2056393071120992, "learning_rate": 1.5976726130390036e-06, "loss": 0.5341, "step": 24343 }, { "epoch": 0.7461076376118672, "grad_norm": 1.2390089843533463, "learning_rate": 1.5973089379463152e-06, "loss": 0.6153, "step": 24344 }, { "epoch": 0.7461382861346083, "grad_norm": 1.3100246984943889, "learning_rate": 1.5969452963812126e-06, "loss": 0.6041, "step": 24345 }, { "epoch": 0.7461689346573496, "grad_norm": 1.345210424667892, "learning_rate": 1.5965816883472807e-06, "loss": 0.6094, "step": 24346 }, { "epoch": 0.7461995831800907, "grad_norm": 1.289928983739, "learning_rate": 1.5962181138480981e-06, "loss": 0.6019, "step": 24347 }, { "epoch": 0.7462302317028319, "grad_norm": 1.2799506100943356, "learning_rate": 1.595854572887251e-06, "loss": 0.5478, "step": 24348 }, { "epoch": 0.7462608802255731, "grad_norm": 1.4208176071282999, "learning_rate": 1.595491065468318e-06, "loss": 0.5812, "step": 24349 }, { "epoch": 0.7462915287483143, "grad_norm": 1.246576329996, "learning_rate": 1.5951275915948827e-06, "loss": 0.5477, "step": 24350 }, { "epoch": 0.7463221772710555, "grad_norm": 1.3661982390011402, "learning_rate": 1.5947641512705282e-06, "loss": 0.628, "step": 24351 }, { "epoch": 0.7463528257937967, "grad_norm": 0.45143281208761044, "learning_rate": 1.5944007444988318e-06, "loss": 0.3949, "step": 24352 }, { "epoch": 0.746383474316538, "grad_norm": 1.3722920638267169, "learning_rate": 1.5940373712833768e-06, "loss": 0.6586, "step": 24353 }, { "epoch": 0.7464141228392791, "grad_norm": 1.2888771834864658, "learning_rate": 1.5936740316277444e-06, "loss": 0.6577, "step": 24354 }, { "epoch": 0.7464447713620204, "grad_norm": 1.3245241647217714, "learning_rate": 1.5933107255355113e-06, "loss": 0.5505, "step": 24355 }, { "epoch": 0.7464754198847615, "grad_norm": 1.3130794420823837, "learning_rate": 1.5929474530102596e-06, "loss": 0.6739, "step": 24356 }, { "epoch": 0.7465060684075028, "grad_norm": 1.180308190833931, "learning_rate": 1.5925842140555704e-06, "loss": 0.5324, "step": 24357 }, { "epoch": 0.7465367169302439, "grad_norm": 1.3328041029847122, "learning_rate": 1.5922210086750183e-06, "loss": 0.578, "step": 24358 }, { "epoch": 0.7465673654529852, "grad_norm": 1.2701721439370677, "learning_rate": 1.5918578368721865e-06, "loss": 0.5033, "step": 24359 }, { "epoch": 0.7465980139757263, "grad_norm": 1.3845348385213259, "learning_rate": 1.5914946986506502e-06, "loss": 0.6953, "step": 24360 }, { "epoch": 0.7466286624984676, "grad_norm": 1.2654807433862656, "learning_rate": 1.5911315940139883e-06, "loss": 0.5939, "step": 24361 }, { "epoch": 0.7466593110212087, "grad_norm": 1.2300854352153325, "learning_rate": 1.590768522965781e-06, "loss": 0.5825, "step": 24362 }, { "epoch": 0.74668995954395, "grad_norm": 1.3197672785185557, "learning_rate": 1.5904054855096019e-06, "loss": 0.6422, "step": 24363 }, { "epoch": 0.7467206080666912, "grad_norm": 1.3692973608140788, "learning_rate": 1.5900424816490295e-06, "loss": 0.7126, "step": 24364 }, { "epoch": 0.7467512565894324, "grad_norm": 1.2350082745437565, "learning_rate": 1.5896795113876435e-06, "loss": 0.55, "step": 24365 }, { "epoch": 0.7467819051121736, "grad_norm": 1.3226422635145225, "learning_rate": 1.5893165747290156e-06, "loss": 0.5063, "step": 24366 }, { "epoch": 0.7468125536349148, "grad_norm": 1.3357046250877194, "learning_rate": 1.5889536716767246e-06, "loss": 0.6865, "step": 24367 }, { "epoch": 0.746843202157656, "grad_norm": 1.1647460836133312, "learning_rate": 1.5885908022343454e-06, "loss": 0.5572, "step": 24368 }, { "epoch": 0.7468738506803972, "grad_norm": 1.2861722619951346, "learning_rate": 1.5882279664054557e-06, "loss": 0.5695, "step": 24369 }, { "epoch": 0.7469044992031384, "grad_norm": 1.3287808638024263, "learning_rate": 1.5878651641936283e-06, "loss": 0.6506, "step": 24370 }, { "epoch": 0.7469351477258797, "grad_norm": 1.3102389758816846, "learning_rate": 1.5875023956024377e-06, "loss": 0.5086, "step": 24371 }, { "epoch": 0.7469657962486208, "grad_norm": 1.3428071949968499, "learning_rate": 1.5871396606354584e-06, "loss": 0.5398, "step": 24372 }, { "epoch": 0.7469964447713621, "grad_norm": 0.45464794945839776, "learning_rate": 1.5867769592962673e-06, "loss": 0.4004, "step": 24373 }, { "epoch": 0.7470270932941032, "grad_norm": 0.4645646648900408, "learning_rate": 1.586414291588434e-06, "loss": 0.4025, "step": 24374 }, { "epoch": 0.7470577418168445, "grad_norm": 1.2821254881729531, "learning_rate": 1.586051657515535e-06, "loss": 0.6293, "step": 24375 }, { "epoch": 0.7470883903395856, "grad_norm": 1.3565745372653102, "learning_rate": 1.5856890570811433e-06, "loss": 0.6567, "step": 24376 }, { "epoch": 0.7471190388623269, "grad_norm": 1.3565069093757047, "learning_rate": 1.5853264902888294e-06, "loss": 0.5829, "step": 24377 }, { "epoch": 0.747149687385068, "grad_norm": 1.210488647365366, "learning_rate": 1.5849639571421693e-06, "loss": 0.6385, "step": 24378 }, { "epoch": 0.7471803359078092, "grad_norm": 1.186025539477348, "learning_rate": 1.5846014576447294e-06, "loss": 0.5755, "step": 24379 }, { "epoch": 0.7472109844305505, "grad_norm": 0.4603586525880195, "learning_rate": 1.5842389918000888e-06, "loss": 0.4051, "step": 24380 }, { "epoch": 0.7472416329532916, "grad_norm": 1.415140410244018, "learning_rate": 1.583876559611815e-06, "loss": 0.5895, "step": 24381 }, { "epoch": 0.7472722814760329, "grad_norm": 0.444365445313553, "learning_rate": 1.583514161083478e-06, "loss": 0.3818, "step": 24382 }, { "epoch": 0.747302929998774, "grad_norm": 1.447065196734294, "learning_rate": 1.5831517962186505e-06, "loss": 0.6258, "step": 24383 }, { "epoch": 0.7473335785215153, "grad_norm": 1.2902927346289603, "learning_rate": 1.582789465020904e-06, "loss": 0.635, "step": 24384 }, { "epoch": 0.7473642270442564, "grad_norm": 1.2754924866697193, "learning_rate": 1.5824271674938057e-06, "loss": 0.545, "step": 24385 }, { "epoch": 0.7473948755669977, "grad_norm": 1.519403469226705, "learning_rate": 1.5820649036409269e-06, "loss": 0.6688, "step": 24386 }, { "epoch": 0.7474255240897388, "grad_norm": 1.3711921780863507, "learning_rate": 1.5817026734658369e-06, "loss": 0.5604, "step": 24387 }, { "epoch": 0.7474561726124801, "grad_norm": 1.1973073105990584, "learning_rate": 1.581340476972107e-06, "loss": 0.6349, "step": 24388 }, { "epoch": 0.7474868211352212, "grad_norm": 1.2669140334280649, "learning_rate": 1.580978314163304e-06, "loss": 0.6613, "step": 24389 }, { "epoch": 0.7475174696579625, "grad_norm": 1.4036691223808389, "learning_rate": 1.580616185042993e-06, "loss": 0.613, "step": 24390 }, { "epoch": 0.7475481181807037, "grad_norm": 1.3007710732013793, "learning_rate": 1.580254089614749e-06, "loss": 0.5252, "step": 24391 }, { "epoch": 0.7475787667034449, "grad_norm": 1.1735733909013046, "learning_rate": 1.5798920278821362e-06, "loss": 0.6115, "step": 24392 }, { "epoch": 0.7476094152261861, "grad_norm": 1.4415601696276827, "learning_rate": 1.5795299998487212e-06, "loss": 0.6483, "step": 24393 }, { "epoch": 0.7476400637489273, "grad_norm": 0.46085723036959436, "learning_rate": 1.579168005518072e-06, "loss": 0.3957, "step": 24394 }, { "epoch": 0.7476707122716685, "grad_norm": 1.427419194496152, "learning_rate": 1.5788060448937554e-06, "loss": 0.662, "step": 24395 }, { "epoch": 0.7477013607944097, "grad_norm": 1.5111117744701172, "learning_rate": 1.5784441179793402e-06, "loss": 0.6242, "step": 24396 }, { "epoch": 0.7477320093171509, "grad_norm": 1.387109447317295, "learning_rate": 1.5780822247783888e-06, "loss": 0.6917, "step": 24397 }, { "epoch": 0.7477626578398922, "grad_norm": 1.5577854243766474, "learning_rate": 1.577720365294469e-06, "loss": 0.6217, "step": 24398 }, { "epoch": 0.7477933063626333, "grad_norm": 1.1894138422351512, "learning_rate": 1.5773585395311474e-06, "loss": 0.5474, "step": 24399 }, { "epoch": 0.7478239548853746, "grad_norm": 0.442811812729214, "learning_rate": 1.576996747491988e-06, "loss": 0.401, "step": 24400 }, { "epoch": 0.7478546034081157, "grad_norm": 1.1663768066383757, "learning_rate": 1.5766349891805521e-06, "loss": 0.5458, "step": 24401 }, { "epoch": 0.747885251930857, "grad_norm": 1.4031591994447334, "learning_rate": 1.5762732646004109e-06, "loss": 0.5649, "step": 24402 }, { "epoch": 0.7479159004535981, "grad_norm": 1.2444714166172604, "learning_rate": 1.575911573755125e-06, "loss": 0.5151, "step": 24403 }, { "epoch": 0.7479465489763394, "grad_norm": 0.42334418644651883, "learning_rate": 1.5755499166482568e-06, "loss": 0.3922, "step": 24404 }, { "epoch": 0.7479771974990805, "grad_norm": 1.1579807068336008, "learning_rate": 1.5751882932833717e-06, "loss": 0.6573, "step": 24405 }, { "epoch": 0.7480078460218218, "grad_norm": 1.195123609795922, "learning_rate": 1.574826703664033e-06, "loss": 0.5773, "step": 24406 }, { "epoch": 0.748038494544563, "grad_norm": 1.2814273088365475, "learning_rate": 1.574465147793804e-06, "loss": 0.5469, "step": 24407 }, { "epoch": 0.7480691430673042, "grad_norm": 0.4523966631524858, "learning_rate": 1.5741036256762455e-06, "loss": 0.4026, "step": 24408 }, { "epoch": 0.7480997915900454, "grad_norm": 1.4841331754641531, "learning_rate": 1.5737421373149198e-06, "loss": 0.6456, "step": 24409 }, { "epoch": 0.7481304401127865, "grad_norm": 1.3530855292492219, "learning_rate": 1.5733806827133913e-06, "loss": 0.6651, "step": 24410 }, { "epoch": 0.7481610886355278, "grad_norm": 1.4764895781819787, "learning_rate": 1.57301926187522e-06, "loss": 0.6348, "step": 24411 }, { "epoch": 0.7481917371582689, "grad_norm": 0.47692151440776465, "learning_rate": 1.572657874803965e-06, "loss": 0.4192, "step": 24412 }, { "epoch": 0.7482223856810102, "grad_norm": 0.45572460786035457, "learning_rate": 1.5722965215031888e-06, "loss": 0.4115, "step": 24413 }, { "epoch": 0.7482530342037513, "grad_norm": 1.289267352339736, "learning_rate": 1.5719352019764516e-06, "loss": 0.6024, "step": 24414 }, { "epoch": 0.7482836827264926, "grad_norm": 1.2652547666141132, "learning_rate": 1.5715739162273163e-06, "loss": 0.5762, "step": 24415 }, { "epoch": 0.7483143312492337, "grad_norm": 1.2383809357363837, "learning_rate": 1.5712126642593385e-06, "loss": 0.6005, "step": 24416 }, { "epoch": 0.748344979771975, "grad_norm": 1.3959317433127267, "learning_rate": 1.5708514460760794e-06, "loss": 0.644, "step": 24417 }, { "epoch": 0.7483756282947162, "grad_norm": 1.3076017507599313, "learning_rate": 1.5704902616811002e-06, "loss": 0.6064, "step": 24418 }, { "epoch": 0.7484062768174574, "grad_norm": 1.4494605705555186, "learning_rate": 1.5701291110779565e-06, "loss": 0.6464, "step": 24419 }, { "epoch": 0.7484369253401986, "grad_norm": 0.4761494284943721, "learning_rate": 1.5697679942702077e-06, "loss": 0.4108, "step": 24420 }, { "epoch": 0.7484675738629398, "grad_norm": 1.3060371347801774, "learning_rate": 1.5694069112614146e-06, "loss": 0.6489, "step": 24421 }, { "epoch": 0.748498222385681, "grad_norm": 1.3037771616617773, "learning_rate": 1.5690458620551313e-06, "loss": 0.575, "step": 24422 }, { "epoch": 0.7485288709084222, "grad_norm": 1.3211619262979832, "learning_rate": 1.5686848466549182e-06, "loss": 0.6016, "step": 24423 }, { "epoch": 0.7485595194311634, "grad_norm": 0.45318014739048856, "learning_rate": 1.5683238650643302e-06, "loss": 0.4047, "step": 24424 }, { "epoch": 0.7485901679539047, "grad_norm": 1.2862101355954865, "learning_rate": 1.567962917286925e-06, "loss": 0.5613, "step": 24425 }, { "epoch": 0.7486208164766458, "grad_norm": 1.1820242998135126, "learning_rate": 1.5676020033262606e-06, "loss": 0.5875, "step": 24426 }, { "epoch": 0.7486514649993871, "grad_norm": 1.3625837053898657, "learning_rate": 1.5672411231858903e-06, "loss": 0.6381, "step": 24427 }, { "epoch": 0.7486821135221282, "grad_norm": 1.2730414435369846, "learning_rate": 1.566880276869372e-06, "loss": 0.6635, "step": 24428 }, { "epoch": 0.7487127620448695, "grad_norm": 1.3507329602196958, "learning_rate": 1.5665194643802617e-06, "loss": 0.6598, "step": 24429 }, { "epoch": 0.7487434105676106, "grad_norm": 1.3609820871844616, "learning_rate": 1.5661586857221139e-06, "loss": 0.5625, "step": 24430 }, { "epoch": 0.7487740590903519, "grad_norm": 1.3280394429618096, "learning_rate": 1.5657979408984803e-06, "loss": 0.6325, "step": 24431 }, { "epoch": 0.748804707613093, "grad_norm": 1.5130181448351576, "learning_rate": 1.5654372299129212e-06, "loss": 0.6017, "step": 24432 }, { "epoch": 0.7488353561358343, "grad_norm": 1.2467019429153132, "learning_rate": 1.565076552768986e-06, "loss": 0.5948, "step": 24433 }, { "epoch": 0.7488660046585754, "grad_norm": 1.3608455917861482, "learning_rate": 1.5647159094702325e-06, "loss": 0.6427, "step": 24434 }, { "epoch": 0.7488966531813167, "grad_norm": 1.1545483753785801, "learning_rate": 1.56435530002021e-06, "loss": 0.5313, "step": 24435 }, { "epoch": 0.7489273017040579, "grad_norm": 1.3602648372233954, "learning_rate": 1.563994724422474e-06, "loss": 0.5868, "step": 24436 }, { "epoch": 0.7489579502267991, "grad_norm": 1.3350068502939294, "learning_rate": 1.5636341826805783e-06, "loss": 0.6578, "step": 24437 }, { "epoch": 0.7489885987495403, "grad_norm": 1.2923520145925629, "learning_rate": 1.563273674798073e-06, "loss": 0.6033, "step": 24438 }, { "epoch": 0.7490192472722815, "grad_norm": 1.4068208575515813, "learning_rate": 1.562913200778512e-06, "loss": 0.5584, "step": 24439 }, { "epoch": 0.7490498957950227, "grad_norm": 1.3410061503454693, "learning_rate": 1.5625527606254477e-06, "loss": 0.6039, "step": 24440 }, { "epoch": 0.7490805443177638, "grad_norm": 1.2987227566686583, "learning_rate": 1.5621923543424288e-06, "loss": 0.5679, "step": 24441 }, { "epoch": 0.7491111928405051, "grad_norm": 1.4086452246196737, "learning_rate": 1.5618319819330086e-06, "loss": 0.6001, "step": 24442 }, { "epoch": 0.7491418413632462, "grad_norm": 1.6338906910365583, "learning_rate": 1.5614716434007393e-06, "loss": 0.6179, "step": 24443 }, { "epoch": 0.7491724898859875, "grad_norm": 1.6822167811679802, "learning_rate": 1.5611113387491678e-06, "loss": 0.655, "step": 24444 }, { "epoch": 0.7492031384087287, "grad_norm": 1.1699523125548237, "learning_rate": 1.5607510679818478e-06, "loss": 0.536, "step": 24445 }, { "epoch": 0.7492337869314699, "grad_norm": 1.2838984936808249, "learning_rate": 1.5603908311023258e-06, "loss": 0.5956, "step": 24446 }, { "epoch": 0.7492644354542111, "grad_norm": 1.7268428861940721, "learning_rate": 1.560030628114153e-06, "loss": 0.6644, "step": 24447 }, { "epoch": 0.7492950839769523, "grad_norm": 1.3122475436801406, "learning_rate": 1.5596704590208806e-06, "loss": 0.635, "step": 24448 }, { "epoch": 0.7493257324996935, "grad_norm": 1.331123637213362, "learning_rate": 1.5593103238260531e-06, "loss": 0.6662, "step": 24449 }, { "epoch": 0.7493563810224347, "grad_norm": 1.3219797491417729, "learning_rate": 1.558950222533222e-06, "loss": 0.7067, "step": 24450 }, { "epoch": 0.7493870295451759, "grad_norm": 1.4734449021046494, "learning_rate": 1.558590155145936e-06, "loss": 0.6106, "step": 24451 }, { "epoch": 0.7494176780679171, "grad_norm": 0.46187453594009753, "learning_rate": 1.5582301216677399e-06, "loss": 0.3881, "step": 24452 }, { "epoch": 0.7494483265906583, "grad_norm": 1.2651670736645866, "learning_rate": 1.5578701221021835e-06, "loss": 0.5458, "step": 24453 }, { "epoch": 0.7494789751133996, "grad_norm": 1.3767317961277417, "learning_rate": 1.557510156452815e-06, "loss": 0.5376, "step": 24454 }, { "epoch": 0.7495096236361407, "grad_norm": 1.379592955687046, "learning_rate": 1.5571502247231778e-06, "loss": 0.4828, "step": 24455 }, { "epoch": 0.749540272158882, "grad_norm": 1.5789710377867754, "learning_rate": 1.5567903269168222e-06, "loss": 0.6223, "step": 24456 }, { "epoch": 0.7495709206816231, "grad_norm": 1.3990031718513884, "learning_rate": 1.5564304630372912e-06, "loss": 0.7096, "step": 24457 }, { "epoch": 0.7496015692043644, "grad_norm": 1.3946196643772568, "learning_rate": 1.5560706330881313e-06, "loss": 0.5988, "step": 24458 }, { "epoch": 0.7496322177271055, "grad_norm": 1.1767463616849565, "learning_rate": 1.5557108370728908e-06, "loss": 0.6209, "step": 24459 }, { "epoch": 0.7496628662498468, "grad_norm": 1.4710766953484362, "learning_rate": 1.5553510749951106e-06, "loss": 0.607, "step": 24460 }, { "epoch": 0.7496935147725879, "grad_norm": 1.1566347664802856, "learning_rate": 1.5549913468583378e-06, "loss": 0.6067, "step": 24461 }, { "epoch": 0.7497241632953292, "grad_norm": 1.565446117547938, "learning_rate": 1.554631652666118e-06, "loss": 0.654, "step": 24462 }, { "epoch": 0.7497548118180704, "grad_norm": 1.493558926410964, "learning_rate": 1.5542719924219928e-06, "loss": 0.6935, "step": 24463 }, { "epoch": 0.7497854603408116, "grad_norm": 1.4465974377885755, "learning_rate": 1.553912366129509e-06, "loss": 0.6296, "step": 24464 }, { "epoch": 0.7498161088635528, "grad_norm": 1.3546946068233103, "learning_rate": 1.5535527737922068e-06, "loss": 0.5957, "step": 24465 }, { "epoch": 0.749846757386294, "grad_norm": 1.2973798417826414, "learning_rate": 1.5531932154136308e-06, "loss": 0.6555, "step": 24466 }, { "epoch": 0.7498774059090352, "grad_norm": 1.1351382128690366, "learning_rate": 1.5528336909973258e-06, "loss": 0.597, "step": 24467 }, { "epoch": 0.7499080544317764, "grad_norm": 1.5448730214198894, "learning_rate": 1.5524742005468302e-06, "loss": 0.7551, "step": 24468 }, { "epoch": 0.7499387029545176, "grad_norm": 1.2111037645722382, "learning_rate": 1.5521147440656887e-06, "loss": 0.6063, "step": 24469 }, { "epoch": 0.7499693514772588, "grad_norm": 1.3759355881055697, "learning_rate": 1.5517553215574444e-06, "loss": 0.5981, "step": 24470 }, { "epoch": 0.75, "grad_norm": 1.1569231719047097, "learning_rate": 1.5513959330256357e-06, "loss": 0.5768, "step": 24471 }, { "epoch": 0.7500306485227412, "grad_norm": 1.5527909738272625, "learning_rate": 1.5510365784738052e-06, "loss": 0.6116, "step": 24472 }, { "epoch": 0.7500612970454824, "grad_norm": 1.5080813510407232, "learning_rate": 1.5506772579054952e-06, "loss": 0.648, "step": 24473 }, { "epoch": 0.7500919455682236, "grad_norm": 0.44388697200459004, "learning_rate": 1.5503179713242432e-06, "loss": 0.3843, "step": 24474 }, { "epoch": 0.7501225940909648, "grad_norm": 1.4311637767560728, "learning_rate": 1.549958718733593e-06, "loss": 0.5959, "step": 24475 }, { "epoch": 0.750153242613706, "grad_norm": 1.3109711832159894, "learning_rate": 1.5495995001370784e-06, "loss": 0.5766, "step": 24476 }, { "epoch": 0.7501838911364472, "grad_norm": 1.3981437910478887, "learning_rate": 1.5492403155382462e-06, "loss": 0.5936, "step": 24477 }, { "epoch": 0.7502145396591884, "grad_norm": 1.3162821395656867, "learning_rate": 1.5488811649406322e-06, "loss": 0.686, "step": 24478 }, { "epoch": 0.7502451881819296, "grad_norm": 1.2387917926257808, "learning_rate": 1.5485220483477731e-06, "loss": 0.6751, "step": 24479 }, { "epoch": 0.7502758367046708, "grad_norm": 1.3433394104771907, "learning_rate": 1.5481629657632096e-06, "loss": 0.7151, "step": 24480 }, { "epoch": 0.7503064852274121, "grad_norm": 1.4239641682975215, "learning_rate": 1.5478039171904813e-06, "loss": 0.5764, "step": 24481 }, { "epoch": 0.7503371337501532, "grad_norm": 1.4250959099030343, "learning_rate": 1.5474449026331222e-06, "loss": 0.6499, "step": 24482 }, { "epoch": 0.7503677822728945, "grad_norm": 1.2370753026884638, "learning_rate": 1.5470859220946722e-06, "loss": 0.5143, "step": 24483 }, { "epoch": 0.7503984307956356, "grad_norm": 0.4328760810062136, "learning_rate": 1.5467269755786695e-06, "loss": 0.3907, "step": 24484 }, { "epoch": 0.7504290793183769, "grad_norm": 0.4640136219838238, "learning_rate": 1.5463680630886486e-06, "loss": 0.4222, "step": 24485 }, { "epoch": 0.750459727841118, "grad_norm": 1.2981837782399759, "learning_rate": 1.5460091846281477e-06, "loss": 0.6708, "step": 24486 }, { "epoch": 0.7504903763638593, "grad_norm": 1.2160749381882126, "learning_rate": 1.5456503402006989e-06, "loss": 0.6003, "step": 24487 }, { "epoch": 0.7505210248866004, "grad_norm": 1.2674641353305949, "learning_rate": 1.545291529809844e-06, "loss": 0.5306, "step": 24488 }, { "epoch": 0.7505516734093417, "grad_norm": 1.3261784622661388, "learning_rate": 1.5449327534591164e-06, "loss": 0.4981, "step": 24489 }, { "epoch": 0.7505823219320829, "grad_norm": 0.45976597171274497, "learning_rate": 1.5445740111520486e-06, "loss": 0.3984, "step": 24490 }, { "epoch": 0.7506129704548241, "grad_norm": 1.5165357792155492, "learning_rate": 1.5442153028921768e-06, "loss": 0.6377, "step": 24491 }, { "epoch": 0.7506436189775653, "grad_norm": 0.4204481863002107, "learning_rate": 1.5438566286830376e-06, "loss": 0.3794, "step": 24492 }, { "epoch": 0.7506742675003065, "grad_norm": 1.2967291823977, "learning_rate": 1.5434979885281615e-06, "loss": 0.6658, "step": 24493 }, { "epoch": 0.7507049160230477, "grad_norm": 1.337569523341474, "learning_rate": 1.5431393824310847e-06, "loss": 0.5788, "step": 24494 }, { "epoch": 0.7507355645457889, "grad_norm": 1.394849896704652, "learning_rate": 1.5427808103953396e-06, "loss": 0.5211, "step": 24495 }, { "epoch": 0.7507662130685301, "grad_norm": 0.449908290136856, "learning_rate": 1.5424222724244615e-06, "loss": 0.3952, "step": 24496 }, { "epoch": 0.7507968615912713, "grad_norm": 0.45168183964959374, "learning_rate": 1.5420637685219814e-06, "loss": 0.3994, "step": 24497 }, { "epoch": 0.7508275101140125, "grad_norm": 1.188471016928344, "learning_rate": 1.5417052986914283e-06, "loss": 0.6114, "step": 24498 }, { "epoch": 0.7508581586367538, "grad_norm": 1.220229368290223, "learning_rate": 1.541346862936341e-06, "loss": 0.6813, "step": 24499 }, { "epoch": 0.7508888071594949, "grad_norm": 1.3269916798392267, "learning_rate": 1.5409884612602482e-06, "loss": 0.6334, "step": 24500 }, { "epoch": 0.7509194556822362, "grad_norm": 1.2767395074557117, "learning_rate": 1.5406300936666796e-06, "loss": 0.5671, "step": 24501 }, { "epoch": 0.7509501042049773, "grad_norm": 1.2590077793862515, "learning_rate": 1.5402717601591676e-06, "loss": 0.5585, "step": 24502 }, { "epoch": 0.7509807527277185, "grad_norm": 1.3880301738391674, "learning_rate": 1.5399134607412447e-06, "loss": 0.6162, "step": 24503 }, { "epoch": 0.7510114012504597, "grad_norm": 1.4713221324871928, "learning_rate": 1.5395551954164383e-06, "loss": 0.6645, "step": 24504 }, { "epoch": 0.7510420497732009, "grad_norm": 1.202207617923123, "learning_rate": 1.5391969641882798e-06, "loss": 0.5485, "step": 24505 }, { "epoch": 0.7510726982959421, "grad_norm": 0.4756369468841819, "learning_rate": 1.5388387670602995e-06, "loss": 0.4151, "step": 24506 }, { "epoch": 0.7511033468186833, "grad_norm": 1.4911302921419525, "learning_rate": 1.5384806040360272e-06, "loss": 0.7219, "step": 24507 }, { "epoch": 0.7511339953414246, "grad_norm": 1.9380691949572573, "learning_rate": 1.538122475118992e-06, "loss": 0.5396, "step": 24508 }, { "epoch": 0.7511646438641657, "grad_norm": 1.368215537821551, "learning_rate": 1.5377643803127196e-06, "loss": 0.5897, "step": 24509 }, { "epoch": 0.751195292386907, "grad_norm": 1.3677240357795681, "learning_rate": 1.5374063196207411e-06, "loss": 0.6094, "step": 24510 }, { "epoch": 0.7512259409096481, "grad_norm": 1.323637955853121, "learning_rate": 1.5370482930465858e-06, "loss": 0.6251, "step": 24511 }, { "epoch": 0.7512565894323894, "grad_norm": 1.435824571521211, "learning_rate": 1.5366903005937777e-06, "loss": 0.5997, "step": 24512 }, { "epoch": 0.7512872379551305, "grad_norm": 1.3119725338653336, "learning_rate": 1.536332342265846e-06, "loss": 0.5757, "step": 24513 }, { "epoch": 0.7513178864778718, "grad_norm": 1.5372320887637108, "learning_rate": 1.5359744180663184e-06, "loss": 0.684, "step": 24514 }, { "epoch": 0.7513485350006129, "grad_norm": 1.2901401025529902, "learning_rate": 1.5356165279987222e-06, "loss": 0.5715, "step": 24515 }, { "epoch": 0.7513791835233542, "grad_norm": 1.368988859601672, "learning_rate": 1.5352586720665835e-06, "loss": 0.5731, "step": 24516 }, { "epoch": 0.7514098320460953, "grad_norm": 1.359294244461992, "learning_rate": 1.5349008502734242e-06, "loss": 0.6498, "step": 24517 }, { "epoch": 0.7514404805688366, "grad_norm": 0.4444508129074725, "learning_rate": 1.5345430626227763e-06, "loss": 0.4008, "step": 24518 }, { "epoch": 0.7514711290915778, "grad_norm": 1.2543320582804074, "learning_rate": 1.5341853091181624e-06, "loss": 0.5792, "step": 24519 }, { "epoch": 0.751501777614319, "grad_norm": 1.2908718061103506, "learning_rate": 1.533827589763106e-06, "loss": 0.6754, "step": 24520 }, { "epoch": 0.7515324261370602, "grad_norm": 0.4780435967200781, "learning_rate": 1.533469904561133e-06, "loss": 0.3882, "step": 24521 }, { "epoch": 0.7515630746598014, "grad_norm": 0.4659879579740302, "learning_rate": 1.5331122535157677e-06, "loss": 0.3961, "step": 24522 }, { "epoch": 0.7515937231825426, "grad_norm": 1.3375228110695587, "learning_rate": 1.5327546366305368e-06, "loss": 0.6116, "step": 24523 }, { "epoch": 0.7516243717052838, "grad_norm": 1.2840448901101493, "learning_rate": 1.5323970539089595e-06, "loss": 0.5724, "step": 24524 }, { "epoch": 0.751655020228025, "grad_norm": 0.4452204091839924, "learning_rate": 1.5320395053545612e-06, "loss": 0.3974, "step": 24525 }, { "epoch": 0.7516856687507663, "grad_norm": 1.3435016602224141, "learning_rate": 1.5316819909708668e-06, "loss": 0.5795, "step": 24526 }, { "epoch": 0.7517163172735074, "grad_norm": 1.223208844695986, "learning_rate": 1.5313245107613967e-06, "loss": 0.6867, "step": 24527 }, { "epoch": 0.7517469657962487, "grad_norm": 1.4747955628064453, "learning_rate": 1.530967064729671e-06, "loss": 0.624, "step": 24528 }, { "epoch": 0.7517776143189898, "grad_norm": 0.4437243992874632, "learning_rate": 1.5306096528792175e-06, "loss": 0.3929, "step": 24529 }, { "epoch": 0.7518082628417311, "grad_norm": 1.3359171900231248, "learning_rate": 1.5302522752135546e-06, "loss": 0.5045, "step": 24530 }, { "epoch": 0.7518389113644722, "grad_norm": 0.4637503715534285, "learning_rate": 1.5298949317362022e-06, "loss": 0.3891, "step": 24531 }, { "epoch": 0.7518695598872135, "grad_norm": 1.3224747740342468, "learning_rate": 1.529537622450683e-06, "loss": 0.5049, "step": 24532 }, { "epoch": 0.7519002084099546, "grad_norm": 1.2876597102934606, "learning_rate": 1.5291803473605176e-06, "loss": 0.63, "step": 24533 }, { "epoch": 0.7519308569326958, "grad_norm": 1.5171851477723013, "learning_rate": 1.5288231064692277e-06, "loss": 0.5927, "step": 24534 }, { "epoch": 0.751961505455437, "grad_norm": 1.3775008203656696, "learning_rate": 1.528465899780331e-06, "loss": 0.5989, "step": 24535 }, { "epoch": 0.7519921539781782, "grad_norm": 1.3938332149683457, "learning_rate": 1.5281087272973471e-06, "loss": 0.6639, "step": 24536 }, { "epoch": 0.7520228025009195, "grad_norm": 1.2932885043641345, "learning_rate": 1.527751589023798e-06, "loss": 0.6016, "step": 24537 }, { "epoch": 0.7520534510236606, "grad_norm": 1.4376450862519432, "learning_rate": 1.527394484963201e-06, "loss": 0.6283, "step": 24538 }, { "epoch": 0.7520840995464019, "grad_norm": 1.2554966153533302, "learning_rate": 1.527037415119072e-06, "loss": 0.5944, "step": 24539 }, { "epoch": 0.752114748069143, "grad_norm": 1.2605408826248616, "learning_rate": 1.5266803794949343e-06, "loss": 0.647, "step": 24540 }, { "epoch": 0.7521453965918843, "grad_norm": 1.5935954760418425, "learning_rate": 1.5263233780943027e-06, "loss": 0.598, "step": 24541 }, { "epoch": 0.7521760451146254, "grad_norm": 1.4160492313632642, "learning_rate": 1.5259664109206966e-06, "loss": 0.6296, "step": 24542 }, { "epoch": 0.7522066936373667, "grad_norm": 1.418162247798849, "learning_rate": 1.5256094779776308e-06, "loss": 0.5146, "step": 24543 }, { "epoch": 0.7522373421601078, "grad_norm": 1.2214048223845164, "learning_rate": 1.525252579268624e-06, "loss": 0.5391, "step": 24544 }, { "epoch": 0.7522679906828491, "grad_norm": 1.2883632027581506, "learning_rate": 1.5248957147971939e-06, "loss": 0.6796, "step": 24545 }, { "epoch": 0.7522986392055903, "grad_norm": 0.44348627828802983, "learning_rate": 1.5245388845668541e-06, "loss": 0.3959, "step": 24546 }, { "epoch": 0.7523292877283315, "grad_norm": 1.6626878881258564, "learning_rate": 1.524182088581122e-06, "loss": 0.6323, "step": 24547 }, { "epoch": 0.7523599362510727, "grad_norm": 1.3605125487439895, "learning_rate": 1.5238253268435143e-06, "loss": 0.5904, "step": 24548 }, { "epoch": 0.7523905847738139, "grad_norm": 1.396078092470032, "learning_rate": 1.5234685993575439e-06, "loss": 0.6464, "step": 24549 }, { "epoch": 0.7524212332965551, "grad_norm": 1.3357839327112888, "learning_rate": 1.5231119061267268e-06, "loss": 0.6172, "step": 24550 }, { "epoch": 0.7524518818192963, "grad_norm": 1.2914950656097057, "learning_rate": 1.5227552471545793e-06, "loss": 0.6146, "step": 24551 }, { "epoch": 0.7524825303420375, "grad_norm": 1.4231735703102788, "learning_rate": 1.5223986224446124e-06, "loss": 0.5771, "step": 24552 }, { "epoch": 0.7525131788647788, "grad_norm": 1.290306440830291, "learning_rate": 1.5220420320003433e-06, "loss": 0.5734, "step": 24553 }, { "epoch": 0.7525438273875199, "grad_norm": 1.5133190996661845, "learning_rate": 1.521685475825282e-06, "loss": 0.686, "step": 24554 }, { "epoch": 0.7525744759102612, "grad_norm": 1.5202007536392066, "learning_rate": 1.5213289539229443e-06, "loss": 0.6234, "step": 24555 }, { "epoch": 0.7526051244330023, "grad_norm": 1.2755598783230309, "learning_rate": 1.5209724662968433e-06, "loss": 0.5746, "step": 24556 }, { "epoch": 0.7526357729557436, "grad_norm": 1.4731747565166926, "learning_rate": 1.52061601295049e-06, "loss": 0.6645, "step": 24557 }, { "epoch": 0.7526664214784847, "grad_norm": 1.3865012389874118, "learning_rate": 1.5202595938873965e-06, "loss": 0.5854, "step": 24558 }, { "epoch": 0.752697070001226, "grad_norm": 0.41761087069295844, "learning_rate": 1.5199032091110777e-06, "loss": 0.3866, "step": 24559 }, { "epoch": 0.7527277185239671, "grad_norm": 0.47498583491276025, "learning_rate": 1.5195468586250412e-06, "loss": 0.4155, "step": 24560 }, { "epoch": 0.7527583670467084, "grad_norm": 1.2721404171666373, "learning_rate": 1.5191905424328019e-06, "loss": 0.6287, "step": 24561 }, { "epoch": 0.7527890155694495, "grad_norm": 0.4653802959957408, "learning_rate": 1.518834260537867e-06, "loss": 0.4039, "step": 24562 }, { "epoch": 0.7528196640921908, "grad_norm": 1.376665319897957, "learning_rate": 1.5184780129437487e-06, "loss": 0.6456, "step": 24563 }, { "epoch": 0.752850312614932, "grad_norm": 0.45017252305335764, "learning_rate": 1.5181217996539589e-06, "loss": 0.3858, "step": 24564 }, { "epoch": 0.7528809611376731, "grad_norm": 1.4226869660112251, "learning_rate": 1.5177656206720043e-06, "loss": 0.5696, "step": 24565 }, { "epoch": 0.7529116096604144, "grad_norm": 1.2159437560800697, "learning_rate": 1.5174094760013963e-06, "loss": 0.5482, "step": 24566 }, { "epoch": 0.7529422581831555, "grad_norm": 0.4470034524005248, "learning_rate": 1.517053365645645e-06, "loss": 0.388, "step": 24567 }, { "epoch": 0.7529729067058968, "grad_norm": 1.3714902754202138, "learning_rate": 1.5166972896082565e-06, "loss": 0.6635, "step": 24568 }, { "epoch": 0.7530035552286379, "grad_norm": 1.2126758627974379, "learning_rate": 1.5163412478927408e-06, "loss": 0.4423, "step": 24569 }, { "epoch": 0.7530342037513792, "grad_norm": 1.3682272479736586, "learning_rate": 1.5159852405026082e-06, "loss": 0.6401, "step": 24570 }, { "epoch": 0.7530648522741203, "grad_norm": 1.2656718946962966, "learning_rate": 1.5156292674413625e-06, "loss": 0.5926, "step": 24571 }, { "epoch": 0.7530955007968616, "grad_norm": 1.4679755407008859, "learning_rate": 1.515273328712515e-06, "loss": 0.6095, "step": 24572 }, { "epoch": 0.7531261493196028, "grad_norm": 1.6397481860255927, "learning_rate": 1.5149174243195692e-06, "loss": 0.6423, "step": 24573 }, { "epoch": 0.753156797842344, "grad_norm": 1.3784428573808445, "learning_rate": 1.5145615542660335e-06, "loss": 0.6206, "step": 24574 }, { "epoch": 0.7531874463650852, "grad_norm": 1.470785485552886, "learning_rate": 1.5142057185554165e-06, "loss": 0.6349, "step": 24575 }, { "epoch": 0.7532180948878264, "grad_norm": 1.3314687328164387, "learning_rate": 1.5138499171912208e-06, "loss": 0.5718, "step": 24576 }, { "epoch": 0.7532487434105676, "grad_norm": 1.54113695787892, "learning_rate": 1.5134941501769534e-06, "loss": 0.5932, "step": 24577 }, { "epoch": 0.7532793919333088, "grad_norm": 1.250188255852778, "learning_rate": 1.5131384175161223e-06, "loss": 0.6254, "step": 24578 }, { "epoch": 0.75331004045605, "grad_norm": 1.3930095214998168, "learning_rate": 1.512782719212229e-06, "loss": 0.6577, "step": 24579 }, { "epoch": 0.7533406889787913, "grad_norm": 1.4904257473977855, "learning_rate": 1.5124270552687798e-06, "loss": 0.6484, "step": 24580 }, { "epoch": 0.7533713375015324, "grad_norm": 1.1798071646907164, "learning_rate": 1.5120714256892804e-06, "loss": 0.5603, "step": 24581 }, { "epoch": 0.7534019860242737, "grad_norm": 1.2710690085710867, "learning_rate": 1.5117158304772322e-06, "loss": 0.5798, "step": 24582 }, { "epoch": 0.7534326345470148, "grad_norm": 1.1981954165334627, "learning_rate": 1.5113602696361419e-06, "loss": 0.5941, "step": 24583 }, { "epoch": 0.7534632830697561, "grad_norm": 1.2514417298399314, "learning_rate": 1.51100474316951e-06, "loss": 0.5743, "step": 24584 }, { "epoch": 0.7534939315924972, "grad_norm": 1.3557475127363368, "learning_rate": 1.5106492510808413e-06, "loss": 0.5849, "step": 24585 }, { "epoch": 0.7535245801152385, "grad_norm": 1.4364069230359422, "learning_rate": 1.5102937933736394e-06, "loss": 0.585, "step": 24586 }, { "epoch": 0.7535552286379796, "grad_norm": 1.329493270787136, "learning_rate": 1.5099383700514047e-06, "loss": 0.5556, "step": 24587 }, { "epoch": 0.7535858771607209, "grad_norm": 1.4100623506575227, "learning_rate": 1.5095829811176399e-06, "loss": 0.5895, "step": 24588 }, { "epoch": 0.753616525683462, "grad_norm": 1.3364173754909807, "learning_rate": 1.5092276265758483e-06, "loss": 0.5886, "step": 24589 }, { "epoch": 0.7536471742062033, "grad_norm": 1.2368276959881153, "learning_rate": 1.5088723064295291e-06, "loss": 0.612, "step": 24590 }, { "epoch": 0.7536778227289445, "grad_norm": 1.4395688485042382, "learning_rate": 1.5085170206821836e-06, "loss": 0.6339, "step": 24591 }, { "epoch": 0.7537084712516857, "grad_norm": 1.3054280082624188, "learning_rate": 1.5081617693373151e-06, "loss": 0.5272, "step": 24592 }, { "epoch": 0.7537391197744269, "grad_norm": 1.5662706494877565, "learning_rate": 1.5078065523984208e-06, "loss": 0.6762, "step": 24593 }, { "epoch": 0.7537697682971681, "grad_norm": 1.2841563314078401, "learning_rate": 1.5074513698690036e-06, "loss": 0.5664, "step": 24594 }, { "epoch": 0.7538004168199093, "grad_norm": 0.44608581850661316, "learning_rate": 1.5070962217525582e-06, "loss": 0.3994, "step": 24595 }, { "epoch": 0.7538310653426504, "grad_norm": 1.2625721458845092, "learning_rate": 1.5067411080525907e-06, "loss": 0.5759, "step": 24596 }, { "epoch": 0.7538617138653917, "grad_norm": 0.43309488917492667, "learning_rate": 1.5063860287725968e-06, "loss": 0.3851, "step": 24597 }, { "epoch": 0.7538923623881328, "grad_norm": 1.1511356524771275, "learning_rate": 1.5060309839160737e-06, "loss": 0.4979, "step": 24598 }, { "epoch": 0.7539230109108741, "grad_norm": 1.3039128123416341, "learning_rate": 1.5056759734865218e-06, "loss": 0.4762, "step": 24599 }, { "epoch": 0.7539536594336153, "grad_norm": 1.383773504615984, "learning_rate": 1.5053209974874393e-06, "loss": 0.6562, "step": 24600 }, { "epoch": 0.7539843079563565, "grad_norm": 1.4955092823552734, "learning_rate": 1.5049660559223223e-06, "loss": 0.6732, "step": 24601 }, { "epoch": 0.7540149564790977, "grad_norm": 1.352558506745212, "learning_rate": 1.5046111487946685e-06, "loss": 0.6707, "step": 24602 }, { "epoch": 0.7540456050018389, "grad_norm": 0.4297032226144705, "learning_rate": 1.5042562761079755e-06, "loss": 0.3877, "step": 24603 }, { "epoch": 0.7540762535245801, "grad_norm": 1.4833868855620789, "learning_rate": 1.5039014378657413e-06, "loss": 0.6291, "step": 24604 }, { "epoch": 0.7541069020473213, "grad_norm": 1.41215067518874, "learning_rate": 1.503546634071461e-06, "loss": 0.5802, "step": 24605 }, { "epoch": 0.7541375505700625, "grad_norm": 1.340241943899838, "learning_rate": 1.5031918647286287e-06, "loss": 0.5716, "step": 24606 }, { "epoch": 0.7541681990928037, "grad_norm": 1.455420937877892, "learning_rate": 1.5028371298407418e-06, "loss": 0.6452, "step": 24607 }, { "epoch": 0.7541988476155449, "grad_norm": 1.3606533347766312, "learning_rate": 1.5024824294112967e-06, "loss": 0.5717, "step": 24608 }, { "epoch": 0.7542294961382862, "grad_norm": 1.231774922147087, "learning_rate": 1.5021277634437858e-06, "loss": 0.6114, "step": 24609 }, { "epoch": 0.7542601446610273, "grad_norm": 1.35906624595545, "learning_rate": 1.501773131941705e-06, "loss": 0.6315, "step": 24610 }, { "epoch": 0.7542907931837686, "grad_norm": 1.298350790989829, "learning_rate": 1.5014185349085498e-06, "loss": 0.6602, "step": 24611 }, { "epoch": 0.7543214417065097, "grad_norm": 1.3103057846878827, "learning_rate": 1.5010639723478115e-06, "loss": 0.5967, "step": 24612 }, { "epoch": 0.754352090229251, "grad_norm": 1.345534143249251, "learning_rate": 1.500709444262987e-06, "loss": 0.6808, "step": 24613 }, { "epoch": 0.7543827387519921, "grad_norm": 1.2240746675315497, "learning_rate": 1.5003549506575643e-06, "loss": 0.5655, "step": 24614 }, { "epoch": 0.7544133872747334, "grad_norm": 1.4394257081834336, "learning_rate": 1.500000491535043e-06, "loss": 0.6893, "step": 24615 }, { "epoch": 0.7544440357974745, "grad_norm": 1.3145754670236014, "learning_rate": 1.499646066898912e-06, "loss": 0.5421, "step": 24616 }, { "epoch": 0.7544746843202158, "grad_norm": 1.3871984432298892, "learning_rate": 1.4992916767526627e-06, "loss": 0.6443, "step": 24617 }, { "epoch": 0.754505332842957, "grad_norm": 1.273008163674339, "learning_rate": 1.4989373210997882e-06, "loss": 0.5959, "step": 24618 }, { "epoch": 0.7545359813656982, "grad_norm": 1.2269591132419904, "learning_rate": 1.4985829999437813e-06, "loss": 0.5715, "step": 24619 }, { "epoch": 0.7545666298884394, "grad_norm": 1.3756974627978438, "learning_rate": 1.4982287132881307e-06, "loss": 0.6282, "step": 24620 }, { "epoch": 0.7545972784111806, "grad_norm": 1.2743033562198836, "learning_rate": 1.4978744611363284e-06, "loss": 0.6238, "step": 24621 }, { "epoch": 0.7546279269339218, "grad_norm": 1.3501344227560876, "learning_rate": 1.4975202434918652e-06, "loss": 0.7048, "step": 24622 }, { "epoch": 0.754658575456663, "grad_norm": 1.2480159771113757, "learning_rate": 1.4971660603582328e-06, "loss": 0.5864, "step": 24623 }, { "epoch": 0.7546892239794042, "grad_norm": 1.2504592891330368, "learning_rate": 1.4968119117389197e-06, "loss": 0.5629, "step": 24624 }, { "epoch": 0.7547198725021455, "grad_norm": 1.2474809423936228, "learning_rate": 1.4964577976374112e-06, "loss": 0.5393, "step": 24625 }, { "epoch": 0.7547505210248866, "grad_norm": 1.2327469631488246, "learning_rate": 1.496103718057204e-06, "loss": 0.5853, "step": 24626 }, { "epoch": 0.7547811695476278, "grad_norm": 1.3948482838021607, "learning_rate": 1.4957496730017834e-06, "loss": 0.6384, "step": 24627 }, { "epoch": 0.754811818070369, "grad_norm": 0.456262336043014, "learning_rate": 1.4953956624746369e-06, "loss": 0.3778, "step": 24628 }, { "epoch": 0.7548424665931102, "grad_norm": 1.2767655410247551, "learning_rate": 1.4950416864792528e-06, "loss": 0.5869, "step": 24629 }, { "epoch": 0.7548731151158514, "grad_norm": 1.3025538775129961, "learning_rate": 1.49468774501912e-06, "loss": 0.7461, "step": 24630 }, { "epoch": 0.7549037636385926, "grad_norm": 1.4940237482492071, "learning_rate": 1.4943338380977274e-06, "loss": 0.6346, "step": 24631 }, { "epoch": 0.7549344121613338, "grad_norm": 1.284054637762776, "learning_rate": 1.4939799657185589e-06, "loss": 0.5162, "step": 24632 }, { "epoch": 0.754965060684075, "grad_norm": 0.47010165133439136, "learning_rate": 1.4936261278851028e-06, "loss": 0.3891, "step": 24633 }, { "epoch": 0.7549957092068162, "grad_norm": 1.3288787123756203, "learning_rate": 1.4932723246008468e-06, "loss": 0.618, "step": 24634 }, { "epoch": 0.7550263577295574, "grad_norm": 0.4517487582869902, "learning_rate": 1.4929185558692767e-06, "loss": 0.4044, "step": 24635 }, { "epoch": 0.7550570062522987, "grad_norm": 1.4155756081886142, "learning_rate": 1.4925648216938732e-06, "loss": 0.6195, "step": 24636 }, { "epoch": 0.7550876547750398, "grad_norm": 1.6002110711880722, "learning_rate": 1.4922111220781299e-06, "loss": 0.5039, "step": 24637 }, { "epoch": 0.7551183032977811, "grad_norm": 1.2348012723342645, "learning_rate": 1.4918574570255272e-06, "loss": 0.587, "step": 24638 }, { "epoch": 0.7551489518205222, "grad_norm": 1.4399668077288639, "learning_rate": 1.4915038265395498e-06, "loss": 0.6291, "step": 24639 }, { "epoch": 0.7551796003432635, "grad_norm": 1.4711993729294153, "learning_rate": 1.4911502306236825e-06, "loss": 0.5979, "step": 24640 }, { "epoch": 0.7552102488660046, "grad_norm": 1.325063396413445, "learning_rate": 1.4907966692814096e-06, "loss": 0.6781, "step": 24641 }, { "epoch": 0.7552408973887459, "grad_norm": 1.5365478538415436, "learning_rate": 1.4904431425162164e-06, "loss": 0.6522, "step": 24642 }, { "epoch": 0.755271545911487, "grad_norm": 1.5652050182433153, "learning_rate": 1.4900896503315836e-06, "loss": 0.6401, "step": 24643 }, { "epoch": 0.7553021944342283, "grad_norm": 1.295744693978576, "learning_rate": 1.4897361927309955e-06, "loss": 0.6603, "step": 24644 }, { "epoch": 0.7553328429569695, "grad_norm": 1.3337473091907386, "learning_rate": 1.489382769717937e-06, "loss": 0.6689, "step": 24645 }, { "epoch": 0.7553634914797107, "grad_norm": 1.3490759039370424, "learning_rate": 1.4890293812958873e-06, "loss": 0.6296, "step": 24646 }, { "epoch": 0.7553941400024519, "grad_norm": 1.2828400010311283, "learning_rate": 1.488676027468327e-06, "loss": 0.6078, "step": 24647 }, { "epoch": 0.7554247885251931, "grad_norm": 1.4051023959982203, "learning_rate": 1.488322708238743e-06, "loss": 0.5906, "step": 24648 }, { "epoch": 0.7554554370479343, "grad_norm": 1.4241132184921352, "learning_rate": 1.4879694236106124e-06, "loss": 0.6354, "step": 24649 }, { "epoch": 0.7554860855706755, "grad_norm": 1.4206678418912935, "learning_rate": 1.487616173587419e-06, "loss": 0.5474, "step": 24650 }, { "epoch": 0.7555167340934167, "grad_norm": 0.46152067337324815, "learning_rate": 1.4872629581726406e-06, "loss": 0.4161, "step": 24651 }, { "epoch": 0.755547382616158, "grad_norm": 1.2888953253096458, "learning_rate": 1.4869097773697594e-06, "loss": 0.5923, "step": 24652 }, { "epoch": 0.7555780311388991, "grad_norm": 1.2606264136993859, "learning_rate": 1.4865566311822565e-06, "loss": 0.6112, "step": 24653 }, { "epoch": 0.7556086796616404, "grad_norm": 1.310957431064801, "learning_rate": 1.4862035196136083e-06, "loss": 0.5961, "step": 24654 }, { "epoch": 0.7556393281843815, "grad_norm": 1.4169518063890563, "learning_rate": 1.485850442667296e-06, "loss": 0.681, "step": 24655 }, { "epoch": 0.7556699767071228, "grad_norm": 1.308540724229823, "learning_rate": 1.4854974003467997e-06, "loss": 0.5887, "step": 24656 }, { "epoch": 0.7557006252298639, "grad_norm": 1.209141759396481, "learning_rate": 1.4851443926555953e-06, "loss": 0.5925, "step": 24657 }, { "epoch": 0.7557312737526051, "grad_norm": 1.120876603656003, "learning_rate": 1.484791419597164e-06, "loss": 0.5273, "step": 24658 }, { "epoch": 0.7557619222753463, "grad_norm": 1.3998661583325553, "learning_rate": 1.4844384811749812e-06, "loss": 0.6048, "step": 24659 }, { "epoch": 0.7557925707980875, "grad_norm": 1.188330217685695, "learning_rate": 1.484085577392525e-06, "loss": 0.649, "step": 24660 }, { "epoch": 0.7558232193208287, "grad_norm": 1.262096821156257, "learning_rate": 1.4837327082532755e-06, "loss": 0.5557, "step": 24661 }, { "epoch": 0.7558538678435699, "grad_norm": 1.2416188865121474, "learning_rate": 1.4833798737607046e-06, "loss": 0.6039, "step": 24662 }, { "epoch": 0.7558845163663112, "grad_norm": 0.4403973230853494, "learning_rate": 1.4830270739182922e-06, "loss": 0.3887, "step": 24663 }, { "epoch": 0.7559151648890523, "grad_norm": 1.1298402199805604, "learning_rate": 1.482674308729516e-06, "loss": 0.5538, "step": 24664 }, { "epoch": 0.7559458134117936, "grad_norm": 1.1667127801967068, "learning_rate": 1.482321578197849e-06, "loss": 0.5876, "step": 24665 }, { "epoch": 0.7559764619345347, "grad_norm": 1.3745086355675882, "learning_rate": 1.4819688823267647e-06, "loss": 0.5574, "step": 24666 }, { "epoch": 0.756007110457276, "grad_norm": 1.2446672655403548, "learning_rate": 1.4816162211197438e-06, "loss": 0.5967, "step": 24667 }, { "epoch": 0.7560377589800171, "grad_norm": 0.42741175762328965, "learning_rate": 1.4812635945802574e-06, "loss": 0.3851, "step": 24668 }, { "epoch": 0.7560684075027584, "grad_norm": 0.45210948651062777, "learning_rate": 1.480911002711783e-06, "loss": 0.3999, "step": 24669 }, { "epoch": 0.7560990560254995, "grad_norm": 1.4066728136093738, "learning_rate": 1.4805584455177908e-06, "loss": 0.5845, "step": 24670 }, { "epoch": 0.7561297045482408, "grad_norm": 0.4549734485966965, "learning_rate": 1.4802059230017568e-06, "loss": 0.3888, "step": 24671 }, { "epoch": 0.756160353070982, "grad_norm": 0.48413788691948906, "learning_rate": 1.4798534351671556e-06, "loss": 0.4095, "step": 24672 }, { "epoch": 0.7561910015937232, "grad_norm": 1.2363832197404305, "learning_rate": 1.4795009820174584e-06, "loss": 0.6678, "step": 24673 }, { "epoch": 0.7562216501164644, "grad_norm": 1.3989778143591804, "learning_rate": 1.4791485635561382e-06, "loss": 0.6397, "step": 24674 }, { "epoch": 0.7562522986392056, "grad_norm": 0.4400655720908745, "learning_rate": 1.4787961797866695e-06, "loss": 0.3909, "step": 24675 }, { "epoch": 0.7562829471619468, "grad_norm": 1.3345524502423307, "learning_rate": 1.478443830712522e-06, "loss": 0.6025, "step": 24676 }, { "epoch": 0.756313595684688, "grad_norm": 1.222934240963766, "learning_rate": 1.4780915163371685e-06, "loss": 0.5724, "step": 24677 }, { "epoch": 0.7563442442074292, "grad_norm": 1.2904413904361938, "learning_rate": 1.4777392366640813e-06, "loss": 0.5524, "step": 24678 }, { "epoch": 0.7563748927301704, "grad_norm": 1.3422644977481073, "learning_rate": 1.4773869916967292e-06, "loss": 0.6011, "step": 24679 }, { "epoch": 0.7564055412529116, "grad_norm": 1.3219441708868978, "learning_rate": 1.4770347814385865e-06, "loss": 0.5993, "step": 24680 }, { "epoch": 0.7564361897756529, "grad_norm": 1.2250907862884255, "learning_rate": 1.4766826058931195e-06, "loss": 0.6683, "step": 24681 }, { "epoch": 0.756466838298394, "grad_norm": 1.228418108064305, "learning_rate": 1.4763304650638e-06, "loss": 0.6649, "step": 24682 }, { "epoch": 0.7564974868211353, "grad_norm": 1.4677881299205546, "learning_rate": 1.4759783589541e-06, "loss": 0.6037, "step": 24683 }, { "epoch": 0.7565281353438764, "grad_norm": 1.312179426948573, "learning_rate": 1.4756262875674855e-06, "loss": 0.5178, "step": 24684 }, { "epoch": 0.7565587838666177, "grad_norm": 1.2857120370550528, "learning_rate": 1.4752742509074265e-06, "loss": 0.5786, "step": 24685 }, { "epoch": 0.7565894323893588, "grad_norm": 0.4578696638257975, "learning_rate": 1.474922248977394e-06, "loss": 0.4071, "step": 24686 }, { "epoch": 0.7566200809121001, "grad_norm": 1.2748998691270796, "learning_rate": 1.4745702817808527e-06, "loss": 0.5663, "step": 24687 }, { "epoch": 0.7566507294348412, "grad_norm": 1.2896598276274136, "learning_rate": 1.4742183493212726e-06, "loss": 0.6922, "step": 24688 }, { "epoch": 0.7566813779575824, "grad_norm": 1.4813935348302913, "learning_rate": 1.4738664516021228e-06, "loss": 0.6756, "step": 24689 }, { "epoch": 0.7567120264803237, "grad_norm": 1.5111270894146278, "learning_rate": 1.473514588626867e-06, "loss": 0.7741, "step": 24690 }, { "epoch": 0.7567426750030648, "grad_norm": 0.45548313510698896, "learning_rate": 1.4731627603989762e-06, "loss": 0.4048, "step": 24691 }, { "epoch": 0.7567733235258061, "grad_norm": 1.3923044950871282, "learning_rate": 1.4728109669219137e-06, "loss": 0.6375, "step": 24692 }, { "epoch": 0.7568039720485472, "grad_norm": 1.3850341891172289, "learning_rate": 1.4724592081991473e-06, "loss": 0.6143, "step": 24693 }, { "epoch": 0.7568346205712885, "grad_norm": 0.44147750939918445, "learning_rate": 1.4721074842341444e-06, "loss": 0.3965, "step": 24694 }, { "epoch": 0.7568652690940296, "grad_norm": 1.2530771096336621, "learning_rate": 1.4717557950303669e-06, "loss": 0.6439, "step": 24695 }, { "epoch": 0.7568959176167709, "grad_norm": 1.283425707276583, "learning_rate": 1.4714041405912828e-06, "loss": 0.6133, "step": 24696 }, { "epoch": 0.756926566139512, "grad_norm": 1.4950086289465474, "learning_rate": 1.471052520920358e-06, "loss": 0.5985, "step": 24697 }, { "epoch": 0.7569572146622533, "grad_norm": 1.3317885850794906, "learning_rate": 1.4707009360210544e-06, "loss": 0.6152, "step": 24698 }, { "epoch": 0.7569878631849944, "grad_norm": 1.3732910431688, "learning_rate": 1.470349385896837e-06, "loss": 0.6006, "step": 24699 }, { "epoch": 0.7570185117077357, "grad_norm": 1.3101745606963635, "learning_rate": 1.469997870551172e-06, "loss": 0.6182, "step": 24700 }, { "epoch": 0.7570491602304769, "grad_norm": 1.3882504997089244, "learning_rate": 1.4696463899875196e-06, "loss": 0.6872, "step": 24701 }, { "epoch": 0.7570798087532181, "grad_norm": 1.2942712241259802, "learning_rate": 1.4692949442093462e-06, "loss": 0.5343, "step": 24702 }, { "epoch": 0.7571104572759593, "grad_norm": 1.431137507482919, "learning_rate": 1.4689435332201113e-06, "loss": 0.5741, "step": 24703 }, { "epoch": 0.7571411057987005, "grad_norm": 1.266833924289402, "learning_rate": 1.4685921570232797e-06, "loss": 0.5664, "step": 24704 }, { "epoch": 0.7571717543214417, "grad_norm": 1.4362273472947045, "learning_rate": 1.4682408156223144e-06, "loss": 0.6467, "step": 24705 }, { "epoch": 0.7572024028441829, "grad_norm": 1.3448489895906082, "learning_rate": 1.467889509020674e-06, "loss": 0.5745, "step": 24706 }, { "epoch": 0.7572330513669241, "grad_norm": 1.554042888599496, "learning_rate": 1.4675382372218227e-06, "loss": 0.6003, "step": 24707 }, { "epoch": 0.7572636998896654, "grad_norm": 1.2128629482766051, "learning_rate": 1.4671870002292221e-06, "loss": 0.7086, "step": 24708 }, { "epoch": 0.7572943484124065, "grad_norm": 1.2834853264172765, "learning_rate": 1.4668357980463305e-06, "loss": 0.5703, "step": 24709 }, { "epoch": 0.7573249969351478, "grad_norm": 1.6003893591643796, "learning_rate": 1.4664846306766112e-06, "loss": 0.691, "step": 24710 }, { "epoch": 0.7573556454578889, "grad_norm": 0.4263707947374193, "learning_rate": 1.4661334981235215e-06, "loss": 0.3775, "step": 24711 }, { "epoch": 0.7573862939806302, "grad_norm": 1.1500123049654976, "learning_rate": 1.4657824003905225e-06, "loss": 0.502, "step": 24712 }, { "epoch": 0.7574169425033713, "grad_norm": 1.3686040088465663, "learning_rate": 1.4654313374810752e-06, "loss": 0.6551, "step": 24713 }, { "epoch": 0.7574475910261126, "grad_norm": 1.4634249314221675, "learning_rate": 1.465080309398636e-06, "loss": 0.6152, "step": 24714 }, { "epoch": 0.7574782395488537, "grad_norm": 1.4913947683359776, "learning_rate": 1.4647293161466652e-06, "loss": 0.6261, "step": 24715 }, { "epoch": 0.757508888071595, "grad_norm": 1.2528879015587344, "learning_rate": 1.4643783577286225e-06, "loss": 0.6063, "step": 24716 }, { "epoch": 0.7575395365943361, "grad_norm": 1.3464651935140008, "learning_rate": 1.4640274341479621e-06, "loss": 0.5966, "step": 24717 }, { "epoch": 0.7575701851170774, "grad_norm": 1.4167182110754877, "learning_rate": 1.4636765454081447e-06, "loss": 0.6056, "step": 24718 }, { "epoch": 0.7576008336398186, "grad_norm": 1.2560267753947854, "learning_rate": 1.4633256915126287e-06, "loss": 0.5913, "step": 24719 }, { "epoch": 0.7576314821625597, "grad_norm": 1.3938804999538652, "learning_rate": 1.462974872464868e-06, "loss": 0.6607, "step": 24720 }, { "epoch": 0.757662130685301, "grad_norm": 2.1623129733006268, "learning_rate": 1.4626240882683224e-06, "loss": 0.5484, "step": 24721 }, { "epoch": 0.7576927792080421, "grad_norm": 1.4090280329629514, "learning_rate": 1.4622733389264438e-06, "loss": 0.6452, "step": 24722 }, { "epoch": 0.7577234277307834, "grad_norm": 1.1811065261550007, "learning_rate": 1.4619226244426938e-06, "loss": 0.6012, "step": 24723 }, { "epoch": 0.7577540762535245, "grad_norm": 1.3274081015200232, "learning_rate": 1.4615719448205257e-06, "loss": 0.6103, "step": 24724 }, { "epoch": 0.7577847247762658, "grad_norm": 1.397289750839159, "learning_rate": 1.4612213000633929e-06, "loss": 0.5988, "step": 24725 }, { "epoch": 0.757815373299007, "grad_norm": 1.5591203015727866, "learning_rate": 1.4608706901747527e-06, "loss": 0.7112, "step": 24726 }, { "epoch": 0.7578460218217482, "grad_norm": 1.1619962108864332, "learning_rate": 1.46052011515806e-06, "loss": 0.6681, "step": 24727 }, { "epoch": 0.7578766703444894, "grad_norm": 0.42763491466141434, "learning_rate": 1.4601695750167667e-06, "loss": 0.3729, "step": 24728 }, { "epoch": 0.7579073188672306, "grad_norm": 1.3666106393654303, "learning_rate": 1.4598190697543286e-06, "loss": 0.6167, "step": 24729 }, { "epoch": 0.7579379673899718, "grad_norm": 1.3494585203028, "learning_rate": 1.4594685993741987e-06, "loss": 0.6824, "step": 24730 }, { "epoch": 0.757968615912713, "grad_norm": 1.3749302561738304, "learning_rate": 1.4591181638798318e-06, "loss": 0.6344, "step": 24731 }, { "epoch": 0.7579992644354542, "grad_norm": 1.3588495856827636, "learning_rate": 1.4587677632746805e-06, "loss": 0.5672, "step": 24732 }, { "epoch": 0.7580299129581954, "grad_norm": 1.282781370068477, "learning_rate": 1.458417397562193e-06, "loss": 0.7485, "step": 24733 }, { "epoch": 0.7580605614809366, "grad_norm": 1.38929518240609, "learning_rate": 1.458067066745828e-06, "loss": 0.6143, "step": 24734 }, { "epoch": 0.7580912100036779, "grad_norm": 1.4075217153787498, "learning_rate": 1.457716770829034e-06, "loss": 0.6293, "step": 24735 }, { "epoch": 0.758121858526419, "grad_norm": 1.6505664593167022, "learning_rate": 1.4573665098152622e-06, "loss": 0.6386, "step": 24736 }, { "epoch": 0.7581525070491603, "grad_norm": 1.2778378490840843, "learning_rate": 1.4570162837079644e-06, "loss": 0.6403, "step": 24737 }, { "epoch": 0.7581831555719014, "grad_norm": 0.4561715054982324, "learning_rate": 1.456666092510593e-06, "loss": 0.4009, "step": 24738 }, { "epoch": 0.7582138040946427, "grad_norm": 0.4445329311483142, "learning_rate": 1.456315936226596e-06, "loss": 0.385, "step": 24739 }, { "epoch": 0.7582444526173838, "grad_norm": 0.4589607492139304, "learning_rate": 1.4559658148594246e-06, "loss": 0.3885, "step": 24740 }, { "epoch": 0.7582751011401251, "grad_norm": 1.3579413519687866, "learning_rate": 1.4556157284125283e-06, "loss": 0.5555, "step": 24741 }, { "epoch": 0.7583057496628662, "grad_norm": 1.3064263396776377, "learning_rate": 1.4552656768893593e-06, "loss": 0.5476, "step": 24742 }, { "epoch": 0.7583363981856075, "grad_norm": 0.46432621758488735, "learning_rate": 1.4549156602933644e-06, "loss": 0.3897, "step": 24743 }, { "epoch": 0.7583670467083486, "grad_norm": 1.3254825951854687, "learning_rate": 1.4545656786279894e-06, "loss": 0.6495, "step": 24744 }, { "epoch": 0.7583976952310899, "grad_norm": 1.2847062821594561, "learning_rate": 1.4542157318966894e-06, "loss": 0.5804, "step": 24745 }, { "epoch": 0.7584283437538311, "grad_norm": 1.4376304103843491, "learning_rate": 1.4538658201029082e-06, "loss": 0.6684, "step": 24746 }, { "epoch": 0.7584589922765723, "grad_norm": 0.44123956860121777, "learning_rate": 1.4535159432500933e-06, "loss": 0.3815, "step": 24747 }, { "epoch": 0.7584896407993135, "grad_norm": 1.2783405104932535, "learning_rate": 1.4531661013416932e-06, "loss": 0.5508, "step": 24748 }, { "epoch": 0.7585202893220547, "grad_norm": 1.2865257876975678, "learning_rate": 1.4528162943811552e-06, "loss": 0.5639, "step": 24749 }, { "epoch": 0.7585509378447959, "grad_norm": 1.2431670504024133, "learning_rate": 1.452466522371927e-06, "loss": 0.6291, "step": 24750 }, { "epoch": 0.758581586367537, "grad_norm": 1.4141586751169903, "learning_rate": 1.4521167853174523e-06, "loss": 0.6058, "step": 24751 }, { "epoch": 0.7586122348902783, "grad_norm": 1.4462040330998476, "learning_rate": 1.4517670832211789e-06, "loss": 0.563, "step": 24752 }, { "epoch": 0.7586428834130194, "grad_norm": 1.2095387150741035, "learning_rate": 1.4514174160865541e-06, "loss": 0.5772, "step": 24753 }, { "epoch": 0.7586735319357607, "grad_norm": 1.3848867527251492, "learning_rate": 1.4510677839170213e-06, "loss": 0.5736, "step": 24754 }, { "epoch": 0.7587041804585019, "grad_norm": 1.299877431852836, "learning_rate": 1.4507181867160237e-06, "loss": 0.6263, "step": 24755 }, { "epoch": 0.7587348289812431, "grad_norm": 1.4603412359944878, "learning_rate": 1.450368624487008e-06, "loss": 0.6184, "step": 24756 }, { "epoch": 0.7587654775039843, "grad_norm": 0.4509027623151251, "learning_rate": 1.4500190972334188e-06, "loss": 0.4064, "step": 24757 }, { "epoch": 0.7587961260267255, "grad_norm": 1.3832402892199143, "learning_rate": 1.449669604958701e-06, "loss": 0.555, "step": 24758 }, { "epoch": 0.7588267745494667, "grad_norm": 1.2766384595975973, "learning_rate": 1.4493201476662956e-06, "loss": 0.5199, "step": 24759 }, { "epoch": 0.7588574230722079, "grad_norm": 1.3636859622886432, "learning_rate": 1.4489707253596468e-06, "loss": 0.6495, "step": 24760 }, { "epoch": 0.7588880715949491, "grad_norm": 1.4280638161216308, "learning_rate": 1.4486213380422003e-06, "loss": 0.6625, "step": 24761 }, { "epoch": 0.7589187201176903, "grad_norm": 1.398026886209658, "learning_rate": 1.4482719857173961e-06, "loss": 0.6608, "step": 24762 }, { "epoch": 0.7589493686404315, "grad_norm": 0.46760577406819787, "learning_rate": 1.4479226683886731e-06, "loss": 0.3707, "step": 24763 }, { "epoch": 0.7589800171631728, "grad_norm": 1.57944099451035, "learning_rate": 1.447573386059481e-06, "loss": 0.6701, "step": 24764 }, { "epoch": 0.7590106656859139, "grad_norm": 1.2248022053037357, "learning_rate": 1.4472241387332565e-06, "loss": 0.6015, "step": 24765 }, { "epoch": 0.7590413142086552, "grad_norm": 1.4340446673426004, "learning_rate": 1.4468749264134401e-06, "loss": 0.587, "step": 24766 }, { "epoch": 0.7590719627313963, "grad_norm": 1.1935143010081097, "learning_rate": 1.4465257491034746e-06, "loss": 0.4649, "step": 24767 }, { "epoch": 0.7591026112541376, "grad_norm": 1.3230887797152455, "learning_rate": 1.4461766068068e-06, "loss": 0.6051, "step": 24768 }, { "epoch": 0.7591332597768787, "grad_norm": 1.2911486053727101, "learning_rate": 1.4458274995268578e-06, "loss": 0.6464, "step": 24769 }, { "epoch": 0.75916390829962, "grad_norm": 1.2896714515176324, "learning_rate": 1.4454784272670851e-06, "loss": 0.6411, "step": 24770 }, { "epoch": 0.7591945568223611, "grad_norm": 1.3394357151310434, "learning_rate": 1.4451293900309233e-06, "loss": 0.5707, "step": 24771 }, { "epoch": 0.7592252053451024, "grad_norm": 1.2677970862610555, "learning_rate": 1.4447803878218126e-06, "loss": 0.6013, "step": 24772 }, { "epoch": 0.7592558538678436, "grad_norm": 1.4340609469572443, "learning_rate": 1.4444314206431904e-06, "loss": 0.6204, "step": 24773 }, { "epoch": 0.7592865023905848, "grad_norm": 0.4389354078690737, "learning_rate": 1.4440824884984917e-06, "loss": 0.3919, "step": 24774 }, { "epoch": 0.759317150913326, "grad_norm": 1.3679343768309566, "learning_rate": 1.4437335913911615e-06, "loss": 0.6402, "step": 24775 }, { "epoch": 0.7593477994360672, "grad_norm": 1.407898143786826, "learning_rate": 1.4433847293246322e-06, "loss": 0.5956, "step": 24776 }, { "epoch": 0.7593784479588084, "grad_norm": 1.4526797519764592, "learning_rate": 1.443035902302345e-06, "loss": 0.6758, "step": 24777 }, { "epoch": 0.7594090964815496, "grad_norm": 1.3167905725497684, "learning_rate": 1.4426871103277334e-06, "loss": 0.6421, "step": 24778 }, { "epoch": 0.7594397450042908, "grad_norm": 1.3276022586889578, "learning_rate": 1.4423383534042361e-06, "loss": 0.5789, "step": 24779 }, { "epoch": 0.759470393527032, "grad_norm": 1.353319721125845, "learning_rate": 1.441989631535291e-06, "loss": 0.604, "step": 24780 }, { "epoch": 0.7595010420497732, "grad_norm": 0.4277398020689207, "learning_rate": 1.4416409447243307e-06, "loss": 0.3849, "step": 24781 }, { "epoch": 0.7595316905725144, "grad_norm": 1.341589311924744, "learning_rate": 1.4412922929747925e-06, "loss": 0.5697, "step": 24782 }, { "epoch": 0.7595623390952556, "grad_norm": 1.1262791405839905, "learning_rate": 1.4409436762901135e-06, "loss": 0.5129, "step": 24783 }, { "epoch": 0.7595929876179968, "grad_norm": 1.1412688895355363, "learning_rate": 1.4405950946737258e-06, "loss": 0.5396, "step": 24784 }, { "epoch": 0.759623636140738, "grad_norm": 1.3846303644096603, "learning_rate": 1.4402465481290646e-06, "loss": 0.6156, "step": 24785 }, { "epoch": 0.7596542846634792, "grad_norm": 1.4496485925115015, "learning_rate": 1.4398980366595672e-06, "loss": 0.6883, "step": 24786 }, { "epoch": 0.7596849331862204, "grad_norm": 1.466676223611541, "learning_rate": 1.4395495602686631e-06, "loss": 0.6177, "step": 24787 }, { "epoch": 0.7597155817089616, "grad_norm": 1.3760691416125377, "learning_rate": 1.4392011189597903e-06, "loss": 0.6333, "step": 24788 }, { "epoch": 0.7597462302317028, "grad_norm": 0.43392500783048793, "learning_rate": 1.4388527127363784e-06, "loss": 0.3724, "step": 24789 }, { "epoch": 0.759776878754444, "grad_norm": 1.290482792597084, "learning_rate": 1.4385043416018618e-06, "loss": 0.6795, "step": 24790 }, { "epoch": 0.7598075272771853, "grad_norm": 1.2302613529870197, "learning_rate": 1.438156005559675e-06, "loss": 0.595, "step": 24791 }, { "epoch": 0.7598381757999264, "grad_norm": 0.4279297645255291, "learning_rate": 1.437807704613246e-06, "loss": 0.3979, "step": 24792 }, { "epoch": 0.7598688243226677, "grad_norm": 0.4684799636491209, "learning_rate": 1.4374594387660096e-06, "loss": 0.3847, "step": 24793 }, { "epoch": 0.7598994728454088, "grad_norm": 1.2832394509107212, "learning_rate": 1.437111208021399e-06, "loss": 0.6348, "step": 24794 }, { "epoch": 0.7599301213681501, "grad_norm": 1.1679604366826575, "learning_rate": 1.4367630123828407e-06, "loss": 0.5913, "step": 24795 }, { "epoch": 0.7599607698908912, "grad_norm": 0.4737241225960355, "learning_rate": 1.4364148518537685e-06, "loss": 0.4061, "step": 24796 }, { "epoch": 0.7599914184136325, "grad_norm": 0.4652148639626316, "learning_rate": 1.4360667264376144e-06, "loss": 0.3954, "step": 24797 }, { "epoch": 0.7600220669363736, "grad_norm": 1.3466282937722684, "learning_rate": 1.435718636137805e-06, "loss": 0.6177, "step": 24798 }, { "epoch": 0.7600527154591149, "grad_norm": 0.4718288256286233, "learning_rate": 1.4353705809577735e-06, "loss": 0.3894, "step": 24799 }, { "epoch": 0.760083363981856, "grad_norm": 1.2000162900960318, "learning_rate": 1.435022560900946e-06, "loss": 0.5102, "step": 24800 }, { "epoch": 0.7601140125045973, "grad_norm": 0.4299298794114601, "learning_rate": 1.4346745759707531e-06, "loss": 0.3879, "step": 24801 }, { "epoch": 0.7601446610273385, "grad_norm": 1.273194239164218, "learning_rate": 1.4343266261706257e-06, "loss": 0.5684, "step": 24802 }, { "epoch": 0.7601753095500797, "grad_norm": 1.3645758635308856, "learning_rate": 1.4339787115039887e-06, "loss": 0.5937, "step": 24803 }, { "epoch": 0.7602059580728209, "grad_norm": 0.4303988080315333, "learning_rate": 1.433630831974272e-06, "loss": 0.3987, "step": 24804 }, { "epoch": 0.7602366065955621, "grad_norm": 1.2566544297947275, "learning_rate": 1.4332829875849047e-06, "loss": 0.5293, "step": 24805 }, { "epoch": 0.7602672551183033, "grad_norm": 1.3007527348916523, "learning_rate": 1.4329351783393114e-06, "loss": 0.6011, "step": 24806 }, { "epoch": 0.7602979036410445, "grad_norm": 1.1749002116096225, "learning_rate": 1.4325874042409215e-06, "loss": 0.5433, "step": 24807 }, { "epoch": 0.7603285521637857, "grad_norm": 1.4316158814706643, "learning_rate": 1.4322396652931592e-06, "loss": 0.6622, "step": 24808 }, { "epoch": 0.760359200686527, "grad_norm": 0.4560914429003443, "learning_rate": 1.4318919614994525e-06, "loss": 0.3851, "step": 24809 }, { "epoch": 0.7603898492092681, "grad_norm": 0.4406623349748698, "learning_rate": 1.431544292863229e-06, "loss": 0.3761, "step": 24810 }, { "epoch": 0.7604204977320094, "grad_norm": 1.235782166433903, "learning_rate": 1.4311966593879106e-06, "loss": 0.5545, "step": 24811 }, { "epoch": 0.7604511462547505, "grad_norm": 0.4562473478083081, "learning_rate": 1.430849061076925e-06, "loss": 0.3892, "step": 24812 }, { "epoch": 0.7604817947774917, "grad_norm": 1.2751760177804392, "learning_rate": 1.4305014979336983e-06, "loss": 0.6157, "step": 24813 }, { "epoch": 0.7605124433002329, "grad_norm": 0.4477237150564611, "learning_rate": 1.4301539699616523e-06, "loss": 0.3984, "step": 24814 }, { "epoch": 0.7605430918229741, "grad_norm": 1.245363873615608, "learning_rate": 1.4298064771642123e-06, "loss": 0.6019, "step": 24815 }, { "epoch": 0.7605737403457153, "grad_norm": 1.5120849451442577, "learning_rate": 1.4294590195448039e-06, "loss": 0.6263, "step": 24816 }, { "epoch": 0.7606043888684565, "grad_norm": 0.46196835134840664, "learning_rate": 1.4291115971068482e-06, "loss": 0.4172, "step": 24817 }, { "epoch": 0.7606350373911978, "grad_norm": 1.2859666286502454, "learning_rate": 1.4287642098537707e-06, "loss": 0.6402, "step": 24818 }, { "epoch": 0.7606656859139389, "grad_norm": 1.2482189094494127, "learning_rate": 1.4284168577889924e-06, "loss": 0.6106, "step": 24819 }, { "epoch": 0.7606963344366802, "grad_norm": 1.593311791031888, "learning_rate": 1.4280695409159357e-06, "loss": 0.6373, "step": 24820 }, { "epoch": 0.7607269829594213, "grad_norm": 1.5824365595296848, "learning_rate": 1.4277222592380259e-06, "loss": 0.7042, "step": 24821 }, { "epoch": 0.7607576314821626, "grad_norm": 0.4338612566791379, "learning_rate": 1.4273750127586811e-06, "loss": 0.375, "step": 24822 }, { "epoch": 0.7607882800049037, "grad_norm": 1.4258918878183897, "learning_rate": 1.4270278014813244e-06, "loss": 0.5618, "step": 24823 }, { "epoch": 0.760818928527645, "grad_norm": 1.4357914215452459, "learning_rate": 1.426680625409378e-06, "loss": 0.6371, "step": 24824 }, { "epoch": 0.7608495770503861, "grad_norm": 1.448038759351698, "learning_rate": 1.4263334845462606e-06, "loss": 0.6194, "step": 24825 }, { "epoch": 0.7608802255731274, "grad_norm": 1.4887993650315565, "learning_rate": 1.4259863788953938e-06, "loss": 0.6103, "step": 24826 }, { "epoch": 0.7609108740958686, "grad_norm": 1.5418242084014968, "learning_rate": 1.4256393084601987e-06, "loss": 0.5607, "step": 24827 }, { "epoch": 0.7609415226186098, "grad_norm": 1.2919620297304044, "learning_rate": 1.4252922732440926e-06, "loss": 0.6295, "step": 24828 }, { "epoch": 0.760972171141351, "grad_norm": 0.4501322972040814, "learning_rate": 1.4249452732504987e-06, "loss": 0.3889, "step": 24829 }, { "epoch": 0.7610028196640922, "grad_norm": 1.3487878035000487, "learning_rate": 1.4245983084828296e-06, "loss": 0.606, "step": 24830 }, { "epoch": 0.7610334681868334, "grad_norm": 0.4360206789416697, "learning_rate": 1.4242513789445117e-06, "loss": 0.3996, "step": 24831 }, { "epoch": 0.7610641167095746, "grad_norm": 0.43961427955672694, "learning_rate": 1.42390448463896e-06, "loss": 0.3737, "step": 24832 }, { "epoch": 0.7610947652323158, "grad_norm": 1.2872550472302537, "learning_rate": 1.423557625569591e-06, "loss": 0.5809, "step": 24833 }, { "epoch": 0.761125413755057, "grad_norm": 0.4505391046276534, "learning_rate": 1.4232108017398232e-06, "loss": 0.3684, "step": 24834 }, { "epoch": 0.7611560622777982, "grad_norm": 1.3896392801390538, "learning_rate": 1.4228640131530764e-06, "loss": 0.5421, "step": 24835 }, { "epoch": 0.7611867108005395, "grad_norm": 1.2854682700745919, "learning_rate": 1.4225172598127645e-06, "loss": 0.6639, "step": 24836 }, { "epoch": 0.7612173593232806, "grad_norm": 1.3479771934856888, "learning_rate": 1.4221705417223047e-06, "loss": 0.678, "step": 24837 }, { "epoch": 0.7612480078460219, "grad_norm": 1.1663852276432778, "learning_rate": 1.4218238588851147e-06, "loss": 0.5638, "step": 24838 }, { "epoch": 0.761278656368763, "grad_norm": 1.4569796466950238, "learning_rate": 1.421477211304611e-06, "loss": 0.5839, "step": 24839 }, { "epoch": 0.7613093048915043, "grad_norm": 1.4174406442353378, "learning_rate": 1.4211305989842079e-06, "loss": 0.635, "step": 24840 }, { "epoch": 0.7613399534142454, "grad_norm": 1.4423000524825427, "learning_rate": 1.4207840219273172e-06, "loss": 0.6158, "step": 24841 }, { "epoch": 0.7613706019369867, "grad_norm": 1.3374432510008376, "learning_rate": 1.420437480137361e-06, "loss": 0.4019, "step": 24842 }, { "epoch": 0.7614012504597278, "grad_norm": 1.179687915498994, "learning_rate": 1.4200909736177497e-06, "loss": 0.5745, "step": 24843 }, { "epoch": 0.761431898982469, "grad_norm": 1.203916364418372, "learning_rate": 1.419744502371897e-06, "loss": 0.6393, "step": 24844 }, { "epoch": 0.7614625475052103, "grad_norm": 1.4819483103467446, "learning_rate": 1.4193980664032176e-06, "loss": 0.5844, "step": 24845 }, { "epoch": 0.7614931960279514, "grad_norm": 1.183623128081032, "learning_rate": 1.4190516657151266e-06, "loss": 0.5806, "step": 24846 }, { "epoch": 0.7615238445506927, "grad_norm": 1.350903841249583, "learning_rate": 1.4187053003110341e-06, "loss": 0.6002, "step": 24847 }, { "epoch": 0.7615544930734338, "grad_norm": 1.3826540970934253, "learning_rate": 1.4183589701943545e-06, "loss": 0.6252, "step": 24848 }, { "epoch": 0.7615851415961751, "grad_norm": 1.229446546304377, "learning_rate": 1.4180126753685008e-06, "loss": 0.5802, "step": 24849 }, { "epoch": 0.7616157901189162, "grad_norm": 1.323311794695586, "learning_rate": 1.4176664158368857e-06, "loss": 0.6218, "step": 24850 }, { "epoch": 0.7616464386416575, "grad_norm": 1.3451960039473352, "learning_rate": 1.4173201916029206e-06, "loss": 0.5901, "step": 24851 }, { "epoch": 0.7616770871643986, "grad_norm": 1.317276161717139, "learning_rate": 1.4169740026700146e-06, "loss": 0.5968, "step": 24852 }, { "epoch": 0.7617077356871399, "grad_norm": 1.2658156970115142, "learning_rate": 1.4166278490415802e-06, "loss": 0.6126, "step": 24853 }, { "epoch": 0.761738384209881, "grad_norm": 1.3025790730593094, "learning_rate": 1.4162817307210303e-06, "loss": 0.5498, "step": 24854 }, { "epoch": 0.7617690327326223, "grad_norm": 1.4358924707246303, "learning_rate": 1.4159356477117714e-06, "loss": 0.6474, "step": 24855 }, { "epoch": 0.7617996812553635, "grad_norm": 1.4284277598374269, "learning_rate": 1.4155896000172164e-06, "loss": 0.6963, "step": 24856 }, { "epoch": 0.7618303297781047, "grad_norm": 1.2420517571351148, "learning_rate": 1.4152435876407733e-06, "loss": 0.5457, "step": 24857 }, { "epoch": 0.7618609783008459, "grad_norm": 1.3568824483282376, "learning_rate": 1.4148976105858548e-06, "loss": 0.5839, "step": 24858 }, { "epoch": 0.7618916268235871, "grad_norm": 1.4194734114100211, "learning_rate": 1.4145516688558669e-06, "loss": 0.5364, "step": 24859 }, { "epoch": 0.7619222753463283, "grad_norm": 1.2776409620720288, "learning_rate": 1.4142057624542156e-06, "loss": 0.6745, "step": 24860 }, { "epoch": 0.7619529238690695, "grad_norm": 1.28602620451456, "learning_rate": 1.4138598913843154e-06, "loss": 0.5619, "step": 24861 }, { "epoch": 0.7619835723918107, "grad_norm": 2.0035201456928498, "learning_rate": 1.413514055649572e-06, "loss": 0.6387, "step": 24862 }, { "epoch": 0.762014220914552, "grad_norm": 1.4335967142787203, "learning_rate": 1.41316825525339e-06, "loss": 0.6301, "step": 24863 }, { "epoch": 0.7620448694372931, "grad_norm": 1.3793699483414101, "learning_rate": 1.4128224901991788e-06, "loss": 0.7068, "step": 24864 }, { "epoch": 0.7620755179600344, "grad_norm": 1.2432715640285346, "learning_rate": 1.4124767604903472e-06, "loss": 0.6195, "step": 24865 }, { "epoch": 0.7621061664827755, "grad_norm": 1.3170039234579998, "learning_rate": 1.412131066130299e-06, "loss": 0.6144, "step": 24866 }, { "epoch": 0.7621368150055168, "grad_norm": 1.3456662974098768, "learning_rate": 1.4117854071224408e-06, "loss": 0.6147, "step": 24867 }, { "epoch": 0.7621674635282579, "grad_norm": 1.2646229055321627, "learning_rate": 1.4114397834701788e-06, "loss": 0.6105, "step": 24868 }, { "epoch": 0.7621981120509992, "grad_norm": 9.670556955618247, "learning_rate": 1.4110941951769208e-06, "loss": 0.5906, "step": 24869 }, { "epoch": 0.7622287605737403, "grad_norm": 1.3588061058700711, "learning_rate": 1.4107486422460698e-06, "loss": 0.6831, "step": 24870 }, { "epoch": 0.7622594090964816, "grad_norm": 1.3944080480434085, "learning_rate": 1.4104031246810278e-06, "loss": 0.6363, "step": 24871 }, { "epoch": 0.7622900576192227, "grad_norm": 1.3481159224488064, "learning_rate": 1.4100576424852052e-06, "loss": 0.6073, "step": 24872 }, { "epoch": 0.762320706141964, "grad_norm": 1.4641919909913972, "learning_rate": 1.4097121956620036e-06, "loss": 0.6513, "step": 24873 }, { "epoch": 0.7623513546647052, "grad_norm": 1.1935200444922833, "learning_rate": 1.4093667842148245e-06, "loss": 0.6069, "step": 24874 }, { "epoch": 0.7623820031874463, "grad_norm": 0.46267715055961967, "learning_rate": 1.4090214081470726e-06, "loss": 0.3911, "step": 24875 }, { "epoch": 0.7624126517101876, "grad_norm": 1.212464520482611, "learning_rate": 1.4086760674621525e-06, "loss": 0.6252, "step": 24876 }, { "epoch": 0.7624433002329287, "grad_norm": 1.245737465539616, "learning_rate": 1.4083307621634673e-06, "loss": 0.565, "step": 24877 }, { "epoch": 0.76247394875567, "grad_norm": 1.4618576688535532, "learning_rate": 1.407985492254416e-06, "loss": 0.587, "step": 24878 }, { "epoch": 0.7625045972784111, "grad_norm": 1.4450379943433016, "learning_rate": 1.407640257738403e-06, "loss": 0.7335, "step": 24879 }, { "epoch": 0.7625352458011524, "grad_norm": 1.5079410112171652, "learning_rate": 1.4072950586188316e-06, "loss": 0.5758, "step": 24880 }, { "epoch": 0.7625658943238935, "grad_norm": 1.436520211170188, "learning_rate": 1.4069498948991007e-06, "loss": 0.5792, "step": 24881 }, { "epoch": 0.7625965428466348, "grad_norm": 1.113921474871897, "learning_rate": 1.4066047665826089e-06, "loss": 0.5403, "step": 24882 }, { "epoch": 0.762627191369376, "grad_norm": 1.3227617431382575, "learning_rate": 1.406259673672763e-06, "loss": 0.6357, "step": 24883 }, { "epoch": 0.7626578398921172, "grad_norm": 1.3843622416009964, "learning_rate": 1.405914616172958e-06, "loss": 0.5619, "step": 24884 }, { "epoch": 0.7626884884148584, "grad_norm": 1.4012001733327852, "learning_rate": 1.4055695940865983e-06, "loss": 0.6678, "step": 24885 }, { "epoch": 0.7627191369375996, "grad_norm": 1.3856395069624285, "learning_rate": 1.405224607417079e-06, "loss": 0.603, "step": 24886 }, { "epoch": 0.7627497854603408, "grad_norm": 1.3314741415171767, "learning_rate": 1.4048796561678012e-06, "loss": 0.6446, "step": 24887 }, { "epoch": 0.762780433983082, "grad_norm": 1.4894868420436524, "learning_rate": 1.4045347403421656e-06, "loss": 0.6263, "step": 24888 }, { "epoch": 0.7628110825058232, "grad_norm": 1.3685389210194552, "learning_rate": 1.4041898599435678e-06, "loss": 0.642, "step": 24889 }, { "epoch": 0.7628417310285645, "grad_norm": 1.3570002611555405, "learning_rate": 1.4038450149754074e-06, "loss": 0.6676, "step": 24890 }, { "epoch": 0.7628723795513056, "grad_norm": 0.4432115663457321, "learning_rate": 1.4035002054410835e-06, "loss": 0.365, "step": 24891 }, { "epoch": 0.7629030280740469, "grad_norm": 1.4179195380263185, "learning_rate": 1.4031554313439921e-06, "loss": 0.6836, "step": 24892 }, { "epoch": 0.762933676596788, "grad_norm": 1.2022344271989571, "learning_rate": 1.402810692687528e-06, "loss": 0.5714, "step": 24893 }, { "epoch": 0.7629643251195293, "grad_norm": 1.3026696853030517, "learning_rate": 1.402465989475093e-06, "loss": 0.5519, "step": 24894 }, { "epoch": 0.7629949736422704, "grad_norm": 1.3169142630338522, "learning_rate": 1.4021213217100805e-06, "loss": 0.5566, "step": 24895 }, { "epoch": 0.7630256221650117, "grad_norm": 1.2819970723045098, "learning_rate": 1.4017766893958878e-06, "loss": 0.6399, "step": 24896 }, { "epoch": 0.7630562706877528, "grad_norm": 1.4636355762792166, "learning_rate": 1.4014320925359086e-06, "loss": 0.6786, "step": 24897 }, { "epoch": 0.7630869192104941, "grad_norm": 1.424743552189608, "learning_rate": 1.4010875311335398e-06, "loss": 0.5879, "step": 24898 }, { "epoch": 0.7631175677332352, "grad_norm": 0.44361454342725115, "learning_rate": 1.4007430051921783e-06, "loss": 0.392, "step": 24899 }, { "epoch": 0.7631482162559765, "grad_norm": 0.4477065027557198, "learning_rate": 1.400398514715215e-06, "loss": 0.4011, "step": 24900 }, { "epoch": 0.7631788647787177, "grad_norm": 0.46122047103922265, "learning_rate": 1.4000540597060463e-06, "loss": 0.4183, "step": 24901 }, { "epoch": 0.7632095133014589, "grad_norm": 1.3412597720028032, "learning_rate": 1.3997096401680672e-06, "loss": 0.6587, "step": 24902 }, { "epoch": 0.7632401618242001, "grad_norm": 1.3319130100541843, "learning_rate": 1.399365256104669e-06, "loss": 0.5502, "step": 24903 }, { "epoch": 0.7632708103469413, "grad_norm": 1.1771593061842256, "learning_rate": 1.3990209075192473e-06, "loss": 0.5101, "step": 24904 }, { "epoch": 0.7633014588696825, "grad_norm": 1.4095941355176025, "learning_rate": 1.3986765944151932e-06, "loss": 0.6866, "step": 24905 }, { "epoch": 0.7633321073924236, "grad_norm": 1.2868929341718995, "learning_rate": 1.3983323167958996e-06, "loss": 0.6531, "step": 24906 }, { "epoch": 0.7633627559151649, "grad_norm": 1.221858484621197, "learning_rate": 1.3979880746647607e-06, "loss": 0.6359, "step": 24907 }, { "epoch": 0.763393404437906, "grad_norm": 0.45599673291179904, "learning_rate": 1.3976438680251652e-06, "loss": 0.4085, "step": 24908 }, { "epoch": 0.7634240529606473, "grad_norm": 1.3576360011011717, "learning_rate": 1.397299696880507e-06, "loss": 0.5111, "step": 24909 }, { "epoch": 0.7634547014833885, "grad_norm": 0.44975857355954, "learning_rate": 1.3969555612341773e-06, "loss": 0.3839, "step": 24910 }, { "epoch": 0.7634853500061297, "grad_norm": 0.43848549642941403, "learning_rate": 1.3966114610895653e-06, "loss": 0.4039, "step": 24911 }, { "epoch": 0.7635159985288709, "grad_norm": 1.2309612548641184, "learning_rate": 1.3962673964500629e-06, "loss": 0.604, "step": 24912 }, { "epoch": 0.7635466470516121, "grad_norm": 1.650333012850069, "learning_rate": 1.3959233673190608e-06, "loss": 0.7026, "step": 24913 }, { "epoch": 0.7635772955743533, "grad_norm": 1.2888489264739724, "learning_rate": 1.3955793736999468e-06, "loss": 0.5586, "step": 24914 }, { "epoch": 0.7636079440970945, "grad_norm": 1.3360970485803125, "learning_rate": 1.3952354155961128e-06, "loss": 0.6113, "step": 24915 }, { "epoch": 0.7636385926198357, "grad_norm": 1.3118702967817792, "learning_rate": 1.3948914930109453e-06, "loss": 0.564, "step": 24916 }, { "epoch": 0.763669241142577, "grad_norm": 1.3727926021312837, "learning_rate": 1.3945476059478336e-06, "loss": 0.7156, "step": 24917 }, { "epoch": 0.7636998896653181, "grad_norm": 1.2204338574609543, "learning_rate": 1.394203754410169e-06, "loss": 0.5191, "step": 24918 }, { "epoch": 0.7637305381880594, "grad_norm": 1.2338021068240321, "learning_rate": 1.3938599384013357e-06, "loss": 0.5385, "step": 24919 }, { "epoch": 0.7637611867108005, "grad_norm": 1.2458039233512102, "learning_rate": 1.393516157924723e-06, "loss": 0.6291, "step": 24920 }, { "epoch": 0.7637918352335418, "grad_norm": 1.3711256919945563, "learning_rate": 1.3931724129837205e-06, "loss": 0.6382, "step": 24921 }, { "epoch": 0.7638224837562829, "grad_norm": 1.3095891114666314, "learning_rate": 1.3928287035817105e-06, "loss": 0.5846, "step": 24922 }, { "epoch": 0.7638531322790242, "grad_norm": 1.2650348536056346, "learning_rate": 1.3924850297220831e-06, "loss": 0.5565, "step": 24923 }, { "epoch": 0.7638837808017653, "grad_norm": 0.44221143063138707, "learning_rate": 1.3921413914082249e-06, "loss": 0.3771, "step": 24924 }, { "epoch": 0.7639144293245066, "grad_norm": 1.316315201898059, "learning_rate": 1.391797788643519e-06, "loss": 0.5777, "step": 24925 }, { "epoch": 0.7639450778472477, "grad_norm": 1.5377044294448698, "learning_rate": 1.3914542214313547e-06, "loss": 0.7609, "step": 24926 }, { "epoch": 0.763975726369989, "grad_norm": 1.2804533836202696, "learning_rate": 1.391110689775113e-06, "loss": 0.4822, "step": 24927 }, { "epoch": 0.7640063748927302, "grad_norm": 1.234013638748844, "learning_rate": 1.3907671936781814e-06, "loss": 0.5004, "step": 24928 }, { "epoch": 0.7640370234154714, "grad_norm": 0.4225906585648548, "learning_rate": 1.3904237331439457e-06, "loss": 0.3737, "step": 24929 }, { "epoch": 0.7640676719382126, "grad_norm": 1.2258097397555725, "learning_rate": 1.3900803081757875e-06, "loss": 0.5914, "step": 24930 }, { "epoch": 0.7640983204609538, "grad_norm": 1.3212206905925514, "learning_rate": 1.389736918777091e-06, "loss": 0.6618, "step": 24931 }, { "epoch": 0.764128968983695, "grad_norm": 0.4451830631564208, "learning_rate": 1.3893935649512419e-06, "loss": 0.3771, "step": 24932 }, { "epoch": 0.7641596175064362, "grad_norm": 1.2436019661434143, "learning_rate": 1.3890502467016204e-06, "loss": 0.5335, "step": 24933 }, { "epoch": 0.7641902660291774, "grad_norm": 1.3287354991818396, "learning_rate": 1.3887069640316104e-06, "loss": 0.6089, "step": 24934 }, { "epoch": 0.7642209145519187, "grad_norm": 1.3808675369030443, "learning_rate": 1.3883637169445967e-06, "loss": 0.5743, "step": 24935 }, { "epoch": 0.7642515630746598, "grad_norm": 1.3008807128714674, "learning_rate": 1.3880205054439578e-06, "loss": 0.5937, "step": 24936 }, { "epoch": 0.764282211597401, "grad_norm": 1.1927818921676403, "learning_rate": 1.3876773295330782e-06, "loss": 0.5766, "step": 24937 }, { "epoch": 0.7643128601201422, "grad_norm": 1.55422038406651, "learning_rate": 1.3873341892153353e-06, "loss": 0.6129, "step": 24938 }, { "epoch": 0.7643435086428834, "grad_norm": 1.4736597636569977, "learning_rate": 1.386991084494116e-06, "loss": 0.6533, "step": 24939 }, { "epoch": 0.7643741571656246, "grad_norm": 1.4017322289500203, "learning_rate": 1.3866480153727978e-06, "loss": 0.5973, "step": 24940 }, { "epoch": 0.7644048056883658, "grad_norm": 1.4930338579472813, "learning_rate": 1.3863049818547604e-06, "loss": 0.7042, "step": 24941 }, { "epoch": 0.764435454211107, "grad_norm": 1.2883445976217216, "learning_rate": 1.3859619839433836e-06, "loss": 0.5632, "step": 24942 }, { "epoch": 0.7644661027338482, "grad_norm": 1.1810886590586156, "learning_rate": 1.3856190216420501e-06, "loss": 0.5703, "step": 24943 }, { "epoch": 0.7644967512565894, "grad_norm": 1.5013935922868233, "learning_rate": 1.385276094954136e-06, "loss": 0.614, "step": 24944 }, { "epoch": 0.7645273997793306, "grad_norm": 1.3634739625264025, "learning_rate": 1.384933203883021e-06, "loss": 0.6185, "step": 24945 }, { "epoch": 0.7645580483020719, "grad_norm": 1.217001464414872, "learning_rate": 1.3845903484320855e-06, "loss": 0.6275, "step": 24946 }, { "epoch": 0.764588696824813, "grad_norm": 1.33524726468825, "learning_rate": 1.3842475286047047e-06, "loss": 0.5657, "step": 24947 }, { "epoch": 0.7646193453475543, "grad_norm": 1.3929368869488827, "learning_rate": 1.3839047444042603e-06, "loss": 0.5561, "step": 24948 }, { "epoch": 0.7646499938702954, "grad_norm": 1.3025957400837913, "learning_rate": 1.3835619958341257e-06, "loss": 0.547, "step": 24949 }, { "epoch": 0.7646806423930367, "grad_norm": 1.276665192424639, "learning_rate": 1.3832192828976798e-06, "loss": 0.6715, "step": 24950 }, { "epoch": 0.7647112909157778, "grad_norm": 1.35499195523827, "learning_rate": 1.3828766055983018e-06, "loss": 0.6214, "step": 24951 }, { "epoch": 0.7647419394385191, "grad_norm": 1.5219821145517662, "learning_rate": 1.3825339639393642e-06, "loss": 0.6543, "step": 24952 }, { "epoch": 0.7647725879612602, "grad_norm": 1.3845361371187557, "learning_rate": 1.3821913579242453e-06, "loss": 0.6389, "step": 24953 }, { "epoch": 0.7648032364840015, "grad_norm": 1.3622244585015042, "learning_rate": 1.3818487875563218e-06, "loss": 0.6063, "step": 24954 }, { "epoch": 0.7648338850067427, "grad_norm": 1.3463280053421665, "learning_rate": 1.3815062528389667e-06, "loss": 0.6645, "step": 24955 }, { "epoch": 0.7648645335294839, "grad_norm": 1.237255514928612, "learning_rate": 1.381163753775558e-06, "loss": 0.5296, "step": 24956 }, { "epoch": 0.7648951820522251, "grad_norm": 1.4373716673878185, "learning_rate": 1.380821290369465e-06, "loss": 0.6403, "step": 24957 }, { "epoch": 0.7649258305749663, "grad_norm": 1.3333625640614568, "learning_rate": 1.3804788626240695e-06, "loss": 0.6766, "step": 24958 }, { "epoch": 0.7649564790977075, "grad_norm": 1.3285335949513888, "learning_rate": 1.3801364705427417e-06, "loss": 0.6265, "step": 24959 }, { "epoch": 0.7649871276204487, "grad_norm": 1.1687255716362364, "learning_rate": 1.379794114128854e-06, "loss": 0.5445, "step": 24960 }, { "epoch": 0.7650177761431899, "grad_norm": 1.4536554375083761, "learning_rate": 1.379451793385781e-06, "loss": 0.5778, "step": 24961 }, { "epoch": 0.7650484246659311, "grad_norm": 1.2468820158225593, "learning_rate": 1.379109508316897e-06, "loss": 0.4986, "step": 24962 }, { "epoch": 0.7650790731886723, "grad_norm": 1.1908880802043582, "learning_rate": 1.3787672589255724e-06, "loss": 0.6184, "step": 24963 }, { "epoch": 0.7651097217114136, "grad_norm": 1.3749931850671073, "learning_rate": 1.3784250452151804e-06, "loss": 0.6071, "step": 24964 }, { "epoch": 0.7651403702341547, "grad_norm": 1.6131807169280998, "learning_rate": 1.3780828671890933e-06, "loss": 0.6184, "step": 24965 }, { "epoch": 0.765171018756896, "grad_norm": 1.253486564088027, "learning_rate": 1.3777407248506835e-06, "loss": 0.579, "step": 24966 }, { "epoch": 0.7652016672796371, "grad_norm": 1.206571134646994, "learning_rate": 1.3773986182033216e-06, "loss": 0.6344, "step": 24967 }, { "epoch": 0.7652323158023783, "grad_norm": 1.4378236986083788, "learning_rate": 1.377056547250375e-06, "loss": 0.6003, "step": 24968 }, { "epoch": 0.7652629643251195, "grad_norm": 1.2097810315599005, "learning_rate": 1.3767145119952196e-06, "loss": 0.6047, "step": 24969 }, { "epoch": 0.7652936128478607, "grad_norm": 1.2463533815793977, "learning_rate": 1.3763725124412236e-06, "loss": 0.5815, "step": 24970 }, { "epoch": 0.7653242613706019, "grad_norm": 0.45620867654260794, "learning_rate": 1.3760305485917553e-06, "loss": 0.3798, "step": 24971 }, { "epoch": 0.7653549098933431, "grad_norm": 1.424137988808555, "learning_rate": 1.3756886204501845e-06, "loss": 0.6648, "step": 24972 }, { "epoch": 0.7653855584160844, "grad_norm": 1.7148924145781783, "learning_rate": 1.375346728019883e-06, "loss": 0.551, "step": 24973 }, { "epoch": 0.7654162069388255, "grad_norm": 1.2740026762978116, "learning_rate": 1.3750048713042159e-06, "loss": 0.6186, "step": 24974 }, { "epoch": 0.7654468554615668, "grad_norm": 1.3478891569933984, "learning_rate": 1.3746630503065532e-06, "loss": 0.6005, "step": 24975 }, { "epoch": 0.7654775039843079, "grad_norm": 1.1801264025939187, "learning_rate": 1.3743212650302629e-06, "loss": 0.4842, "step": 24976 }, { "epoch": 0.7655081525070492, "grad_norm": 1.49336125092283, "learning_rate": 1.373979515478715e-06, "loss": 0.5821, "step": 24977 }, { "epoch": 0.7655388010297903, "grad_norm": 1.4037330212229615, "learning_rate": 1.3736378016552743e-06, "loss": 0.5738, "step": 24978 }, { "epoch": 0.7655694495525316, "grad_norm": 1.3779976226758697, "learning_rate": 1.3732961235633047e-06, "loss": 0.5429, "step": 24979 }, { "epoch": 0.7656000980752727, "grad_norm": 1.3067036614774055, "learning_rate": 1.37295448120618e-06, "loss": 0.5375, "step": 24980 }, { "epoch": 0.765630746598014, "grad_norm": 1.3495265987502967, "learning_rate": 1.3726128745872625e-06, "loss": 0.6004, "step": 24981 }, { "epoch": 0.7656613951207552, "grad_norm": 1.4951602990152648, "learning_rate": 1.3722713037099173e-06, "loss": 0.6586, "step": 24982 }, { "epoch": 0.7656920436434964, "grad_norm": 1.2409207639712845, "learning_rate": 1.3719297685775108e-06, "loss": 0.572, "step": 24983 }, { "epoch": 0.7657226921662376, "grad_norm": 1.177161409277213, "learning_rate": 1.3715882691934086e-06, "loss": 0.6147, "step": 24984 }, { "epoch": 0.7657533406889788, "grad_norm": 1.3333838881930682, "learning_rate": 1.3712468055609774e-06, "loss": 0.6666, "step": 24985 }, { "epoch": 0.76578398921172, "grad_norm": 1.2290721878099782, "learning_rate": 1.3709053776835778e-06, "loss": 0.5655, "step": 24986 }, { "epoch": 0.7658146377344612, "grad_norm": 0.44143829753482067, "learning_rate": 1.3705639855645764e-06, "loss": 0.3861, "step": 24987 }, { "epoch": 0.7658452862572024, "grad_norm": 1.3633571301442222, "learning_rate": 1.3702226292073385e-06, "loss": 0.64, "step": 24988 }, { "epoch": 0.7658759347799436, "grad_norm": 1.3095027808328943, "learning_rate": 1.3698813086152252e-06, "loss": 0.562, "step": 24989 }, { "epoch": 0.7659065833026848, "grad_norm": 1.428232986769734, "learning_rate": 1.3695400237915974e-06, "loss": 0.7017, "step": 24990 }, { "epoch": 0.7659372318254261, "grad_norm": 1.3447561870946874, "learning_rate": 1.369198774739824e-06, "loss": 0.6448, "step": 24991 }, { "epoch": 0.7659678803481672, "grad_norm": 1.3132429228076798, "learning_rate": 1.3688575614632616e-06, "loss": 0.6081, "step": 24992 }, { "epoch": 0.7659985288709085, "grad_norm": 1.384268253462113, "learning_rate": 1.368516383965277e-06, "loss": 0.6102, "step": 24993 }, { "epoch": 0.7660291773936496, "grad_norm": 0.44705684552896374, "learning_rate": 1.3681752422492279e-06, "loss": 0.3879, "step": 24994 }, { "epoch": 0.7660598259163909, "grad_norm": 1.3290629361434538, "learning_rate": 1.3678341363184772e-06, "loss": 0.6028, "step": 24995 }, { "epoch": 0.766090474439132, "grad_norm": 1.240521774727948, "learning_rate": 1.367493066176388e-06, "loss": 0.5811, "step": 24996 }, { "epoch": 0.7661211229618733, "grad_norm": 1.4450101532301027, "learning_rate": 1.367152031826317e-06, "loss": 0.5642, "step": 24997 }, { "epoch": 0.7661517714846144, "grad_norm": 1.2911697464529417, "learning_rate": 1.3668110332716272e-06, "loss": 0.5688, "step": 24998 }, { "epoch": 0.7661824200073556, "grad_norm": 1.295536865676379, "learning_rate": 1.3664700705156792e-06, "loss": 0.5984, "step": 24999 }, { "epoch": 0.7662130685300969, "grad_norm": 1.3089262421960048, "learning_rate": 1.3661291435618307e-06, "loss": 0.5604, "step": 25000 }, { "epoch": 0.766243717052838, "grad_norm": 0.43803407596839805, "learning_rate": 1.3657882524134409e-06, "loss": 0.3829, "step": 25001 }, { "epoch": 0.7662743655755793, "grad_norm": 1.4322387535283603, "learning_rate": 1.3654473970738684e-06, "loss": 0.6446, "step": 25002 }, { "epoch": 0.7663050140983204, "grad_norm": 1.2309613702185611, "learning_rate": 1.3651065775464734e-06, "loss": 0.6112, "step": 25003 }, { "epoch": 0.7663356626210617, "grad_norm": 0.4396374408946637, "learning_rate": 1.3647657938346143e-06, "loss": 0.3859, "step": 25004 }, { "epoch": 0.7663663111438028, "grad_norm": 1.4568539323074967, "learning_rate": 1.3644250459416464e-06, "loss": 0.6279, "step": 25005 }, { "epoch": 0.7663969596665441, "grad_norm": 1.4239718541316033, "learning_rate": 1.3640843338709292e-06, "loss": 0.5665, "step": 25006 }, { "epoch": 0.7664276081892852, "grad_norm": 1.3847848619822316, "learning_rate": 1.3637436576258206e-06, "loss": 0.6573, "step": 25007 }, { "epoch": 0.7664582567120265, "grad_norm": 1.2740190504579727, "learning_rate": 1.3634030172096762e-06, "loss": 0.5556, "step": 25008 }, { "epoch": 0.7664889052347676, "grad_norm": 1.3633857505415237, "learning_rate": 1.363062412625849e-06, "loss": 0.6129, "step": 25009 }, { "epoch": 0.7665195537575089, "grad_norm": 1.2790038097352174, "learning_rate": 1.3627218438777017e-06, "loss": 0.5303, "step": 25010 }, { "epoch": 0.7665502022802501, "grad_norm": 1.361694893368704, "learning_rate": 1.3623813109685852e-06, "loss": 0.5737, "step": 25011 }, { "epoch": 0.7665808508029913, "grad_norm": 0.4494218271172679, "learning_rate": 1.3620408139018575e-06, "loss": 0.3993, "step": 25012 }, { "epoch": 0.7666114993257325, "grad_norm": 1.2382652394849865, "learning_rate": 1.3617003526808714e-06, "loss": 0.5927, "step": 25013 }, { "epoch": 0.7666421478484737, "grad_norm": 1.4632821257526243, "learning_rate": 1.3613599273089828e-06, "loss": 0.6539, "step": 25014 }, { "epoch": 0.7666727963712149, "grad_norm": 1.3374302952848032, "learning_rate": 1.361019537789547e-06, "loss": 0.5613, "step": 25015 }, { "epoch": 0.7667034448939561, "grad_norm": 1.185146202965188, "learning_rate": 1.3606791841259153e-06, "loss": 0.5805, "step": 25016 }, { "epoch": 0.7667340934166973, "grad_norm": 0.47663265326637394, "learning_rate": 1.360338866321443e-06, "loss": 0.4115, "step": 25017 }, { "epoch": 0.7667647419394386, "grad_norm": 1.86562468446211, "learning_rate": 1.359998584379484e-06, "loss": 0.7316, "step": 25018 }, { "epoch": 0.7667953904621797, "grad_norm": 1.3707164742785127, "learning_rate": 1.3596583383033906e-06, "loss": 0.6457, "step": 25019 }, { "epoch": 0.766826038984921, "grad_norm": 1.2970809633290796, "learning_rate": 1.3593181280965124e-06, "loss": 0.6478, "step": 25020 }, { "epoch": 0.7668566875076621, "grad_norm": 1.4885958647087862, "learning_rate": 1.3589779537622066e-06, "loss": 0.6705, "step": 25021 }, { "epoch": 0.7668873360304034, "grad_norm": 1.5102188218822528, "learning_rate": 1.3586378153038215e-06, "loss": 0.5998, "step": 25022 }, { "epoch": 0.7669179845531445, "grad_norm": 1.3439734713200397, "learning_rate": 1.358297712724711e-06, "loss": 0.5808, "step": 25023 }, { "epoch": 0.7669486330758858, "grad_norm": 0.4381637686713802, "learning_rate": 1.3579576460282234e-06, "loss": 0.3828, "step": 25024 }, { "epoch": 0.7669792815986269, "grad_norm": 1.0974342588985395, "learning_rate": 1.3576176152177112e-06, "loss": 0.526, "step": 25025 }, { "epoch": 0.7670099301213682, "grad_norm": 1.228593070698442, "learning_rate": 1.357277620296526e-06, "loss": 0.5993, "step": 25026 }, { "epoch": 0.7670405786441093, "grad_norm": 1.322133766682814, "learning_rate": 1.3569376612680146e-06, "loss": 0.5676, "step": 25027 }, { "epoch": 0.7670712271668506, "grad_norm": 1.5062697594128067, "learning_rate": 1.3565977381355288e-06, "loss": 0.6186, "step": 25028 }, { "epoch": 0.7671018756895918, "grad_norm": 0.44894379218784597, "learning_rate": 1.3562578509024194e-06, "loss": 0.383, "step": 25029 }, { "epoch": 0.7671325242123329, "grad_norm": 1.450044685853455, "learning_rate": 1.3559179995720318e-06, "loss": 0.6188, "step": 25030 }, { "epoch": 0.7671631727350742, "grad_norm": 1.3342318135850195, "learning_rate": 1.3555781841477167e-06, "loss": 0.6199, "step": 25031 }, { "epoch": 0.7671938212578153, "grad_norm": 1.4796689926578526, "learning_rate": 1.3552384046328238e-06, "loss": 0.6118, "step": 25032 }, { "epoch": 0.7672244697805566, "grad_norm": 1.3278805562943352, "learning_rate": 1.354898661030698e-06, "loss": 0.603, "step": 25033 }, { "epoch": 0.7672551183032977, "grad_norm": 1.2542109762871476, "learning_rate": 1.3545589533446897e-06, "loss": 0.6454, "step": 25034 }, { "epoch": 0.767285766826039, "grad_norm": 1.2759891968684776, "learning_rate": 1.354219281578143e-06, "loss": 0.5587, "step": 25035 }, { "epoch": 0.7673164153487801, "grad_norm": 1.5154324126205871, "learning_rate": 1.3538796457344066e-06, "loss": 0.5939, "step": 25036 }, { "epoch": 0.7673470638715214, "grad_norm": 1.2798406441936956, "learning_rate": 1.3535400458168292e-06, "loss": 0.6389, "step": 25037 }, { "epoch": 0.7673777123942626, "grad_norm": 1.2447485554134898, "learning_rate": 1.3532004818287525e-06, "loss": 0.6127, "step": 25038 }, { "epoch": 0.7674083609170038, "grad_norm": 1.2247104865923044, "learning_rate": 1.3528609537735244e-06, "loss": 0.6233, "step": 25039 }, { "epoch": 0.767439009439745, "grad_norm": 1.493420660189531, "learning_rate": 1.3525214616544924e-06, "loss": 0.6775, "step": 25040 }, { "epoch": 0.7674696579624862, "grad_norm": 1.4598394125993588, "learning_rate": 1.3521820054749979e-06, "loss": 0.6471, "step": 25041 }, { "epoch": 0.7675003064852274, "grad_norm": 0.452861687680195, "learning_rate": 1.3518425852383876e-06, "loss": 0.4037, "step": 25042 }, { "epoch": 0.7675309550079686, "grad_norm": 1.2717166833263238, "learning_rate": 1.3515032009480072e-06, "loss": 0.5709, "step": 25043 }, { "epoch": 0.7675616035307098, "grad_norm": 1.322191912446814, "learning_rate": 1.351163852607198e-06, "loss": 0.6139, "step": 25044 }, { "epoch": 0.767592252053451, "grad_norm": 1.402209080276725, "learning_rate": 1.3508245402193065e-06, "loss": 0.6415, "step": 25045 }, { "epoch": 0.7676229005761922, "grad_norm": 1.3368016595876966, "learning_rate": 1.3504852637876725e-06, "loss": 0.5403, "step": 25046 }, { "epoch": 0.7676535490989335, "grad_norm": 1.5151471328071653, "learning_rate": 1.350146023315641e-06, "loss": 0.6035, "step": 25047 }, { "epoch": 0.7676841976216746, "grad_norm": 1.3558662743822096, "learning_rate": 1.3498068188065566e-06, "loss": 0.6238, "step": 25048 }, { "epoch": 0.7677148461444159, "grad_norm": 1.3893671811667454, "learning_rate": 1.3494676502637577e-06, "loss": 0.6244, "step": 25049 }, { "epoch": 0.767745494667157, "grad_norm": 1.2441891002942294, "learning_rate": 1.3491285176905882e-06, "loss": 0.6336, "step": 25050 }, { "epoch": 0.7677761431898983, "grad_norm": 0.4474393199262211, "learning_rate": 1.3487894210903907e-06, "loss": 0.3858, "step": 25051 }, { "epoch": 0.7678067917126394, "grad_norm": 1.5440647731316361, "learning_rate": 1.3484503604665038e-06, "loss": 0.6174, "step": 25052 }, { "epoch": 0.7678374402353807, "grad_norm": 0.4359748741704177, "learning_rate": 1.3481113358222718e-06, "loss": 0.3716, "step": 25053 }, { "epoch": 0.7678680887581218, "grad_norm": 0.4315379930600054, "learning_rate": 1.3477723471610315e-06, "loss": 0.3946, "step": 25054 }, { "epoch": 0.7678987372808631, "grad_norm": 1.3778425484396064, "learning_rate": 1.3474333944861245e-06, "loss": 0.6838, "step": 25055 }, { "epoch": 0.7679293858036043, "grad_norm": 1.4418521235430102, "learning_rate": 1.347094477800892e-06, "loss": 0.6616, "step": 25056 }, { "epoch": 0.7679600343263455, "grad_norm": 1.4832192582023978, "learning_rate": 1.3467555971086715e-06, "loss": 0.5706, "step": 25057 }, { "epoch": 0.7679906828490867, "grad_norm": 1.313840201889296, "learning_rate": 1.3464167524128025e-06, "loss": 0.5839, "step": 25058 }, { "epoch": 0.7680213313718279, "grad_norm": 1.505031280092992, "learning_rate": 1.3460779437166255e-06, "loss": 0.6816, "step": 25059 }, { "epoch": 0.7680519798945691, "grad_norm": 1.3597774756560201, "learning_rate": 1.3457391710234757e-06, "loss": 0.6273, "step": 25060 }, { "epoch": 0.7680826284173102, "grad_norm": 1.2783763157682606, "learning_rate": 1.3454004343366932e-06, "loss": 0.6075, "step": 25061 }, { "epoch": 0.7681132769400515, "grad_norm": 1.3036880784180063, "learning_rate": 1.3450617336596166e-06, "loss": 0.6403, "step": 25062 }, { "epoch": 0.7681439254627926, "grad_norm": 1.344223099287563, "learning_rate": 1.3447230689955803e-06, "loss": 0.6427, "step": 25063 }, { "epoch": 0.7681745739855339, "grad_norm": 1.6447741518508745, "learning_rate": 1.3443844403479244e-06, "loss": 0.638, "step": 25064 }, { "epoch": 0.768205222508275, "grad_norm": 1.3085882857748417, "learning_rate": 1.3440458477199813e-06, "loss": 0.6349, "step": 25065 }, { "epoch": 0.7682358710310163, "grad_norm": 1.2539060757599632, "learning_rate": 1.3437072911150927e-06, "loss": 0.5495, "step": 25066 }, { "epoch": 0.7682665195537575, "grad_norm": 1.1810248098092997, "learning_rate": 1.3433687705365917e-06, "loss": 0.519, "step": 25067 }, { "epoch": 0.7682971680764987, "grad_norm": 1.321483367519917, "learning_rate": 1.3430302859878119e-06, "loss": 0.6755, "step": 25068 }, { "epoch": 0.7683278165992399, "grad_norm": 1.4291106974065806, "learning_rate": 1.3426918374720904e-06, "loss": 0.6174, "step": 25069 }, { "epoch": 0.7683584651219811, "grad_norm": 1.456243284448302, "learning_rate": 1.342353424992764e-06, "loss": 0.6197, "step": 25070 }, { "epoch": 0.7683891136447223, "grad_norm": 1.4018722011142828, "learning_rate": 1.342015048553163e-06, "loss": 0.5713, "step": 25071 }, { "epoch": 0.7684197621674635, "grad_norm": 1.5341413205134615, "learning_rate": 1.3416767081566244e-06, "loss": 0.6335, "step": 25072 }, { "epoch": 0.7684504106902047, "grad_norm": 1.3830773675768882, "learning_rate": 1.3413384038064826e-06, "loss": 0.5912, "step": 25073 }, { "epoch": 0.768481059212946, "grad_norm": 0.43452391669051654, "learning_rate": 1.3410001355060676e-06, "loss": 0.3799, "step": 25074 }, { "epoch": 0.7685117077356871, "grad_norm": 1.3354952948129313, "learning_rate": 1.340661903258716e-06, "loss": 0.593, "step": 25075 }, { "epoch": 0.7685423562584284, "grad_norm": 1.2509449674218613, "learning_rate": 1.340323707067756e-06, "loss": 0.5931, "step": 25076 }, { "epoch": 0.7685730047811695, "grad_norm": 1.345682998893742, "learning_rate": 1.3399855469365264e-06, "loss": 0.6164, "step": 25077 }, { "epoch": 0.7686036533039108, "grad_norm": 1.2551291490805894, "learning_rate": 1.3396474228683554e-06, "loss": 0.5789, "step": 25078 }, { "epoch": 0.7686343018266519, "grad_norm": 0.4660854354997984, "learning_rate": 1.3393093348665732e-06, "loss": 0.3874, "step": 25079 }, { "epoch": 0.7686649503493932, "grad_norm": 1.3856966340265555, "learning_rate": 1.338971282934513e-06, "loss": 0.6179, "step": 25080 }, { "epoch": 0.7686955988721343, "grad_norm": 1.4842288596937392, "learning_rate": 1.3386332670755065e-06, "loss": 0.6233, "step": 25081 }, { "epoch": 0.7687262473948756, "grad_norm": 1.47177935810936, "learning_rate": 1.3382952872928823e-06, "loss": 0.6651, "step": 25082 }, { "epoch": 0.7687568959176168, "grad_norm": 1.1971743330239903, "learning_rate": 1.3379573435899713e-06, "loss": 0.5818, "step": 25083 }, { "epoch": 0.768787544440358, "grad_norm": 1.305149192240076, "learning_rate": 1.3376194359701034e-06, "loss": 0.5598, "step": 25084 }, { "epoch": 0.7688181929630992, "grad_norm": 0.44904594791136854, "learning_rate": 1.33728156443661e-06, "loss": 0.3666, "step": 25085 }, { "epoch": 0.7688488414858404, "grad_norm": 1.1465093163756692, "learning_rate": 1.3369437289928184e-06, "loss": 0.6464, "step": 25086 }, { "epoch": 0.7688794900085816, "grad_norm": 0.44600790222341047, "learning_rate": 1.336605929642056e-06, "loss": 0.3843, "step": 25087 }, { "epoch": 0.7689101385313228, "grad_norm": 1.2845347579258712, "learning_rate": 1.3362681663876526e-06, "loss": 0.5401, "step": 25088 }, { "epoch": 0.768940787054064, "grad_norm": 1.44030068888815, "learning_rate": 1.3359304392329374e-06, "loss": 0.6468, "step": 25089 }, { "epoch": 0.7689714355768053, "grad_norm": 1.1838873869387023, "learning_rate": 1.3355927481812364e-06, "loss": 0.5448, "step": 25090 }, { "epoch": 0.7690020840995464, "grad_norm": 1.2777586578783007, "learning_rate": 1.335255093235877e-06, "loss": 0.554, "step": 25091 }, { "epoch": 0.7690327326222876, "grad_norm": 1.3363706427372481, "learning_rate": 1.3349174744001875e-06, "loss": 0.722, "step": 25092 }, { "epoch": 0.7690633811450288, "grad_norm": 0.4427802864037495, "learning_rate": 1.334579891677495e-06, "loss": 0.3782, "step": 25093 }, { "epoch": 0.76909402966777, "grad_norm": 1.3375932866387858, "learning_rate": 1.3342423450711246e-06, "loss": 0.5765, "step": 25094 }, { "epoch": 0.7691246781905112, "grad_norm": 0.47910750583711853, "learning_rate": 1.3339048345843992e-06, "loss": 0.4172, "step": 25095 }, { "epoch": 0.7691553267132524, "grad_norm": 1.1425106424040923, "learning_rate": 1.3335673602206506e-06, "loss": 0.5924, "step": 25096 }, { "epoch": 0.7691859752359936, "grad_norm": 1.2999396065887738, "learning_rate": 1.3332299219832011e-06, "loss": 0.5415, "step": 25097 }, { "epoch": 0.7692166237587348, "grad_norm": 1.272976390006132, "learning_rate": 1.3328925198753739e-06, "loss": 0.6466, "step": 25098 }, { "epoch": 0.769247272281476, "grad_norm": 1.3332628808701383, "learning_rate": 1.332555153900495e-06, "loss": 0.5655, "step": 25099 }, { "epoch": 0.7692779208042172, "grad_norm": 1.312175171594525, "learning_rate": 1.3322178240618893e-06, "loss": 0.5414, "step": 25100 }, { "epoch": 0.7693085693269585, "grad_norm": 1.3220644900173433, "learning_rate": 1.331880530362879e-06, "loss": 0.5622, "step": 25101 }, { "epoch": 0.7693392178496996, "grad_norm": 0.4527237982320385, "learning_rate": 1.3315432728067884e-06, "loss": 0.4119, "step": 25102 }, { "epoch": 0.7693698663724409, "grad_norm": 1.2290845456896144, "learning_rate": 1.3312060513969399e-06, "loss": 0.5714, "step": 25103 }, { "epoch": 0.769400514895182, "grad_norm": 1.3186793657673233, "learning_rate": 1.3308688661366592e-06, "loss": 0.5275, "step": 25104 }, { "epoch": 0.7694311634179233, "grad_norm": 1.2316349385909018, "learning_rate": 1.3305317170292658e-06, "loss": 0.6157, "step": 25105 }, { "epoch": 0.7694618119406644, "grad_norm": 1.1962829849976577, "learning_rate": 1.3301946040780794e-06, "loss": 0.5234, "step": 25106 }, { "epoch": 0.7694924604634057, "grad_norm": 1.5151017884353954, "learning_rate": 1.3298575272864277e-06, "loss": 0.6727, "step": 25107 }, { "epoch": 0.7695231089861468, "grad_norm": 0.4349417307280407, "learning_rate": 1.3295204866576289e-06, "loss": 0.4122, "step": 25108 }, { "epoch": 0.7695537575088881, "grad_norm": 1.3938658058814464, "learning_rate": 1.3291834821950017e-06, "loss": 0.582, "step": 25109 }, { "epoch": 0.7695844060316293, "grad_norm": 1.2624204127797476, "learning_rate": 1.3288465139018696e-06, "loss": 0.6757, "step": 25110 }, { "epoch": 0.7696150545543705, "grad_norm": 0.44434993328730105, "learning_rate": 1.3285095817815518e-06, "loss": 0.3856, "step": 25111 }, { "epoch": 0.7696457030771117, "grad_norm": 1.3826962893218224, "learning_rate": 1.32817268583737e-06, "loss": 0.574, "step": 25112 }, { "epoch": 0.7696763515998529, "grad_norm": 1.3121374430292363, "learning_rate": 1.3278358260726409e-06, "loss": 0.6622, "step": 25113 }, { "epoch": 0.7697070001225941, "grad_norm": 1.3443527583478436, "learning_rate": 1.3274990024906847e-06, "loss": 0.6119, "step": 25114 }, { "epoch": 0.7697376486453353, "grad_norm": 1.3613037220184414, "learning_rate": 1.3271622150948222e-06, "loss": 0.5905, "step": 25115 }, { "epoch": 0.7697682971680765, "grad_norm": 0.47132462379824713, "learning_rate": 1.32682546388837e-06, "loss": 0.4097, "step": 25116 }, { "epoch": 0.7697989456908177, "grad_norm": 1.260428085172335, "learning_rate": 1.326488748874643e-06, "loss": 0.6076, "step": 25117 }, { "epoch": 0.7698295942135589, "grad_norm": 0.4563654492305339, "learning_rate": 1.3261520700569658e-06, "loss": 0.3844, "step": 25118 }, { "epoch": 0.7698602427363002, "grad_norm": 1.293287882523445, "learning_rate": 1.3258154274386502e-06, "loss": 0.4948, "step": 25119 }, { "epoch": 0.7698908912590413, "grad_norm": 0.4353342365235056, "learning_rate": 1.3254788210230175e-06, "loss": 0.3991, "step": 25120 }, { "epoch": 0.7699215397817826, "grad_norm": 1.424716328954567, "learning_rate": 1.3251422508133805e-06, "loss": 0.6301, "step": 25121 }, { "epoch": 0.7699521883045237, "grad_norm": 1.137111504986673, "learning_rate": 1.3248057168130567e-06, "loss": 0.5206, "step": 25122 }, { "epoch": 0.7699828368272649, "grad_norm": 0.4579042562435939, "learning_rate": 1.3244692190253644e-06, "loss": 0.3998, "step": 25123 }, { "epoch": 0.7700134853500061, "grad_norm": 1.3727017410232458, "learning_rate": 1.3241327574536157e-06, "loss": 0.5618, "step": 25124 }, { "epoch": 0.7700441338727473, "grad_norm": 1.4347747809271407, "learning_rate": 1.3237963321011272e-06, "loss": 0.6029, "step": 25125 }, { "epoch": 0.7700747823954885, "grad_norm": 1.3490587790683535, "learning_rate": 1.3234599429712159e-06, "loss": 0.5616, "step": 25126 }, { "epoch": 0.7701054309182297, "grad_norm": 1.4133111433655392, "learning_rate": 1.323123590067194e-06, "loss": 0.6402, "step": 25127 }, { "epoch": 0.770136079440971, "grad_norm": 1.3028447797344167, "learning_rate": 1.3227872733923736e-06, "loss": 0.6447, "step": 25128 }, { "epoch": 0.7701667279637121, "grad_norm": 1.2540018975520542, "learning_rate": 1.322450992950074e-06, "loss": 0.5409, "step": 25129 }, { "epoch": 0.7701973764864534, "grad_norm": 1.5653123719723596, "learning_rate": 1.3221147487436036e-06, "loss": 0.6868, "step": 25130 }, { "epoch": 0.7702280250091945, "grad_norm": 1.3176325906000412, "learning_rate": 1.3217785407762801e-06, "loss": 0.5799, "step": 25131 }, { "epoch": 0.7702586735319358, "grad_norm": 1.4458293236078534, "learning_rate": 1.3214423690514117e-06, "loss": 0.5617, "step": 25132 }, { "epoch": 0.7702893220546769, "grad_norm": 1.1909086293378783, "learning_rate": 1.3211062335723128e-06, "loss": 0.6027, "step": 25133 }, { "epoch": 0.7703199705774182, "grad_norm": 1.221875613318634, "learning_rate": 1.3207701343422968e-06, "loss": 0.6254, "step": 25134 }, { "epoch": 0.7703506191001593, "grad_norm": 1.1257566531191825, "learning_rate": 1.320434071364673e-06, "loss": 0.5998, "step": 25135 }, { "epoch": 0.7703812676229006, "grad_norm": 1.354015325963268, "learning_rate": 1.3200980446427536e-06, "loss": 0.63, "step": 25136 }, { "epoch": 0.7704119161456418, "grad_norm": 1.088732552706419, "learning_rate": 1.3197620541798512e-06, "loss": 0.4714, "step": 25137 }, { "epoch": 0.770442564668383, "grad_norm": 1.319706814127855, "learning_rate": 1.319426099979273e-06, "loss": 0.6049, "step": 25138 }, { "epoch": 0.7704732131911242, "grad_norm": 1.4321714103214156, "learning_rate": 1.319090182044333e-06, "loss": 0.6009, "step": 25139 }, { "epoch": 0.7705038617138654, "grad_norm": 1.2688656472055988, "learning_rate": 1.3187543003783383e-06, "loss": 0.5731, "step": 25140 }, { "epoch": 0.7705345102366066, "grad_norm": 1.4428483378421426, "learning_rate": 1.3184184549845985e-06, "loss": 0.6278, "step": 25141 }, { "epoch": 0.7705651587593478, "grad_norm": 1.34865117614321, "learning_rate": 1.3180826458664253e-06, "loss": 0.6208, "step": 25142 }, { "epoch": 0.770595807282089, "grad_norm": 1.4153957177442078, "learning_rate": 1.3177468730271247e-06, "loss": 0.5982, "step": 25143 }, { "epoch": 0.7706264558048302, "grad_norm": 1.3286284504552075, "learning_rate": 1.317411136470006e-06, "loss": 0.5384, "step": 25144 }, { "epoch": 0.7706571043275714, "grad_norm": 1.3151035492447838, "learning_rate": 1.317075436198379e-06, "loss": 0.5998, "step": 25145 }, { "epoch": 0.7706877528503127, "grad_norm": 1.2215856508195044, "learning_rate": 1.316739772215549e-06, "loss": 0.5467, "step": 25146 }, { "epoch": 0.7707184013730538, "grad_norm": 1.1019010271674547, "learning_rate": 1.3164041445248244e-06, "loss": 0.469, "step": 25147 }, { "epoch": 0.7707490498957951, "grad_norm": 0.4390778763390949, "learning_rate": 1.3160685531295132e-06, "loss": 0.3891, "step": 25148 }, { "epoch": 0.7707796984185362, "grad_norm": 1.583094029254399, "learning_rate": 1.3157329980329204e-06, "loss": 0.6509, "step": 25149 }, { "epoch": 0.7708103469412775, "grad_norm": 1.2876406285827267, "learning_rate": 1.315397479238354e-06, "loss": 0.5413, "step": 25150 }, { "epoch": 0.7708409954640186, "grad_norm": 1.484601993000658, "learning_rate": 1.3150619967491173e-06, "loss": 0.6228, "step": 25151 }, { "epoch": 0.7708716439867599, "grad_norm": 0.4505002251470367, "learning_rate": 1.314726550568518e-06, "loss": 0.4164, "step": 25152 }, { "epoch": 0.770902292509501, "grad_norm": 0.44835793258493073, "learning_rate": 1.3143911406998621e-06, "loss": 0.3838, "step": 25153 }, { "epoch": 0.7709329410322422, "grad_norm": 0.46329751589914486, "learning_rate": 1.3140557671464522e-06, "loss": 0.4045, "step": 25154 }, { "epoch": 0.7709635895549835, "grad_norm": 1.3710114001495688, "learning_rate": 1.3137204299115935e-06, "loss": 0.5951, "step": 25155 }, { "epoch": 0.7709942380777246, "grad_norm": 1.2973572676399687, "learning_rate": 1.3133851289985922e-06, "loss": 0.6003, "step": 25156 }, { "epoch": 0.7710248866004659, "grad_norm": 0.457254600169827, "learning_rate": 1.3130498644107492e-06, "loss": 0.399, "step": 25157 }, { "epoch": 0.771055535123207, "grad_norm": 1.325456001207911, "learning_rate": 1.3127146361513687e-06, "loss": 0.6765, "step": 25158 }, { "epoch": 0.7710861836459483, "grad_norm": 1.6257434464027078, "learning_rate": 1.3123794442237564e-06, "loss": 0.5891, "step": 25159 }, { "epoch": 0.7711168321686894, "grad_norm": 1.2625820236051233, "learning_rate": 1.3120442886312113e-06, "loss": 0.5901, "step": 25160 }, { "epoch": 0.7711474806914307, "grad_norm": 1.3498495892973024, "learning_rate": 1.311709169377039e-06, "loss": 0.6027, "step": 25161 }, { "epoch": 0.7711781292141718, "grad_norm": 1.1747555360620907, "learning_rate": 1.311374086464538e-06, "loss": 0.5716, "step": 25162 }, { "epoch": 0.7712087777369131, "grad_norm": 0.45210857528121423, "learning_rate": 1.3110390398970124e-06, "loss": 0.3979, "step": 25163 }, { "epoch": 0.7712394262596542, "grad_norm": 1.43164787657351, "learning_rate": 1.3107040296777645e-06, "loss": 0.6261, "step": 25164 }, { "epoch": 0.7712700747823955, "grad_norm": 1.2943081587954868, "learning_rate": 1.3103690558100918e-06, "loss": 0.6285, "step": 25165 }, { "epoch": 0.7713007233051367, "grad_norm": 1.3231699240117394, "learning_rate": 1.310034118297297e-06, "loss": 0.5698, "step": 25166 }, { "epoch": 0.7713313718278779, "grad_norm": 1.1876624776992304, "learning_rate": 1.309699217142682e-06, "loss": 0.6215, "step": 25167 }, { "epoch": 0.7713620203506191, "grad_norm": 1.344292486503927, "learning_rate": 1.3093643523495432e-06, "loss": 0.5015, "step": 25168 }, { "epoch": 0.7713926688733603, "grad_norm": 1.4956606921060913, "learning_rate": 1.3090295239211815e-06, "loss": 0.5929, "step": 25169 }, { "epoch": 0.7714233173961015, "grad_norm": 1.84890064543124, "learning_rate": 1.3086947318608977e-06, "loss": 0.5729, "step": 25170 }, { "epoch": 0.7714539659188427, "grad_norm": 1.28926680948671, "learning_rate": 1.3083599761719878e-06, "loss": 0.6535, "step": 25171 }, { "epoch": 0.7714846144415839, "grad_norm": 1.4659993002102691, "learning_rate": 1.3080252568577534e-06, "loss": 0.6562, "step": 25172 }, { "epoch": 0.7715152629643252, "grad_norm": 0.44845636393638805, "learning_rate": 1.3076905739214873e-06, "loss": 0.3952, "step": 25173 }, { "epoch": 0.7715459114870663, "grad_norm": 1.2770805233462013, "learning_rate": 1.3073559273664937e-06, "loss": 0.6577, "step": 25174 }, { "epoch": 0.7715765600098076, "grad_norm": 1.4978631155157607, "learning_rate": 1.3070213171960672e-06, "loss": 0.6722, "step": 25175 }, { "epoch": 0.7716072085325487, "grad_norm": 0.45685870763376685, "learning_rate": 1.3066867434135033e-06, "loss": 0.3907, "step": 25176 }, { "epoch": 0.77163785705529, "grad_norm": 0.46771922366032587, "learning_rate": 1.3063522060220995e-06, "loss": 0.4157, "step": 25177 }, { "epoch": 0.7716685055780311, "grad_norm": 1.174497208092628, "learning_rate": 1.3060177050251537e-06, "loss": 0.5745, "step": 25178 }, { "epoch": 0.7716991541007724, "grad_norm": 1.2822794970454252, "learning_rate": 1.3056832404259596e-06, "loss": 0.6242, "step": 25179 }, { "epoch": 0.7717298026235135, "grad_norm": 1.1877588637657173, "learning_rate": 1.3053488122278136e-06, "loss": 0.6238, "step": 25180 }, { "epoch": 0.7717604511462548, "grad_norm": 1.3546801245725058, "learning_rate": 1.3050144204340127e-06, "loss": 0.5985, "step": 25181 }, { "epoch": 0.771791099668996, "grad_norm": 1.405398225675052, "learning_rate": 1.3046800650478487e-06, "loss": 0.5916, "step": 25182 }, { "epoch": 0.7718217481917372, "grad_norm": 1.377353207277407, "learning_rate": 1.304345746072619e-06, "loss": 0.5742, "step": 25183 }, { "epoch": 0.7718523967144784, "grad_norm": 1.4163467141776822, "learning_rate": 1.3040114635116141e-06, "loss": 0.6304, "step": 25184 }, { "epoch": 0.7718830452372195, "grad_norm": 0.4206118742742047, "learning_rate": 1.3036772173681306e-06, "loss": 0.388, "step": 25185 }, { "epoch": 0.7719136937599608, "grad_norm": 1.2461309214151632, "learning_rate": 1.3033430076454623e-06, "loss": 0.5717, "step": 25186 }, { "epoch": 0.7719443422827019, "grad_norm": 1.3330765362078798, "learning_rate": 1.3030088343469e-06, "loss": 0.6387, "step": 25187 }, { "epoch": 0.7719749908054432, "grad_norm": 1.2516993425761493, "learning_rate": 1.3026746974757376e-06, "loss": 0.663, "step": 25188 }, { "epoch": 0.7720056393281843, "grad_norm": 0.45010453643723997, "learning_rate": 1.3023405970352688e-06, "loss": 0.4075, "step": 25189 }, { "epoch": 0.7720362878509256, "grad_norm": 1.309514552530957, "learning_rate": 1.3020065330287823e-06, "loss": 0.6388, "step": 25190 }, { "epoch": 0.7720669363736667, "grad_norm": 0.4348891590684084, "learning_rate": 1.3016725054595737e-06, "loss": 0.3901, "step": 25191 }, { "epoch": 0.772097584896408, "grad_norm": 1.4067602143732794, "learning_rate": 1.3013385143309287e-06, "loss": 0.5658, "step": 25192 }, { "epoch": 0.7721282334191492, "grad_norm": 0.4476723546930039, "learning_rate": 1.3010045596461451e-06, "loss": 0.3994, "step": 25193 }, { "epoch": 0.7721588819418904, "grad_norm": 0.45172965180673125, "learning_rate": 1.3006706414085096e-06, "loss": 0.387, "step": 25194 }, { "epoch": 0.7721895304646316, "grad_norm": 0.44938944409772524, "learning_rate": 1.3003367596213113e-06, "loss": 0.3864, "step": 25195 }, { "epoch": 0.7722201789873728, "grad_norm": 0.4326340750660849, "learning_rate": 1.3000029142878417e-06, "loss": 0.3954, "step": 25196 }, { "epoch": 0.772250827510114, "grad_norm": 1.3836275507886813, "learning_rate": 1.2996691054113913e-06, "loss": 0.7643, "step": 25197 }, { "epoch": 0.7722814760328552, "grad_norm": 1.462077668837892, "learning_rate": 1.2993353329952468e-06, "loss": 0.613, "step": 25198 }, { "epoch": 0.7723121245555964, "grad_norm": 1.1232704889148293, "learning_rate": 1.2990015970426984e-06, "loss": 0.5253, "step": 25199 }, { "epoch": 0.7723427730783377, "grad_norm": 1.2467603853763227, "learning_rate": 1.298667897557035e-06, "loss": 0.6093, "step": 25200 }, { "epoch": 0.7723734216010788, "grad_norm": 1.4568683372739697, "learning_rate": 1.298334234541543e-06, "loss": 0.5555, "step": 25201 }, { "epoch": 0.7724040701238201, "grad_norm": 1.3859048043875597, "learning_rate": 1.2980006079995117e-06, "loss": 0.6958, "step": 25202 }, { "epoch": 0.7724347186465612, "grad_norm": 1.420272369620239, "learning_rate": 1.2976670179342248e-06, "loss": 0.6465, "step": 25203 }, { "epoch": 0.7724653671693025, "grad_norm": 1.3377016407622566, "learning_rate": 1.2973334643489754e-06, "loss": 0.5783, "step": 25204 }, { "epoch": 0.7724960156920436, "grad_norm": 1.3492065915133507, "learning_rate": 1.2969999472470468e-06, "loss": 0.6552, "step": 25205 }, { "epoch": 0.7725266642147849, "grad_norm": 1.4535266096627983, "learning_rate": 1.2966664666317237e-06, "loss": 0.6245, "step": 25206 }, { "epoch": 0.772557312737526, "grad_norm": 0.45439991046370865, "learning_rate": 1.2963330225062925e-06, "loss": 0.3964, "step": 25207 }, { "epoch": 0.7725879612602673, "grad_norm": 1.2075585637033226, "learning_rate": 1.2959996148740423e-06, "loss": 0.5397, "step": 25208 }, { "epoch": 0.7726186097830084, "grad_norm": 1.1834860922996429, "learning_rate": 1.2956662437382534e-06, "loss": 0.5897, "step": 25209 }, { "epoch": 0.7726492583057497, "grad_norm": 1.3227573044008551, "learning_rate": 1.2953329091022131e-06, "loss": 0.6471, "step": 25210 }, { "epoch": 0.7726799068284909, "grad_norm": 1.3107025783346502, "learning_rate": 1.2949996109692053e-06, "loss": 0.6285, "step": 25211 }, { "epoch": 0.7727105553512321, "grad_norm": 0.4422554438205205, "learning_rate": 1.294666349342516e-06, "loss": 0.4076, "step": 25212 }, { "epoch": 0.7727412038739733, "grad_norm": 1.3193986807209792, "learning_rate": 1.294333124225427e-06, "loss": 0.6475, "step": 25213 }, { "epoch": 0.7727718523967145, "grad_norm": 1.273935876462875, "learning_rate": 1.2939999356212191e-06, "loss": 0.6435, "step": 25214 }, { "epoch": 0.7728025009194557, "grad_norm": 1.409142183864007, "learning_rate": 1.2936667835331813e-06, "loss": 0.6096, "step": 25215 }, { "epoch": 0.7728331494421968, "grad_norm": 1.3001872903214289, "learning_rate": 1.2933336679645925e-06, "loss": 0.5647, "step": 25216 }, { "epoch": 0.7728637979649381, "grad_norm": 1.3318657153920477, "learning_rate": 1.2930005889187342e-06, "loss": 0.5052, "step": 25217 }, { "epoch": 0.7728944464876792, "grad_norm": 1.4330890154882339, "learning_rate": 1.2926675463988898e-06, "loss": 0.6006, "step": 25218 }, { "epoch": 0.7729250950104205, "grad_norm": 0.44611950885088186, "learning_rate": 1.2923345404083398e-06, "loss": 0.3904, "step": 25219 }, { "epoch": 0.7729557435331617, "grad_norm": 1.475697839437932, "learning_rate": 1.2920015709503687e-06, "loss": 0.638, "step": 25220 }, { "epoch": 0.7729863920559029, "grad_norm": 0.4462456350254563, "learning_rate": 1.2916686380282528e-06, "loss": 0.3845, "step": 25221 }, { "epoch": 0.7730170405786441, "grad_norm": 1.1275323043424046, "learning_rate": 1.291335741645275e-06, "loss": 0.603, "step": 25222 }, { "epoch": 0.7730476891013853, "grad_norm": 1.388668959232969, "learning_rate": 1.291002881804716e-06, "loss": 0.5678, "step": 25223 }, { "epoch": 0.7730783376241265, "grad_norm": 1.2587280850415663, "learning_rate": 1.2906700585098548e-06, "loss": 0.519, "step": 25224 }, { "epoch": 0.7731089861468677, "grad_norm": 1.2425383349661636, "learning_rate": 1.2903372717639678e-06, "loss": 0.6015, "step": 25225 }, { "epoch": 0.7731396346696089, "grad_norm": 1.27843103216512, "learning_rate": 1.2900045215703394e-06, "loss": 0.5743, "step": 25226 }, { "epoch": 0.7731702831923501, "grad_norm": 1.4103377018376886, "learning_rate": 1.2896718079322462e-06, "loss": 0.6927, "step": 25227 }, { "epoch": 0.7732009317150913, "grad_norm": 1.2805707873476848, "learning_rate": 1.289339130852964e-06, "loss": 0.632, "step": 25228 }, { "epoch": 0.7732315802378326, "grad_norm": 1.228729572629983, "learning_rate": 1.2890064903357729e-06, "loss": 0.6617, "step": 25229 }, { "epoch": 0.7732622287605737, "grad_norm": 1.409166410499816, "learning_rate": 1.288673886383951e-06, "loss": 0.6451, "step": 25230 }, { "epoch": 0.773292877283315, "grad_norm": 1.374284415227052, "learning_rate": 1.2883413190007753e-06, "loss": 0.6621, "step": 25231 }, { "epoch": 0.7733235258060561, "grad_norm": 1.4493499199024198, "learning_rate": 1.2880087881895214e-06, "loss": 0.6962, "step": 25232 }, { "epoch": 0.7733541743287974, "grad_norm": 1.3397150914237321, "learning_rate": 1.2876762939534665e-06, "loss": 0.6511, "step": 25233 }, { "epoch": 0.7733848228515385, "grad_norm": 1.438506973196593, "learning_rate": 1.2873438362958884e-06, "loss": 0.6104, "step": 25234 }, { "epoch": 0.7734154713742798, "grad_norm": 1.5183713718720941, "learning_rate": 1.2870114152200618e-06, "loss": 0.6576, "step": 25235 }, { "epoch": 0.773446119897021, "grad_norm": 1.2818466481333264, "learning_rate": 1.2866790307292599e-06, "loss": 0.62, "step": 25236 }, { "epoch": 0.7734767684197622, "grad_norm": 1.425544184851114, "learning_rate": 1.2863466828267596e-06, "loss": 0.5915, "step": 25237 }, { "epoch": 0.7735074169425034, "grad_norm": 0.4426414024863141, "learning_rate": 1.2860143715158359e-06, "loss": 0.4008, "step": 25238 }, { "epoch": 0.7735380654652446, "grad_norm": 1.3213090396046736, "learning_rate": 1.2856820967997642e-06, "loss": 0.6126, "step": 25239 }, { "epoch": 0.7735687139879858, "grad_norm": 1.3428884313162563, "learning_rate": 1.2853498586818154e-06, "loss": 0.7791, "step": 25240 }, { "epoch": 0.773599362510727, "grad_norm": 1.2056754406427577, "learning_rate": 1.285017657165265e-06, "loss": 0.5992, "step": 25241 }, { "epoch": 0.7736300110334682, "grad_norm": 1.4770859477369565, "learning_rate": 1.2846854922533874e-06, "loss": 0.5653, "step": 25242 }, { "epoch": 0.7736606595562094, "grad_norm": 1.3295505079880745, "learning_rate": 1.284353363949455e-06, "loss": 0.5424, "step": 25243 }, { "epoch": 0.7736913080789506, "grad_norm": 1.3787225788033823, "learning_rate": 1.2840212722567359e-06, "loss": 0.648, "step": 25244 }, { "epoch": 0.7737219566016919, "grad_norm": 1.3566197425114672, "learning_rate": 1.2836892171785093e-06, "loss": 0.6126, "step": 25245 }, { "epoch": 0.773752605124433, "grad_norm": 1.3782021066260417, "learning_rate": 1.2833571987180421e-06, "loss": 0.6104, "step": 25246 }, { "epoch": 0.7737832536471742, "grad_norm": 1.4275011618202933, "learning_rate": 1.2830252168786089e-06, "loss": 0.7148, "step": 25247 }, { "epoch": 0.7738139021699154, "grad_norm": 1.2755103912005281, "learning_rate": 1.2826932716634776e-06, "loss": 0.5684, "step": 25248 }, { "epoch": 0.7738445506926566, "grad_norm": 1.3166362561633416, "learning_rate": 1.2823613630759208e-06, "loss": 0.6264, "step": 25249 }, { "epoch": 0.7738751992153978, "grad_norm": 1.3364461622908106, "learning_rate": 1.2820294911192098e-06, "loss": 0.5968, "step": 25250 }, { "epoch": 0.773905847738139, "grad_norm": 1.3637398068085729, "learning_rate": 1.2816976557966127e-06, "loss": 0.5772, "step": 25251 }, { "epoch": 0.7739364962608802, "grad_norm": 1.2818930569435536, "learning_rate": 1.2813658571113997e-06, "loss": 0.561, "step": 25252 }, { "epoch": 0.7739671447836214, "grad_norm": 0.45542851920028926, "learning_rate": 1.2810340950668415e-06, "loss": 0.4086, "step": 25253 }, { "epoch": 0.7739977933063626, "grad_norm": 1.2215439919727353, "learning_rate": 1.2807023696662063e-06, "loss": 0.5459, "step": 25254 }, { "epoch": 0.7740284418291038, "grad_norm": 1.3510384624996754, "learning_rate": 1.280370680912759e-06, "loss": 0.6376, "step": 25255 }, { "epoch": 0.7740590903518451, "grad_norm": 1.2063603947688606, "learning_rate": 1.2800390288097742e-06, "loss": 0.618, "step": 25256 }, { "epoch": 0.7740897388745862, "grad_norm": 0.43632291131735, "learning_rate": 1.2797074133605153e-06, "loss": 0.3666, "step": 25257 }, { "epoch": 0.7741203873973275, "grad_norm": 1.4401661482014398, "learning_rate": 1.2793758345682522e-06, "loss": 0.586, "step": 25258 }, { "epoch": 0.7741510359200686, "grad_norm": 1.4015673930989492, "learning_rate": 1.27904429243625e-06, "loss": 0.5315, "step": 25259 }, { "epoch": 0.7741816844428099, "grad_norm": 1.3644506760846151, "learning_rate": 1.2787127869677762e-06, "loss": 0.6232, "step": 25260 }, { "epoch": 0.774212332965551, "grad_norm": 1.320819538146972, "learning_rate": 1.2783813181660986e-06, "loss": 0.5088, "step": 25261 }, { "epoch": 0.7742429814882923, "grad_norm": 1.425228917288908, "learning_rate": 1.2780498860344814e-06, "loss": 0.6003, "step": 25262 }, { "epoch": 0.7742736300110334, "grad_norm": 1.1153769761000405, "learning_rate": 1.2777184905761901e-06, "loss": 0.5677, "step": 25263 }, { "epoch": 0.7743042785337747, "grad_norm": 1.3252899985076678, "learning_rate": 1.277387131794493e-06, "loss": 0.6086, "step": 25264 }, { "epoch": 0.7743349270565159, "grad_norm": 1.4313848118316115, "learning_rate": 1.2770558096926512e-06, "loss": 0.5617, "step": 25265 }, { "epoch": 0.7743655755792571, "grad_norm": 1.2846520575454934, "learning_rate": 1.2767245242739313e-06, "loss": 0.5778, "step": 25266 }, { "epoch": 0.7743962241019983, "grad_norm": 1.2334775450904905, "learning_rate": 1.2763932755415986e-06, "loss": 0.5344, "step": 25267 }, { "epoch": 0.7744268726247395, "grad_norm": 1.1851116405189368, "learning_rate": 1.2760620634989141e-06, "loss": 0.6585, "step": 25268 }, { "epoch": 0.7744575211474807, "grad_norm": 1.365070323456032, "learning_rate": 1.2757308881491449e-06, "loss": 0.6322, "step": 25269 }, { "epoch": 0.7744881696702219, "grad_norm": 1.2426857877780417, "learning_rate": 1.27539974949555e-06, "loss": 0.5663, "step": 25270 }, { "epoch": 0.7745188181929631, "grad_norm": 1.287478731656898, "learning_rate": 1.2750686475413948e-06, "loss": 0.6627, "step": 25271 }, { "epoch": 0.7745494667157043, "grad_norm": 1.4149258106844427, "learning_rate": 1.2747375822899421e-06, "loss": 0.6054, "step": 25272 }, { "epoch": 0.7745801152384455, "grad_norm": 1.2547631152248626, "learning_rate": 1.2744065537444522e-06, "loss": 0.5453, "step": 25273 }, { "epoch": 0.7746107637611868, "grad_norm": 1.398156333803978, "learning_rate": 1.2740755619081879e-06, "loss": 0.6515, "step": 25274 }, { "epoch": 0.7746414122839279, "grad_norm": 1.391601264838834, "learning_rate": 1.2737446067844116e-06, "loss": 0.6334, "step": 25275 }, { "epoch": 0.7746720608066692, "grad_norm": 1.1410052354617424, "learning_rate": 1.2734136883763821e-06, "loss": 0.5197, "step": 25276 }, { "epoch": 0.7747027093294103, "grad_norm": 1.2829013825087852, "learning_rate": 1.2730828066873603e-06, "loss": 0.6402, "step": 25277 }, { "epoch": 0.7747333578521515, "grad_norm": 0.4507590199525265, "learning_rate": 1.272751961720609e-06, "loss": 0.3721, "step": 25278 }, { "epoch": 0.7747640063748927, "grad_norm": 1.3899374358663015, "learning_rate": 1.2724211534793851e-06, "loss": 0.6202, "step": 25279 }, { "epoch": 0.7747946548976339, "grad_norm": 1.4134205234431205, "learning_rate": 1.2720903819669506e-06, "loss": 0.6425, "step": 25280 }, { "epoch": 0.7748253034203751, "grad_norm": 1.3085558237851733, "learning_rate": 1.2717596471865619e-06, "loss": 0.5849, "step": 25281 }, { "epoch": 0.7748559519431163, "grad_norm": 1.4411568774495975, "learning_rate": 1.271428949141479e-06, "loss": 0.6352, "step": 25282 }, { "epoch": 0.7748866004658576, "grad_norm": 1.4618230653833744, "learning_rate": 1.2710982878349621e-06, "loss": 0.6081, "step": 25283 }, { "epoch": 0.7749172489885987, "grad_norm": 1.35564535414405, "learning_rate": 1.2707676632702665e-06, "loss": 0.535, "step": 25284 }, { "epoch": 0.77494789751134, "grad_norm": 1.4516865340322331, "learning_rate": 1.2704370754506517e-06, "loss": 0.6067, "step": 25285 }, { "epoch": 0.7749785460340811, "grad_norm": 1.2839775253476144, "learning_rate": 1.270106524379376e-06, "loss": 0.6359, "step": 25286 }, { "epoch": 0.7750091945568224, "grad_norm": 1.374234881615715, "learning_rate": 1.2697760100596929e-06, "loss": 0.5597, "step": 25287 }, { "epoch": 0.7750398430795635, "grad_norm": 1.2788668061729553, "learning_rate": 1.2694455324948634e-06, "loss": 0.6285, "step": 25288 }, { "epoch": 0.7750704916023048, "grad_norm": 1.3777514033329394, "learning_rate": 1.26911509168814e-06, "loss": 0.6438, "step": 25289 }, { "epoch": 0.7751011401250459, "grad_norm": 1.2568529837459341, "learning_rate": 1.2687846876427801e-06, "loss": 0.5272, "step": 25290 }, { "epoch": 0.7751317886477872, "grad_norm": 1.1001086291519648, "learning_rate": 1.2684543203620402e-06, "loss": 0.4661, "step": 25291 }, { "epoch": 0.7751624371705284, "grad_norm": 1.609493685991199, "learning_rate": 1.2681239898491743e-06, "loss": 0.7366, "step": 25292 }, { "epoch": 0.7751930856932696, "grad_norm": 1.389858780941433, "learning_rate": 1.2677936961074366e-06, "loss": 0.6672, "step": 25293 }, { "epoch": 0.7752237342160108, "grad_norm": 1.3194151658526587, "learning_rate": 1.2674634391400848e-06, "loss": 0.5805, "step": 25294 }, { "epoch": 0.775254382738752, "grad_norm": 1.3633383097579856, "learning_rate": 1.2671332189503688e-06, "loss": 0.5575, "step": 25295 }, { "epoch": 0.7752850312614932, "grad_norm": 1.3352834162650369, "learning_rate": 1.2668030355415446e-06, "loss": 0.6393, "step": 25296 }, { "epoch": 0.7753156797842344, "grad_norm": 1.216433981612852, "learning_rate": 1.266472888916866e-06, "loss": 0.5503, "step": 25297 }, { "epoch": 0.7753463283069756, "grad_norm": 0.4486250262608649, "learning_rate": 1.2661427790795844e-06, "loss": 0.4066, "step": 25298 }, { "epoch": 0.7753769768297168, "grad_norm": 1.3628749035227932, "learning_rate": 1.265812706032955e-06, "loss": 0.5417, "step": 25299 }, { "epoch": 0.775407625352458, "grad_norm": 0.4381631183936933, "learning_rate": 1.2654826697802253e-06, "loss": 0.3846, "step": 25300 }, { "epoch": 0.7754382738751993, "grad_norm": 1.3488774660813052, "learning_rate": 1.2651526703246531e-06, "loss": 0.5567, "step": 25301 }, { "epoch": 0.7754689223979404, "grad_norm": 1.3416860765168896, "learning_rate": 1.2648227076694875e-06, "loss": 0.4597, "step": 25302 }, { "epoch": 0.7754995709206817, "grad_norm": 1.2982457827979754, "learning_rate": 1.2644927818179775e-06, "loss": 0.589, "step": 25303 }, { "epoch": 0.7755302194434228, "grad_norm": 0.46474418432822057, "learning_rate": 1.2641628927733768e-06, "loss": 0.3993, "step": 25304 }, { "epoch": 0.7755608679661641, "grad_norm": 1.2506679929322069, "learning_rate": 1.2638330405389354e-06, "loss": 0.5744, "step": 25305 }, { "epoch": 0.7755915164889052, "grad_norm": 0.4134066322681091, "learning_rate": 1.2635032251179025e-06, "loss": 0.3774, "step": 25306 }, { "epoch": 0.7756221650116465, "grad_norm": 0.42289693194268335, "learning_rate": 1.2631734465135275e-06, "loss": 0.3654, "step": 25307 }, { "epoch": 0.7756528135343876, "grad_norm": 1.279590030404347, "learning_rate": 1.2628437047290626e-06, "loss": 0.5563, "step": 25308 }, { "epoch": 0.7756834620571288, "grad_norm": 1.281023906996295, "learning_rate": 1.2625139997677533e-06, "loss": 0.5512, "step": 25309 }, { "epoch": 0.77571411057987, "grad_norm": 1.439460272327775, "learning_rate": 1.2621843316328513e-06, "loss": 0.624, "step": 25310 }, { "epoch": 0.7757447591026112, "grad_norm": 1.275936489732658, "learning_rate": 1.2618547003276005e-06, "loss": 0.5053, "step": 25311 }, { "epoch": 0.7757754076253525, "grad_norm": 0.45592684448271614, "learning_rate": 1.2615251058552547e-06, "loss": 0.4012, "step": 25312 }, { "epoch": 0.7758060561480936, "grad_norm": 1.3592301526713957, "learning_rate": 1.2611955482190586e-06, "loss": 0.6091, "step": 25313 }, { "epoch": 0.7758367046708349, "grad_norm": 1.3923225752899382, "learning_rate": 1.2608660274222578e-06, "loss": 0.6538, "step": 25314 }, { "epoch": 0.775867353193576, "grad_norm": 1.2871647471050713, "learning_rate": 1.260536543468101e-06, "loss": 0.6875, "step": 25315 }, { "epoch": 0.7758980017163173, "grad_norm": 1.453865969540117, "learning_rate": 1.2602070963598356e-06, "loss": 0.6004, "step": 25316 }, { "epoch": 0.7759286502390584, "grad_norm": 0.43942165317559556, "learning_rate": 1.259877686100705e-06, "loss": 0.3832, "step": 25317 }, { "epoch": 0.7759592987617997, "grad_norm": 0.4579714945925896, "learning_rate": 1.2595483126939572e-06, "loss": 0.4098, "step": 25318 }, { "epoch": 0.7759899472845408, "grad_norm": 1.2083355968218894, "learning_rate": 1.2592189761428364e-06, "loss": 0.5261, "step": 25319 }, { "epoch": 0.7760205958072821, "grad_norm": 1.2271618011878982, "learning_rate": 1.2588896764505893e-06, "loss": 0.6957, "step": 25320 }, { "epoch": 0.7760512443300233, "grad_norm": 0.4618593017139349, "learning_rate": 1.2585604136204599e-06, "loss": 0.4065, "step": 25321 }, { "epoch": 0.7760818928527645, "grad_norm": 1.381133741020211, "learning_rate": 1.258231187655689e-06, "loss": 0.5833, "step": 25322 }, { "epoch": 0.7761125413755057, "grad_norm": 1.3442356202428412, "learning_rate": 1.2579019985595264e-06, "loss": 0.5935, "step": 25323 }, { "epoch": 0.7761431898982469, "grad_norm": 1.4015686769774895, "learning_rate": 1.2575728463352127e-06, "loss": 0.6329, "step": 25324 }, { "epoch": 0.7761738384209881, "grad_norm": 1.3691689983389685, "learning_rate": 1.2572437309859902e-06, "loss": 0.5636, "step": 25325 }, { "epoch": 0.7762044869437293, "grad_norm": 1.304412138204561, "learning_rate": 1.2569146525151027e-06, "loss": 0.5462, "step": 25326 }, { "epoch": 0.7762351354664705, "grad_norm": 1.3825802980096653, "learning_rate": 1.2565856109257929e-06, "loss": 0.5699, "step": 25327 }, { "epoch": 0.7762657839892118, "grad_norm": 1.4365730546226885, "learning_rate": 1.2562566062213044e-06, "loss": 0.5939, "step": 25328 }, { "epoch": 0.7762964325119529, "grad_norm": 1.4169088304093065, "learning_rate": 1.2559276384048758e-06, "loss": 0.6902, "step": 25329 }, { "epoch": 0.7763270810346942, "grad_norm": 1.1851840693427727, "learning_rate": 1.2555987074797499e-06, "loss": 0.6292, "step": 25330 }, { "epoch": 0.7763577295574353, "grad_norm": 1.413170708892993, "learning_rate": 1.2552698134491697e-06, "loss": 0.7071, "step": 25331 }, { "epoch": 0.7763883780801766, "grad_norm": 1.2795650862622128, "learning_rate": 1.2549409563163744e-06, "loss": 0.6662, "step": 25332 }, { "epoch": 0.7764190266029177, "grad_norm": 1.388517298061067, "learning_rate": 1.2546121360846025e-06, "loss": 0.6936, "step": 25333 }, { "epoch": 0.776449675125659, "grad_norm": 1.247973262492555, "learning_rate": 1.2542833527570952e-06, "loss": 0.6203, "step": 25334 }, { "epoch": 0.7764803236484001, "grad_norm": 1.486990670858699, "learning_rate": 1.2539546063370944e-06, "loss": 0.6455, "step": 25335 }, { "epoch": 0.7765109721711414, "grad_norm": 1.1953851225231493, "learning_rate": 1.2536258968278352e-06, "loss": 0.4948, "step": 25336 }, { "epoch": 0.7765416206938826, "grad_norm": 1.2898314263485597, "learning_rate": 1.2532972242325593e-06, "loss": 0.5441, "step": 25337 }, { "epoch": 0.7765722692166238, "grad_norm": 0.4520092025726155, "learning_rate": 1.252968588554504e-06, "loss": 0.3955, "step": 25338 }, { "epoch": 0.776602917739365, "grad_norm": 1.296183923293375, "learning_rate": 1.2526399897969093e-06, "loss": 0.5286, "step": 25339 }, { "epoch": 0.7766335662621061, "grad_norm": 1.3294892181039226, "learning_rate": 1.2523114279630122e-06, "loss": 0.5982, "step": 25340 }, { "epoch": 0.7766642147848474, "grad_norm": 1.3702212904081101, "learning_rate": 1.251982903056046e-06, "loss": 0.6257, "step": 25341 }, { "epoch": 0.7766948633075885, "grad_norm": 1.322355503712757, "learning_rate": 1.2516544150792543e-06, "loss": 0.6446, "step": 25342 }, { "epoch": 0.7767255118303298, "grad_norm": 1.257252324046026, "learning_rate": 1.2513259640358705e-06, "loss": 0.5972, "step": 25343 }, { "epoch": 0.7767561603530709, "grad_norm": 0.45097437320410527, "learning_rate": 1.25099754992913e-06, "loss": 0.3671, "step": 25344 }, { "epoch": 0.7767868088758122, "grad_norm": 1.515311686558353, "learning_rate": 1.2506691727622699e-06, "loss": 0.6301, "step": 25345 }, { "epoch": 0.7768174573985533, "grad_norm": 1.3099422281269728, "learning_rate": 1.2503408325385251e-06, "loss": 0.5558, "step": 25346 }, { "epoch": 0.7768481059212946, "grad_norm": 0.4601041275365213, "learning_rate": 1.2500125292611336e-06, "loss": 0.3893, "step": 25347 }, { "epoch": 0.7768787544440358, "grad_norm": 1.2532430395736855, "learning_rate": 1.2496842629333267e-06, "loss": 0.5605, "step": 25348 }, { "epoch": 0.776909402966777, "grad_norm": 1.1892026158984084, "learning_rate": 1.2493560335583399e-06, "loss": 0.5399, "step": 25349 }, { "epoch": 0.7769400514895182, "grad_norm": 1.2105733617520167, "learning_rate": 1.2490278411394097e-06, "loss": 0.626, "step": 25350 }, { "epoch": 0.7769707000122594, "grad_norm": 1.265249325885817, "learning_rate": 1.2486996856797673e-06, "loss": 0.5854, "step": 25351 }, { "epoch": 0.7770013485350006, "grad_norm": 1.4345571329319295, "learning_rate": 1.248371567182644e-06, "loss": 0.6245, "step": 25352 }, { "epoch": 0.7770319970577418, "grad_norm": 1.3364809744799409, "learning_rate": 1.2480434856512786e-06, "loss": 0.5266, "step": 25353 }, { "epoch": 0.777062645580483, "grad_norm": 1.4590133113167933, "learning_rate": 1.2477154410888992e-06, "loss": 0.6189, "step": 25354 }, { "epoch": 0.7770932941032243, "grad_norm": 1.3383895195636668, "learning_rate": 1.2473874334987412e-06, "loss": 0.6752, "step": 25355 }, { "epoch": 0.7771239426259654, "grad_norm": 1.3937783431888524, "learning_rate": 1.2470594628840333e-06, "loss": 0.5911, "step": 25356 }, { "epoch": 0.7771545911487067, "grad_norm": 1.435845598198028, "learning_rate": 1.2467315292480093e-06, "loss": 0.6384, "step": 25357 }, { "epoch": 0.7771852396714478, "grad_norm": 1.3054390676123577, "learning_rate": 1.2464036325939004e-06, "loss": 0.5647, "step": 25358 }, { "epoch": 0.7772158881941891, "grad_norm": 1.3182883893000106, "learning_rate": 1.2460757729249363e-06, "loss": 0.5828, "step": 25359 }, { "epoch": 0.7772465367169302, "grad_norm": 1.30776115781216, "learning_rate": 1.2457479502443475e-06, "loss": 0.5659, "step": 25360 }, { "epoch": 0.7772771852396715, "grad_norm": 0.4338901185783296, "learning_rate": 1.2454201645553665e-06, "loss": 0.3948, "step": 25361 }, { "epoch": 0.7773078337624126, "grad_norm": 1.40801534899405, "learning_rate": 1.245092415861221e-06, "loss": 0.6381, "step": 25362 }, { "epoch": 0.7773384822851539, "grad_norm": 1.587209169739112, "learning_rate": 1.2447647041651378e-06, "loss": 0.699, "step": 25363 }, { "epoch": 0.777369130807895, "grad_norm": 1.5258579757919273, "learning_rate": 1.2444370294703517e-06, "loss": 0.6783, "step": 25364 }, { "epoch": 0.7773997793306363, "grad_norm": 0.452963897058892, "learning_rate": 1.2441093917800872e-06, "loss": 0.3906, "step": 25365 }, { "epoch": 0.7774304278533775, "grad_norm": 1.420270612739031, "learning_rate": 1.2437817910975752e-06, "loss": 0.5501, "step": 25366 }, { "epoch": 0.7774610763761187, "grad_norm": 1.333804183842481, "learning_rate": 1.2434542274260408e-06, "loss": 0.625, "step": 25367 }, { "epoch": 0.7774917248988599, "grad_norm": 1.6532451370150998, "learning_rate": 1.2431267007687132e-06, "loss": 0.6244, "step": 25368 }, { "epoch": 0.7775223734216011, "grad_norm": 1.2314918678216193, "learning_rate": 1.2427992111288206e-06, "loss": 0.6341, "step": 25369 }, { "epoch": 0.7775530219443423, "grad_norm": 1.282630602932639, "learning_rate": 1.2424717585095875e-06, "loss": 0.5427, "step": 25370 }, { "epoch": 0.7775836704670834, "grad_norm": 1.2974331954139329, "learning_rate": 1.2421443429142415e-06, "loss": 0.5546, "step": 25371 }, { "epoch": 0.7776143189898247, "grad_norm": 1.255855807549231, "learning_rate": 1.2418169643460098e-06, "loss": 0.5252, "step": 25372 }, { "epoch": 0.7776449675125658, "grad_norm": 1.3792056629678486, "learning_rate": 1.2414896228081164e-06, "loss": 0.6654, "step": 25373 }, { "epoch": 0.7776756160353071, "grad_norm": 1.2916048367436974, "learning_rate": 1.2411623183037869e-06, "loss": 0.6713, "step": 25374 }, { "epoch": 0.7777062645580483, "grad_norm": 1.1814742948106798, "learning_rate": 1.2408350508362489e-06, "loss": 0.5854, "step": 25375 }, { "epoch": 0.7777369130807895, "grad_norm": 1.2777726887770287, "learning_rate": 1.2405078204087228e-06, "loss": 0.6245, "step": 25376 }, { "epoch": 0.7777675616035307, "grad_norm": 1.264169857455456, "learning_rate": 1.2401806270244366e-06, "loss": 0.6049, "step": 25377 }, { "epoch": 0.7777982101262719, "grad_norm": 1.2394519024988282, "learning_rate": 1.2398534706866116e-06, "loss": 0.523, "step": 25378 }, { "epoch": 0.7778288586490131, "grad_norm": 1.2688977339688605, "learning_rate": 1.2395263513984724e-06, "loss": 0.5512, "step": 25379 }, { "epoch": 0.7778595071717543, "grad_norm": 1.4329754826292345, "learning_rate": 1.239199269163243e-06, "loss": 0.656, "step": 25380 }, { "epoch": 0.7778901556944955, "grad_norm": 1.3355642965596661, "learning_rate": 1.238872223984145e-06, "loss": 0.568, "step": 25381 }, { "epoch": 0.7779208042172367, "grad_norm": 1.2254579319422934, "learning_rate": 1.2385452158644006e-06, "loss": 0.5949, "step": 25382 }, { "epoch": 0.7779514527399779, "grad_norm": 1.430698887611979, "learning_rate": 1.2382182448072344e-06, "loss": 0.6409, "step": 25383 }, { "epoch": 0.7779821012627192, "grad_norm": 1.5105363917710124, "learning_rate": 1.2378913108158647e-06, "loss": 0.6559, "step": 25384 }, { "epoch": 0.7780127497854603, "grad_norm": 1.211090330504155, "learning_rate": 1.2375644138935156e-06, "loss": 0.6892, "step": 25385 }, { "epoch": 0.7780433983082016, "grad_norm": 1.228253798687807, "learning_rate": 1.2372375540434063e-06, "loss": 0.5685, "step": 25386 }, { "epoch": 0.7780740468309427, "grad_norm": 1.2981853847916363, "learning_rate": 1.2369107312687572e-06, "loss": 0.5353, "step": 25387 }, { "epoch": 0.778104695353684, "grad_norm": 1.3529252464958437, "learning_rate": 1.2365839455727919e-06, "loss": 0.5749, "step": 25388 }, { "epoch": 0.7781353438764251, "grad_norm": 0.44413678576198967, "learning_rate": 1.2362571969587255e-06, "loss": 0.4045, "step": 25389 }, { "epoch": 0.7781659923991664, "grad_norm": 1.3050498539803808, "learning_rate": 1.235930485429781e-06, "loss": 0.5697, "step": 25390 }, { "epoch": 0.7781966409219075, "grad_norm": 0.4778929200645413, "learning_rate": 1.235603810989177e-06, "loss": 0.3924, "step": 25391 }, { "epoch": 0.7782272894446488, "grad_norm": 1.41242041794418, "learning_rate": 1.235277173640131e-06, "loss": 0.7377, "step": 25392 }, { "epoch": 0.77825793796739, "grad_norm": 1.3379490383333936, "learning_rate": 1.2349505733858618e-06, "loss": 0.5559, "step": 25393 }, { "epoch": 0.7782885864901312, "grad_norm": 1.238323429059095, "learning_rate": 1.2346240102295898e-06, "loss": 0.5334, "step": 25394 }, { "epoch": 0.7783192350128724, "grad_norm": 1.2369510667785086, "learning_rate": 1.2342974841745292e-06, "loss": 0.6168, "step": 25395 }, { "epoch": 0.7783498835356136, "grad_norm": 1.3066205062708545, "learning_rate": 1.2339709952239003e-06, "loss": 0.5923, "step": 25396 }, { "epoch": 0.7783805320583548, "grad_norm": 1.335366242023717, "learning_rate": 1.2336445433809175e-06, "loss": 0.5413, "step": 25397 }, { "epoch": 0.778411180581096, "grad_norm": 0.4374043444620145, "learning_rate": 1.2333181286487982e-06, "loss": 0.3967, "step": 25398 }, { "epoch": 0.7784418291038372, "grad_norm": 1.4022513320701924, "learning_rate": 1.2329917510307616e-06, "loss": 0.6277, "step": 25399 }, { "epoch": 0.7784724776265785, "grad_norm": 0.438812137847455, "learning_rate": 1.232665410530019e-06, "loss": 0.3906, "step": 25400 }, { "epoch": 0.7785031261493196, "grad_norm": 1.53940625795313, "learning_rate": 1.2323391071497882e-06, "loss": 0.5925, "step": 25401 }, { "epoch": 0.7785337746720608, "grad_norm": 1.2619444456052167, "learning_rate": 1.2320128408932852e-06, "loss": 0.4644, "step": 25402 }, { "epoch": 0.778564423194802, "grad_norm": 1.538255075401763, "learning_rate": 1.2316866117637226e-06, "loss": 0.6233, "step": 25403 }, { "epoch": 0.7785950717175432, "grad_norm": 1.180941770619705, "learning_rate": 1.2313604197643158e-06, "loss": 0.5835, "step": 25404 }, { "epoch": 0.7786257202402844, "grad_norm": 1.4013766523677946, "learning_rate": 1.231034264898281e-06, "loss": 0.6498, "step": 25405 }, { "epoch": 0.7786563687630256, "grad_norm": 1.3416447172958919, "learning_rate": 1.2307081471688282e-06, "loss": 0.6126, "step": 25406 }, { "epoch": 0.7786870172857668, "grad_norm": 0.42815431253176006, "learning_rate": 1.2303820665791739e-06, "loss": 0.4063, "step": 25407 }, { "epoch": 0.778717665808508, "grad_norm": 1.351968916728813, "learning_rate": 1.2300560231325275e-06, "loss": 0.5883, "step": 25408 }, { "epoch": 0.7787483143312492, "grad_norm": 1.3035198191556916, "learning_rate": 1.2297300168321047e-06, "loss": 0.5736, "step": 25409 }, { "epoch": 0.7787789628539904, "grad_norm": 0.4737311456804331, "learning_rate": 1.2294040476811176e-06, "loss": 0.4051, "step": 25410 }, { "epoch": 0.7788096113767317, "grad_norm": 1.3983103622714737, "learning_rate": 1.2290781156827758e-06, "loss": 0.588, "step": 25411 }, { "epoch": 0.7788402598994728, "grad_norm": 1.5287514990195028, "learning_rate": 1.228752220840292e-06, "loss": 0.6842, "step": 25412 }, { "epoch": 0.7788709084222141, "grad_norm": 1.292818554082161, "learning_rate": 1.2284263631568794e-06, "loss": 0.4598, "step": 25413 }, { "epoch": 0.7789015569449552, "grad_norm": 1.3851062209155858, "learning_rate": 1.228100542635745e-06, "loss": 0.5599, "step": 25414 }, { "epoch": 0.7789322054676965, "grad_norm": 1.559178594234385, "learning_rate": 1.227774759280101e-06, "loss": 0.7025, "step": 25415 }, { "epoch": 0.7789628539904376, "grad_norm": 1.4129731519353526, "learning_rate": 1.2274490130931593e-06, "loss": 0.6788, "step": 25416 }, { "epoch": 0.7789935025131789, "grad_norm": 1.3039488976130036, "learning_rate": 1.227123304078126e-06, "loss": 0.5377, "step": 25417 }, { "epoch": 0.77902415103592, "grad_norm": 1.3646530991868013, "learning_rate": 1.2267976322382136e-06, "loss": 0.5737, "step": 25418 }, { "epoch": 0.7790547995586613, "grad_norm": 1.286805634486783, "learning_rate": 1.2264719975766266e-06, "loss": 0.6347, "step": 25419 }, { "epoch": 0.7790854480814025, "grad_norm": 1.4696126120684438, "learning_rate": 1.2261464000965795e-06, "loss": 0.6206, "step": 25420 }, { "epoch": 0.7791160966041437, "grad_norm": 1.2898856975446151, "learning_rate": 1.2258208398012772e-06, "loss": 0.6285, "step": 25421 }, { "epoch": 0.7791467451268849, "grad_norm": 1.3974572090291437, "learning_rate": 1.2254953166939266e-06, "loss": 0.5917, "step": 25422 }, { "epoch": 0.7791773936496261, "grad_norm": 1.3179379918342649, "learning_rate": 1.2251698307777365e-06, "loss": 0.6886, "step": 25423 }, { "epoch": 0.7792080421723673, "grad_norm": 1.2463298203350055, "learning_rate": 1.2248443820559154e-06, "loss": 0.6111, "step": 25424 }, { "epoch": 0.7792386906951085, "grad_norm": 1.4667911537216178, "learning_rate": 1.2245189705316668e-06, "loss": 0.5678, "step": 25425 }, { "epoch": 0.7792693392178497, "grad_norm": 0.46114162523748414, "learning_rate": 1.2241935962081991e-06, "loss": 0.4041, "step": 25426 }, { "epoch": 0.779299987740591, "grad_norm": 1.3715688618295712, "learning_rate": 1.2238682590887174e-06, "loss": 0.6296, "step": 25427 }, { "epoch": 0.7793306362633321, "grad_norm": 1.167236495292335, "learning_rate": 1.2235429591764303e-06, "loss": 0.5345, "step": 25428 }, { "epoch": 0.7793612847860734, "grad_norm": 1.3784689741757081, "learning_rate": 1.22321769647454e-06, "loss": 0.5977, "step": 25429 }, { "epoch": 0.7793919333088145, "grad_norm": 1.4373857773544607, "learning_rate": 1.2228924709862506e-06, "loss": 0.603, "step": 25430 }, { "epoch": 0.7794225818315558, "grad_norm": 1.3031012254934229, "learning_rate": 1.2225672827147684e-06, "loss": 0.631, "step": 25431 }, { "epoch": 0.7794532303542969, "grad_norm": 1.2334783889707297, "learning_rate": 1.2222421316632981e-06, "loss": 0.6253, "step": 25432 }, { "epoch": 0.7794838788770381, "grad_norm": 1.3464881530748594, "learning_rate": 1.221917017835042e-06, "loss": 0.614, "step": 25433 }, { "epoch": 0.7795145273997793, "grad_norm": 1.432367048621949, "learning_rate": 1.2215919412332038e-06, "loss": 0.5512, "step": 25434 }, { "epoch": 0.7795451759225205, "grad_norm": 1.27564566543896, "learning_rate": 1.2212669018609884e-06, "loss": 0.6405, "step": 25435 }, { "epoch": 0.7795758244452617, "grad_norm": 1.3856838512001604, "learning_rate": 1.2209418997215955e-06, "loss": 0.5968, "step": 25436 }, { "epoch": 0.7796064729680029, "grad_norm": 1.3159236390421825, "learning_rate": 1.2206169348182307e-06, "loss": 0.5609, "step": 25437 }, { "epoch": 0.7796371214907442, "grad_norm": 0.44766285247653603, "learning_rate": 1.2202920071540913e-06, "loss": 0.4077, "step": 25438 }, { "epoch": 0.7796677700134853, "grad_norm": 1.315543669639433, "learning_rate": 1.2199671167323846e-06, "loss": 0.6036, "step": 25439 }, { "epoch": 0.7796984185362266, "grad_norm": 1.275062548698595, "learning_rate": 1.2196422635563093e-06, "loss": 0.5354, "step": 25440 }, { "epoch": 0.7797290670589677, "grad_norm": 1.571779706739278, "learning_rate": 1.2193174476290643e-06, "loss": 0.6615, "step": 25441 }, { "epoch": 0.779759715581709, "grad_norm": 1.2331739401492554, "learning_rate": 1.2189926689538516e-06, "loss": 0.5151, "step": 25442 }, { "epoch": 0.7797903641044501, "grad_norm": 1.4120433328068769, "learning_rate": 1.2186679275338737e-06, "loss": 0.6191, "step": 25443 }, { "epoch": 0.7798210126271914, "grad_norm": 1.3398239120070845, "learning_rate": 1.2183432233723263e-06, "loss": 0.6159, "step": 25444 }, { "epoch": 0.7798516611499325, "grad_norm": 1.3794109785425503, "learning_rate": 1.2180185564724106e-06, "loss": 0.6785, "step": 25445 }, { "epoch": 0.7798823096726738, "grad_norm": 1.308748807624058, "learning_rate": 1.2176939268373255e-06, "loss": 0.6045, "step": 25446 }, { "epoch": 0.779912958195415, "grad_norm": 1.3568664469913416, "learning_rate": 1.217369334470272e-06, "loss": 0.5964, "step": 25447 }, { "epoch": 0.7799436067181562, "grad_norm": 0.4437172825486715, "learning_rate": 1.217044779374446e-06, "loss": 0.4039, "step": 25448 }, { "epoch": 0.7799742552408974, "grad_norm": 1.2474526686168315, "learning_rate": 1.2167202615530427e-06, "loss": 0.5593, "step": 25449 }, { "epoch": 0.7800049037636386, "grad_norm": 1.270124699249879, "learning_rate": 1.2163957810092659e-06, "loss": 0.5264, "step": 25450 }, { "epoch": 0.7800355522863798, "grad_norm": 1.3982757653992623, "learning_rate": 1.216071337746309e-06, "loss": 0.502, "step": 25451 }, { "epoch": 0.780066200809121, "grad_norm": 1.4301732452775944, "learning_rate": 1.2157469317673682e-06, "loss": 0.6336, "step": 25452 }, { "epoch": 0.7800968493318622, "grad_norm": 1.2934104978519145, "learning_rate": 1.2154225630756411e-06, "loss": 0.5331, "step": 25453 }, { "epoch": 0.7801274978546034, "grad_norm": 1.351905632104684, "learning_rate": 1.2150982316743236e-06, "loss": 0.6065, "step": 25454 }, { "epoch": 0.7801581463773446, "grad_norm": 1.3001767657999157, "learning_rate": 1.2147739375666134e-06, "loss": 0.6165, "step": 25455 }, { "epoch": 0.7801887949000859, "grad_norm": 1.2833496672279068, "learning_rate": 1.2144496807557027e-06, "loss": 0.5905, "step": 25456 }, { "epoch": 0.780219443422827, "grad_norm": 1.2005975141010556, "learning_rate": 1.2141254612447877e-06, "loss": 0.5113, "step": 25457 }, { "epoch": 0.7802500919455683, "grad_norm": 1.2972604061159523, "learning_rate": 1.2138012790370645e-06, "loss": 0.5903, "step": 25458 }, { "epoch": 0.7802807404683094, "grad_norm": 0.45213850696737157, "learning_rate": 1.2134771341357266e-06, "loss": 0.392, "step": 25459 }, { "epoch": 0.7803113889910507, "grad_norm": 1.3468422939291989, "learning_rate": 1.2131530265439639e-06, "loss": 0.5073, "step": 25460 }, { "epoch": 0.7803420375137918, "grad_norm": 1.289916284170974, "learning_rate": 1.2128289562649765e-06, "loss": 0.6657, "step": 25461 }, { "epoch": 0.7803726860365331, "grad_norm": 1.440377526390541, "learning_rate": 1.2125049233019543e-06, "loss": 0.5413, "step": 25462 }, { "epoch": 0.7804033345592742, "grad_norm": 1.2553256985995613, "learning_rate": 1.2121809276580887e-06, "loss": 0.6397, "step": 25463 }, { "epoch": 0.7804339830820154, "grad_norm": 1.4603100188812534, "learning_rate": 1.2118569693365733e-06, "loss": 0.6169, "step": 25464 }, { "epoch": 0.7804646316047567, "grad_norm": 1.1458214689895332, "learning_rate": 1.2115330483406006e-06, "loss": 0.5024, "step": 25465 }, { "epoch": 0.7804952801274978, "grad_norm": 1.2810491735068443, "learning_rate": 1.2112091646733636e-06, "loss": 0.592, "step": 25466 }, { "epoch": 0.7805259286502391, "grad_norm": 0.4390319879027892, "learning_rate": 1.2108853183380509e-06, "loss": 0.3976, "step": 25467 }, { "epoch": 0.7805565771729802, "grad_norm": 1.2605553488315344, "learning_rate": 1.2105615093378543e-06, "loss": 0.5171, "step": 25468 }, { "epoch": 0.7805872256957215, "grad_norm": 1.3931983525397884, "learning_rate": 1.210237737675966e-06, "loss": 0.5891, "step": 25469 }, { "epoch": 0.7806178742184626, "grad_norm": 1.507319723683885, "learning_rate": 1.209914003355575e-06, "loss": 0.6206, "step": 25470 }, { "epoch": 0.7806485227412039, "grad_norm": 1.3259638606668938, "learning_rate": 1.2095903063798687e-06, "loss": 0.5429, "step": 25471 }, { "epoch": 0.780679171263945, "grad_norm": 1.5144503676499976, "learning_rate": 1.2092666467520415e-06, "loss": 0.6249, "step": 25472 }, { "epoch": 0.7807098197866863, "grad_norm": 1.3415701048290833, "learning_rate": 1.2089430244752782e-06, "loss": 0.5135, "step": 25473 }, { "epoch": 0.7807404683094274, "grad_norm": 1.3426977456404212, "learning_rate": 1.2086194395527712e-06, "loss": 0.5939, "step": 25474 }, { "epoch": 0.7807711168321687, "grad_norm": 0.44365768077029943, "learning_rate": 1.2082958919877052e-06, "loss": 0.3661, "step": 25475 }, { "epoch": 0.7808017653549099, "grad_norm": 1.2630381544719997, "learning_rate": 1.20797238178327e-06, "loss": 0.5845, "step": 25476 }, { "epoch": 0.7808324138776511, "grad_norm": 1.4406028513666824, "learning_rate": 1.2076489089426545e-06, "loss": 0.5863, "step": 25477 }, { "epoch": 0.7808630624003923, "grad_norm": 1.3207556832007996, "learning_rate": 1.2073254734690433e-06, "loss": 0.6393, "step": 25478 }, { "epoch": 0.7808937109231335, "grad_norm": 1.2241796340278515, "learning_rate": 1.2070020753656248e-06, "loss": 0.5762, "step": 25479 }, { "epoch": 0.7809243594458747, "grad_norm": 1.4287333012591181, "learning_rate": 1.2066787146355863e-06, "loss": 0.6539, "step": 25480 }, { "epoch": 0.7809550079686159, "grad_norm": 1.4454623237840916, "learning_rate": 1.2063553912821118e-06, "loss": 0.5676, "step": 25481 }, { "epoch": 0.7809856564913571, "grad_norm": 1.3120001116509923, "learning_rate": 1.2060321053083895e-06, "loss": 0.5651, "step": 25482 }, { "epoch": 0.7810163050140984, "grad_norm": 1.2320906668754397, "learning_rate": 1.2057088567176024e-06, "loss": 0.5208, "step": 25483 }, { "epoch": 0.7810469535368395, "grad_norm": 1.3594358965020994, "learning_rate": 1.2053856455129365e-06, "loss": 0.7095, "step": 25484 }, { "epoch": 0.7810776020595808, "grad_norm": 0.4610703346756579, "learning_rate": 1.2050624716975785e-06, "loss": 0.3871, "step": 25485 }, { "epoch": 0.7811082505823219, "grad_norm": 1.405589973641365, "learning_rate": 1.2047393352747095e-06, "loss": 0.621, "step": 25486 }, { "epoch": 0.7811388991050632, "grad_norm": 1.4577623518604315, "learning_rate": 1.2044162362475148e-06, "loss": 0.561, "step": 25487 }, { "epoch": 0.7811695476278043, "grad_norm": 1.345742678285234, "learning_rate": 1.2040931746191792e-06, "loss": 0.5477, "step": 25488 }, { "epoch": 0.7812001961505456, "grad_norm": 1.255379415976634, "learning_rate": 1.203770150392885e-06, "loss": 0.6105, "step": 25489 }, { "epoch": 0.7812308446732867, "grad_norm": 1.2557548807088754, "learning_rate": 1.2034471635718121e-06, "loss": 0.5522, "step": 25490 }, { "epoch": 0.781261493196028, "grad_norm": 1.240184180934784, "learning_rate": 1.203124214159148e-06, "loss": 0.5153, "step": 25491 }, { "epoch": 0.7812921417187692, "grad_norm": 1.2833672433872483, "learning_rate": 1.202801302158072e-06, "loss": 0.6158, "step": 25492 }, { "epoch": 0.7813227902415104, "grad_norm": 1.1794067649262274, "learning_rate": 1.202478427571767e-06, "loss": 0.5314, "step": 25493 }, { "epoch": 0.7813534387642516, "grad_norm": 1.2569432789166821, "learning_rate": 1.2021555904034127e-06, "loss": 0.6418, "step": 25494 }, { "epoch": 0.7813840872869927, "grad_norm": 0.4822291945946798, "learning_rate": 1.2018327906561911e-06, "loss": 0.3979, "step": 25495 }, { "epoch": 0.781414735809734, "grad_norm": 1.3740359645733298, "learning_rate": 1.2015100283332838e-06, "loss": 0.6972, "step": 25496 }, { "epoch": 0.7814453843324751, "grad_norm": 1.2113383403310876, "learning_rate": 1.201187303437869e-06, "loss": 0.5979, "step": 25497 }, { "epoch": 0.7814760328552164, "grad_norm": 1.3682611075545226, "learning_rate": 1.2008646159731274e-06, "loss": 0.5748, "step": 25498 }, { "epoch": 0.7815066813779575, "grad_norm": 1.4830728808848788, "learning_rate": 1.2005419659422401e-06, "loss": 0.6791, "step": 25499 }, { "epoch": 0.7815373299006988, "grad_norm": 1.3014441207219354, "learning_rate": 1.2002193533483842e-06, "loss": 0.5609, "step": 25500 }, { "epoch": 0.78156797842344, "grad_norm": 1.3968378696194574, "learning_rate": 1.1998967781947385e-06, "loss": 0.6466, "step": 25501 }, { "epoch": 0.7815986269461812, "grad_norm": 1.4206991859774516, "learning_rate": 1.199574240484484e-06, "loss": 0.5523, "step": 25502 }, { "epoch": 0.7816292754689224, "grad_norm": 1.3475989920310365, "learning_rate": 1.1992517402207954e-06, "loss": 0.658, "step": 25503 }, { "epoch": 0.7816599239916636, "grad_norm": 0.46299307708636794, "learning_rate": 1.1989292774068533e-06, "loss": 0.3802, "step": 25504 }, { "epoch": 0.7816905725144048, "grad_norm": 1.16601164139194, "learning_rate": 1.1986068520458322e-06, "loss": 0.6159, "step": 25505 }, { "epoch": 0.781721221037146, "grad_norm": 1.314947222700886, "learning_rate": 1.1982844641409103e-06, "loss": 0.5957, "step": 25506 }, { "epoch": 0.7817518695598872, "grad_norm": 1.2546155564574326, "learning_rate": 1.1979621136952657e-06, "loss": 0.547, "step": 25507 }, { "epoch": 0.7817825180826284, "grad_norm": 1.5278122176733675, "learning_rate": 1.1976398007120715e-06, "loss": 0.5754, "step": 25508 }, { "epoch": 0.7818131666053696, "grad_norm": 0.4331447509517559, "learning_rate": 1.1973175251945058e-06, "loss": 0.4077, "step": 25509 }, { "epoch": 0.7818438151281109, "grad_norm": 1.3947725468323098, "learning_rate": 1.1969952871457442e-06, "loss": 0.5754, "step": 25510 }, { "epoch": 0.781874463650852, "grad_norm": 1.5560087022693774, "learning_rate": 1.1966730865689602e-06, "loss": 0.6229, "step": 25511 }, { "epoch": 0.7819051121735933, "grad_norm": 1.398112402556777, "learning_rate": 1.1963509234673293e-06, "loss": 0.7179, "step": 25512 }, { "epoch": 0.7819357606963344, "grad_norm": 1.1562139070094712, "learning_rate": 1.196028797844027e-06, "loss": 0.6073, "step": 25513 }, { "epoch": 0.7819664092190757, "grad_norm": 1.3600372716485378, "learning_rate": 1.1957067097022252e-06, "loss": 0.6508, "step": 25514 }, { "epoch": 0.7819970577418168, "grad_norm": 1.3497024298402263, "learning_rate": 1.1953846590451002e-06, "loss": 0.5895, "step": 25515 }, { "epoch": 0.7820277062645581, "grad_norm": 1.411306658581348, "learning_rate": 1.1950626458758218e-06, "loss": 0.6668, "step": 25516 }, { "epoch": 0.7820583547872992, "grad_norm": 1.406189398166306, "learning_rate": 1.194740670197565e-06, "loss": 0.6568, "step": 25517 }, { "epoch": 0.7820890033100405, "grad_norm": 1.366809263170413, "learning_rate": 1.1944187320135031e-06, "loss": 0.5965, "step": 25518 }, { "epoch": 0.7821196518327816, "grad_norm": 1.6118414073501792, "learning_rate": 1.1940968313268058e-06, "loss": 0.6094, "step": 25519 }, { "epoch": 0.7821503003555229, "grad_norm": 1.35323708821548, "learning_rate": 1.1937749681406464e-06, "loss": 0.6386, "step": 25520 }, { "epoch": 0.7821809488782641, "grad_norm": 1.4036720877274376, "learning_rate": 1.1934531424581973e-06, "loss": 0.6036, "step": 25521 }, { "epoch": 0.7822115974010053, "grad_norm": 1.204628356639233, "learning_rate": 1.1931313542826268e-06, "loss": 0.6262, "step": 25522 }, { "epoch": 0.7822422459237465, "grad_norm": 1.3948548195581791, "learning_rate": 1.1928096036171072e-06, "loss": 0.5686, "step": 25523 }, { "epoch": 0.7822728944464877, "grad_norm": 1.3798439602016601, "learning_rate": 1.19248789046481e-06, "loss": 0.6242, "step": 25524 }, { "epoch": 0.7823035429692289, "grad_norm": 1.2388622488632883, "learning_rate": 1.1921662148289027e-06, "loss": 0.542, "step": 25525 }, { "epoch": 0.78233419149197, "grad_norm": 1.4403086648688541, "learning_rate": 1.1918445767125575e-06, "loss": 0.6146, "step": 25526 }, { "epoch": 0.7823648400147113, "grad_norm": 1.367633277281206, "learning_rate": 1.19152297611894e-06, "loss": 0.6498, "step": 25527 }, { "epoch": 0.7823954885374524, "grad_norm": 0.42685388535869023, "learning_rate": 1.1912014130512216e-06, "loss": 0.3841, "step": 25528 }, { "epoch": 0.7824261370601937, "grad_norm": 1.4459001693864195, "learning_rate": 1.1908798875125715e-06, "loss": 0.5452, "step": 25529 }, { "epoch": 0.7824567855829349, "grad_norm": 1.4701691537596429, "learning_rate": 1.1905583995061548e-06, "loss": 0.7033, "step": 25530 }, { "epoch": 0.7824874341056761, "grad_norm": 1.4788483822406258, "learning_rate": 1.1902369490351412e-06, "loss": 0.6239, "step": 25531 }, { "epoch": 0.7825180826284173, "grad_norm": 1.4654208225888214, "learning_rate": 1.1899155361026992e-06, "loss": 0.5854, "step": 25532 }, { "epoch": 0.7825487311511585, "grad_norm": 1.5971275155074032, "learning_rate": 1.1895941607119926e-06, "loss": 0.6916, "step": 25533 }, { "epoch": 0.7825793796738997, "grad_norm": 1.545635603119612, "learning_rate": 1.189272822866191e-06, "loss": 0.5952, "step": 25534 }, { "epoch": 0.7826100281966409, "grad_norm": 1.2126418271256352, "learning_rate": 1.1889515225684583e-06, "loss": 0.6216, "step": 25535 }, { "epoch": 0.7826406767193821, "grad_norm": 1.5398455833325595, "learning_rate": 1.1886302598219607e-06, "loss": 0.6111, "step": 25536 }, { "epoch": 0.7826713252421234, "grad_norm": 1.305731547953509, "learning_rate": 1.1883090346298665e-06, "loss": 0.6733, "step": 25537 }, { "epoch": 0.7827019737648645, "grad_norm": 1.406311921171532, "learning_rate": 1.1879878469953366e-06, "loss": 0.6422, "step": 25538 }, { "epoch": 0.7827326222876058, "grad_norm": 1.2016864078627107, "learning_rate": 1.1876666969215384e-06, "loss": 0.5814, "step": 25539 }, { "epoch": 0.7827632708103469, "grad_norm": 1.4302558781286396, "learning_rate": 1.1873455844116366e-06, "loss": 0.5613, "step": 25540 }, { "epoch": 0.7827939193330882, "grad_norm": 1.308875888243569, "learning_rate": 1.1870245094687926e-06, "loss": 0.5917, "step": 25541 }, { "epoch": 0.7828245678558293, "grad_norm": 1.321385262999602, "learning_rate": 1.1867034720961722e-06, "loss": 0.6427, "step": 25542 }, { "epoch": 0.7828552163785706, "grad_norm": 1.3935120910259, "learning_rate": 1.1863824722969396e-06, "loss": 0.6189, "step": 25543 }, { "epoch": 0.7828858649013117, "grad_norm": 1.368980298446613, "learning_rate": 1.1860615100742546e-06, "loss": 0.5919, "step": 25544 }, { "epoch": 0.782916513424053, "grad_norm": 0.45709770160426355, "learning_rate": 1.1857405854312832e-06, "loss": 0.3848, "step": 25545 }, { "epoch": 0.7829471619467941, "grad_norm": 1.4721459493970646, "learning_rate": 1.1854196983711823e-06, "loss": 0.5416, "step": 25546 }, { "epoch": 0.7829778104695354, "grad_norm": 1.2913593177807072, "learning_rate": 1.1850988488971205e-06, "loss": 0.5894, "step": 25547 }, { "epoch": 0.7830084589922766, "grad_norm": 1.3419838916845832, "learning_rate": 1.1847780370122552e-06, "loss": 0.5334, "step": 25548 }, { "epoch": 0.7830391075150178, "grad_norm": 1.5044133769469932, "learning_rate": 1.184457262719747e-06, "loss": 0.6406, "step": 25549 }, { "epoch": 0.783069756037759, "grad_norm": 1.3971565848823304, "learning_rate": 1.1841365260227578e-06, "loss": 0.6215, "step": 25550 }, { "epoch": 0.7831004045605002, "grad_norm": 1.4939089287504441, "learning_rate": 1.1838158269244488e-06, "loss": 0.6, "step": 25551 }, { "epoch": 0.7831310530832414, "grad_norm": 1.286725037789538, "learning_rate": 1.1834951654279775e-06, "loss": 0.6777, "step": 25552 }, { "epoch": 0.7831617016059826, "grad_norm": 1.3614373945887241, "learning_rate": 1.1831745415365054e-06, "loss": 0.4941, "step": 25553 }, { "epoch": 0.7831923501287238, "grad_norm": 1.344701447244795, "learning_rate": 1.1828539552531903e-06, "loss": 0.6264, "step": 25554 }, { "epoch": 0.783222998651465, "grad_norm": 1.3871573929909176, "learning_rate": 1.182533406581194e-06, "loss": 0.5737, "step": 25555 }, { "epoch": 0.7832536471742062, "grad_norm": 0.4463919281045815, "learning_rate": 1.1822128955236722e-06, "loss": 0.3959, "step": 25556 }, { "epoch": 0.7832842956969474, "grad_norm": 1.313248256089466, "learning_rate": 1.1818924220837812e-06, "loss": 0.6388, "step": 25557 }, { "epoch": 0.7833149442196886, "grad_norm": 1.3334034296954456, "learning_rate": 1.1815719862646835e-06, "loss": 0.6017, "step": 25558 }, { "epoch": 0.7833455927424298, "grad_norm": 1.4737425366019619, "learning_rate": 1.1812515880695342e-06, "loss": 0.6555, "step": 25559 }, { "epoch": 0.783376241265171, "grad_norm": 1.5038400027760543, "learning_rate": 1.180931227501489e-06, "loss": 0.6619, "step": 25560 }, { "epoch": 0.7834068897879122, "grad_norm": 1.4895438484640702, "learning_rate": 1.1806109045637048e-06, "loss": 0.588, "step": 25561 }, { "epoch": 0.7834375383106534, "grad_norm": 1.5391431176442827, "learning_rate": 1.1802906192593404e-06, "loss": 0.6078, "step": 25562 }, { "epoch": 0.7834681868333946, "grad_norm": 1.1213284650191069, "learning_rate": 1.1799703715915485e-06, "loss": 0.6143, "step": 25563 }, { "epoch": 0.7834988353561358, "grad_norm": 1.3158509949076203, "learning_rate": 1.179650161563486e-06, "loss": 0.6469, "step": 25564 }, { "epoch": 0.783529483878877, "grad_norm": 1.2544436856466656, "learning_rate": 1.1793299891783078e-06, "loss": 0.5919, "step": 25565 }, { "epoch": 0.7835601324016183, "grad_norm": 1.299479827775646, "learning_rate": 1.1790098544391699e-06, "loss": 0.5632, "step": 25566 }, { "epoch": 0.7835907809243594, "grad_norm": 1.399999222698646, "learning_rate": 1.1786897573492262e-06, "loss": 0.674, "step": 25567 }, { "epoch": 0.7836214294471007, "grad_norm": 1.4022387801743683, "learning_rate": 1.1783696979116265e-06, "loss": 0.5724, "step": 25568 }, { "epoch": 0.7836520779698418, "grad_norm": 1.2783772570283045, "learning_rate": 1.1780496761295312e-06, "loss": 0.6192, "step": 25569 }, { "epoch": 0.7836827264925831, "grad_norm": 1.6201124906553916, "learning_rate": 1.1777296920060905e-06, "loss": 0.6492, "step": 25570 }, { "epoch": 0.7837133750153242, "grad_norm": 1.5869648775320724, "learning_rate": 1.1774097455444554e-06, "loss": 0.6661, "step": 25571 }, { "epoch": 0.7837440235380655, "grad_norm": 0.4459525464713012, "learning_rate": 1.17708983674778e-06, "loss": 0.3801, "step": 25572 }, { "epoch": 0.7837746720608066, "grad_norm": 1.3360025176101082, "learning_rate": 1.1767699656192172e-06, "loss": 0.6491, "step": 25573 }, { "epoch": 0.7838053205835479, "grad_norm": 1.2941793836576903, "learning_rate": 1.1764501321619186e-06, "loss": 0.5841, "step": 25574 }, { "epoch": 0.783835969106289, "grad_norm": 1.5056967357218753, "learning_rate": 1.1761303363790343e-06, "loss": 0.6299, "step": 25575 }, { "epoch": 0.7838666176290303, "grad_norm": 1.6309728657976479, "learning_rate": 1.1758105782737167e-06, "loss": 0.5645, "step": 25576 }, { "epoch": 0.7838972661517715, "grad_norm": 0.45911377186793767, "learning_rate": 1.1754908578491164e-06, "loss": 0.3875, "step": 25577 }, { "epoch": 0.7839279146745127, "grad_norm": 1.2556589722046907, "learning_rate": 1.175171175108384e-06, "loss": 0.5511, "step": 25578 }, { "epoch": 0.7839585631972539, "grad_norm": 1.285318741263088, "learning_rate": 1.1748515300546666e-06, "loss": 0.6099, "step": 25579 }, { "epoch": 0.7839892117199951, "grad_norm": 1.2630400806954538, "learning_rate": 1.174531922691116e-06, "loss": 0.6099, "step": 25580 }, { "epoch": 0.7840198602427363, "grad_norm": 1.2776640155987598, "learning_rate": 1.174212353020881e-06, "loss": 0.7502, "step": 25581 }, { "epoch": 0.7840505087654775, "grad_norm": 1.2587835713291646, "learning_rate": 1.1738928210471124e-06, "loss": 0.5513, "step": 25582 }, { "epoch": 0.7840811572882187, "grad_norm": 0.44609313563317154, "learning_rate": 1.173573326772955e-06, "loss": 0.373, "step": 25583 }, { "epoch": 0.78411180581096, "grad_norm": 1.411525051499168, "learning_rate": 1.173253870201559e-06, "loss": 0.6935, "step": 25584 }, { "epoch": 0.7841424543337011, "grad_norm": 1.2702356864854483, "learning_rate": 1.172934451336073e-06, "loss": 0.5849, "step": 25585 }, { "epoch": 0.7841731028564424, "grad_norm": 1.281959367304843, "learning_rate": 1.172615070179643e-06, "loss": 0.5878, "step": 25586 }, { "epoch": 0.7842037513791835, "grad_norm": 1.3570179800427962, "learning_rate": 1.172295726735413e-06, "loss": 0.6478, "step": 25587 }, { "epoch": 0.7842343999019247, "grad_norm": 0.4481421349655292, "learning_rate": 1.1719764210065354e-06, "loss": 0.4072, "step": 25588 }, { "epoch": 0.7842650484246659, "grad_norm": 1.08020621410536, "learning_rate": 1.1716571529961535e-06, "loss": 0.6198, "step": 25589 }, { "epoch": 0.7842956969474071, "grad_norm": 1.2526248219498133, "learning_rate": 1.1713379227074123e-06, "loss": 0.5976, "step": 25590 }, { "epoch": 0.7843263454701483, "grad_norm": 1.5269593693458958, "learning_rate": 1.1710187301434578e-06, "loss": 0.6597, "step": 25591 }, { "epoch": 0.7843569939928895, "grad_norm": 1.5859298731597624, "learning_rate": 1.1706995753074352e-06, "loss": 0.654, "step": 25592 }, { "epoch": 0.7843876425156308, "grad_norm": 1.3263066984499532, "learning_rate": 1.1703804582024914e-06, "loss": 0.6205, "step": 25593 }, { "epoch": 0.7844182910383719, "grad_norm": 1.3223729646232711, "learning_rate": 1.1700613788317666e-06, "loss": 0.5775, "step": 25594 }, { "epoch": 0.7844489395611132, "grad_norm": 0.44768813178816397, "learning_rate": 1.1697423371984079e-06, "loss": 0.4083, "step": 25595 }, { "epoch": 0.7844795880838543, "grad_norm": 1.345415032460909, "learning_rate": 1.169423333305559e-06, "loss": 0.5584, "step": 25596 }, { "epoch": 0.7845102366065956, "grad_norm": 1.3855271127714457, "learning_rate": 1.1691043671563619e-06, "loss": 0.5978, "step": 25597 }, { "epoch": 0.7845408851293367, "grad_norm": 1.360579428724442, "learning_rate": 1.1687854387539566e-06, "loss": 0.6092, "step": 25598 }, { "epoch": 0.784571533652078, "grad_norm": 1.169562199890126, "learning_rate": 1.1684665481014922e-06, "loss": 0.644, "step": 25599 }, { "epoch": 0.7846021821748191, "grad_norm": 1.2966976891899245, "learning_rate": 1.1681476952021054e-06, "loss": 0.5827, "step": 25600 }, { "epoch": 0.7846328306975604, "grad_norm": 1.3839287541141034, "learning_rate": 1.167828880058941e-06, "loss": 0.6175, "step": 25601 }, { "epoch": 0.7846634792203016, "grad_norm": 1.2994525948212106, "learning_rate": 1.1675101026751378e-06, "loss": 0.636, "step": 25602 }, { "epoch": 0.7846941277430428, "grad_norm": 1.2580686952430973, "learning_rate": 1.1671913630538384e-06, "loss": 0.5806, "step": 25603 }, { "epoch": 0.784724776265784, "grad_norm": 1.257598685456106, "learning_rate": 1.1668726611981846e-06, "loss": 0.6872, "step": 25604 }, { "epoch": 0.7847554247885252, "grad_norm": 1.423523284226896, "learning_rate": 1.1665539971113138e-06, "loss": 0.5576, "step": 25605 }, { "epoch": 0.7847860733112664, "grad_norm": 1.31046863404032, "learning_rate": 1.166235370796367e-06, "loss": 0.6112, "step": 25606 }, { "epoch": 0.7848167218340076, "grad_norm": 1.4657949516680746, "learning_rate": 1.165916782256486e-06, "loss": 0.6877, "step": 25607 }, { "epoch": 0.7848473703567488, "grad_norm": 1.1108584049450612, "learning_rate": 1.165598231494806e-06, "loss": 0.4578, "step": 25608 }, { "epoch": 0.78487801887949, "grad_norm": 0.4380455869461734, "learning_rate": 1.1652797185144677e-06, "loss": 0.3762, "step": 25609 }, { "epoch": 0.7849086674022312, "grad_norm": 1.4248409117435157, "learning_rate": 1.1649612433186108e-06, "loss": 0.5736, "step": 25610 }, { "epoch": 0.7849393159249725, "grad_norm": 1.3468285315211634, "learning_rate": 1.1646428059103709e-06, "loss": 0.6509, "step": 25611 }, { "epoch": 0.7849699644477136, "grad_norm": 1.3722367780465667, "learning_rate": 1.1643244062928881e-06, "loss": 0.6415, "step": 25612 }, { "epoch": 0.7850006129704549, "grad_norm": 1.319482939802121, "learning_rate": 1.1640060444692968e-06, "loss": 0.538, "step": 25613 }, { "epoch": 0.785031261493196, "grad_norm": 1.2993781232778023, "learning_rate": 1.163687720442736e-06, "loss": 0.5521, "step": 25614 }, { "epoch": 0.7850619100159373, "grad_norm": 1.4097767234369354, "learning_rate": 1.1633694342163426e-06, "loss": 0.5857, "step": 25615 }, { "epoch": 0.7850925585386784, "grad_norm": 0.4388698526628268, "learning_rate": 1.1630511857932504e-06, "loss": 0.3723, "step": 25616 }, { "epoch": 0.7851232070614197, "grad_norm": 0.4371765110431401, "learning_rate": 1.1627329751765964e-06, "loss": 0.3886, "step": 25617 }, { "epoch": 0.7851538555841608, "grad_norm": 1.2646405562842444, "learning_rate": 1.1624148023695175e-06, "loss": 0.5311, "step": 25618 }, { "epoch": 0.785184504106902, "grad_norm": 1.267018757025995, "learning_rate": 1.1620966673751466e-06, "loss": 0.603, "step": 25619 }, { "epoch": 0.7852151526296433, "grad_norm": 1.4850565864073824, "learning_rate": 1.1617785701966188e-06, "loss": 0.6962, "step": 25620 }, { "epoch": 0.7852458011523844, "grad_norm": 1.3103397904707208, "learning_rate": 1.1614605108370703e-06, "loss": 0.5573, "step": 25621 }, { "epoch": 0.7852764496751257, "grad_norm": 1.3283274853954041, "learning_rate": 1.1611424892996327e-06, "loss": 0.6132, "step": 25622 }, { "epoch": 0.7853070981978668, "grad_norm": 1.2235160061845436, "learning_rate": 1.1608245055874407e-06, "loss": 0.5615, "step": 25623 }, { "epoch": 0.7853377467206081, "grad_norm": 1.3237888977589187, "learning_rate": 1.1605065597036264e-06, "loss": 0.6895, "step": 25624 }, { "epoch": 0.7853683952433492, "grad_norm": 0.4543664716784566, "learning_rate": 1.1601886516513234e-06, "loss": 0.3886, "step": 25625 }, { "epoch": 0.7853990437660905, "grad_norm": 1.2056353048068003, "learning_rate": 1.159870781433665e-06, "loss": 0.4843, "step": 25626 }, { "epoch": 0.7854296922888316, "grad_norm": 1.2668421782028194, "learning_rate": 1.1595529490537815e-06, "loss": 0.5672, "step": 25627 }, { "epoch": 0.7854603408115729, "grad_norm": 1.2701248162215282, "learning_rate": 1.1592351545148051e-06, "loss": 0.6047, "step": 25628 }, { "epoch": 0.785490989334314, "grad_norm": 1.6270769879325404, "learning_rate": 1.1589173978198687e-06, "loss": 0.6131, "step": 25629 }, { "epoch": 0.7855216378570553, "grad_norm": 1.3096491924317766, "learning_rate": 1.1585996789721004e-06, "loss": 0.6893, "step": 25630 }, { "epoch": 0.7855522863797965, "grad_norm": 1.4079287947877202, "learning_rate": 1.1582819979746347e-06, "loss": 0.5796, "step": 25631 }, { "epoch": 0.7855829349025377, "grad_norm": 1.972478938893751, "learning_rate": 1.157964354830597e-06, "loss": 0.6238, "step": 25632 }, { "epoch": 0.7856135834252789, "grad_norm": 1.3341484729370365, "learning_rate": 1.1576467495431199e-06, "loss": 0.665, "step": 25633 }, { "epoch": 0.7856442319480201, "grad_norm": 0.4661876434785316, "learning_rate": 1.1573291821153338e-06, "loss": 0.4033, "step": 25634 }, { "epoch": 0.7856748804707613, "grad_norm": 1.4109689693217276, "learning_rate": 1.157011652550365e-06, "loss": 0.5687, "step": 25635 }, { "epoch": 0.7857055289935025, "grad_norm": 1.3806421729996474, "learning_rate": 1.1566941608513438e-06, "loss": 0.6168, "step": 25636 }, { "epoch": 0.7857361775162437, "grad_norm": 1.2396652410248443, "learning_rate": 1.1563767070214e-06, "loss": 0.61, "step": 25637 }, { "epoch": 0.785766826038985, "grad_norm": 1.4149052445086086, "learning_rate": 1.1560592910636582e-06, "loss": 0.5637, "step": 25638 }, { "epoch": 0.7857974745617261, "grad_norm": 1.3757779008720126, "learning_rate": 1.155741912981248e-06, "loss": 0.6974, "step": 25639 }, { "epoch": 0.7858281230844674, "grad_norm": 1.3740924019699412, "learning_rate": 1.1554245727772978e-06, "loss": 0.5578, "step": 25640 }, { "epoch": 0.7858587716072085, "grad_norm": 1.2791528496910303, "learning_rate": 1.1551072704549309e-06, "loss": 0.5831, "step": 25641 }, { "epoch": 0.7858894201299498, "grad_norm": 1.2960409724466218, "learning_rate": 1.1547900060172779e-06, "loss": 0.6251, "step": 25642 }, { "epoch": 0.7859200686526909, "grad_norm": 1.35723740569468, "learning_rate": 1.154472779467461e-06, "loss": 0.613, "step": 25643 }, { "epoch": 0.7859507171754322, "grad_norm": 1.2775341514170948, "learning_rate": 1.1541555908086077e-06, "loss": 0.5557, "step": 25644 }, { "epoch": 0.7859813656981733, "grad_norm": 1.3274646749889916, "learning_rate": 1.1538384400438451e-06, "loss": 0.6242, "step": 25645 }, { "epoch": 0.7860120142209146, "grad_norm": 1.5141395051586848, "learning_rate": 1.153521327176295e-06, "loss": 0.5991, "step": 25646 }, { "epoch": 0.7860426627436558, "grad_norm": 1.3209819436796448, "learning_rate": 1.153204252209083e-06, "loss": 0.6243, "step": 25647 }, { "epoch": 0.786073311266397, "grad_norm": 1.3221852720344496, "learning_rate": 1.1528872151453357e-06, "loss": 0.6612, "step": 25648 }, { "epoch": 0.7861039597891382, "grad_norm": 1.3143723832383873, "learning_rate": 1.1525702159881735e-06, "loss": 0.6484, "step": 25649 }, { "epoch": 0.7861346083118793, "grad_norm": 1.671801114577551, "learning_rate": 1.1522532547407212e-06, "loss": 0.6909, "step": 25650 }, { "epoch": 0.7861652568346206, "grad_norm": 1.8770764116747085, "learning_rate": 1.1519363314061033e-06, "loss": 0.628, "step": 25651 }, { "epoch": 0.7861959053573617, "grad_norm": 1.3679330426073861, "learning_rate": 1.1516194459874403e-06, "loss": 0.6182, "step": 25652 }, { "epoch": 0.786226553880103, "grad_norm": 1.2541005454271972, "learning_rate": 1.1513025984878567e-06, "loss": 0.551, "step": 25653 }, { "epoch": 0.7862572024028441, "grad_norm": 1.4920635757937055, "learning_rate": 1.1509857889104704e-06, "loss": 0.7178, "step": 25654 }, { "epoch": 0.7862878509255854, "grad_norm": 1.3138551709625614, "learning_rate": 1.150669017258409e-06, "loss": 0.5995, "step": 25655 }, { "epoch": 0.7863184994483265, "grad_norm": 1.5607545336274622, "learning_rate": 1.1503522835347908e-06, "loss": 0.5371, "step": 25656 }, { "epoch": 0.7863491479710678, "grad_norm": 1.3997070185077654, "learning_rate": 1.1500355877427348e-06, "loss": 0.5847, "step": 25657 }, { "epoch": 0.786379796493809, "grad_norm": 1.3245109419778032, "learning_rate": 1.1497189298853634e-06, "loss": 0.6267, "step": 25658 }, { "epoch": 0.7864104450165502, "grad_norm": 1.3292561669198464, "learning_rate": 1.1494023099657975e-06, "loss": 0.6559, "step": 25659 }, { "epoch": 0.7864410935392914, "grad_norm": 1.368025413934697, "learning_rate": 1.1490857279871548e-06, "loss": 0.56, "step": 25660 }, { "epoch": 0.7864717420620326, "grad_norm": 0.43777887133329935, "learning_rate": 1.1487691839525561e-06, "loss": 0.3832, "step": 25661 }, { "epoch": 0.7865023905847738, "grad_norm": 1.2705210041977724, "learning_rate": 1.1484526778651195e-06, "loss": 0.6206, "step": 25662 }, { "epoch": 0.786533039107515, "grad_norm": 1.2156594943153392, "learning_rate": 1.1481362097279653e-06, "loss": 0.576, "step": 25663 }, { "epoch": 0.7865636876302562, "grad_norm": 1.2174941981595264, "learning_rate": 1.147819779544211e-06, "loss": 0.5878, "step": 25664 }, { "epoch": 0.7865943361529975, "grad_norm": 1.15214862110022, "learning_rate": 1.1475033873169728e-06, "loss": 0.5042, "step": 25665 }, { "epoch": 0.7866249846757386, "grad_norm": 1.4686684077060468, "learning_rate": 1.147187033049369e-06, "loss": 0.6932, "step": 25666 }, { "epoch": 0.7866556331984799, "grad_norm": 1.3522960490327074, "learning_rate": 1.1468707167445187e-06, "loss": 0.6566, "step": 25667 }, { "epoch": 0.786686281721221, "grad_norm": 0.4272542134370332, "learning_rate": 1.1465544384055355e-06, "loss": 0.3867, "step": 25668 }, { "epoch": 0.7867169302439623, "grad_norm": 1.3839651006139189, "learning_rate": 1.1462381980355381e-06, "loss": 0.5617, "step": 25669 }, { "epoch": 0.7867475787667034, "grad_norm": 1.4877440549963277, "learning_rate": 1.1459219956376421e-06, "loss": 0.6481, "step": 25670 }, { "epoch": 0.7867782272894447, "grad_norm": 1.2772837492955538, "learning_rate": 1.145605831214962e-06, "loss": 0.6052, "step": 25671 }, { "epoch": 0.7868088758121858, "grad_norm": 1.2404926738395534, "learning_rate": 1.145289704770614e-06, "loss": 0.564, "step": 25672 }, { "epoch": 0.7868395243349271, "grad_norm": 1.6567598322008679, "learning_rate": 1.1449736163077125e-06, "loss": 0.7125, "step": 25673 }, { "epoch": 0.7868701728576682, "grad_norm": 1.3132069089492253, "learning_rate": 1.144657565829374e-06, "loss": 0.6072, "step": 25674 }, { "epoch": 0.7869008213804095, "grad_norm": 1.5056209284584168, "learning_rate": 1.1443415533387103e-06, "loss": 0.6666, "step": 25675 }, { "epoch": 0.7869314699031507, "grad_norm": 1.510215031500224, "learning_rate": 1.144025578838835e-06, "loss": 0.6426, "step": 25676 }, { "epoch": 0.7869621184258919, "grad_norm": 1.238913345749384, "learning_rate": 1.143709642332862e-06, "loss": 0.5901, "step": 25677 }, { "epoch": 0.7869927669486331, "grad_norm": 1.1959861816162463, "learning_rate": 1.1433937438239062e-06, "loss": 0.5544, "step": 25678 }, { "epoch": 0.7870234154713743, "grad_norm": 1.3894163244734177, "learning_rate": 1.1430778833150768e-06, "loss": 0.6058, "step": 25679 }, { "epoch": 0.7870540639941155, "grad_norm": 1.3627395515677192, "learning_rate": 1.1427620608094881e-06, "loss": 0.6333, "step": 25680 }, { "epoch": 0.7870847125168566, "grad_norm": 1.2110931183612716, "learning_rate": 1.1424462763102517e-06, "loss": 0.6883, "step": 25681 }, { "epoch": 0.7871153610395979, "grad_norm": 1.2807805568122896, "learning_rate": 1.1421305298204805e-06, "loss": 0.5534, "step": 25682 }, { "epoch": 0.787146009562339, "grad_norm": 1.5258574459595278, "learning_rate": 1.1418148213432846e-06, "loss": 0.5674, "step": 25683 }, { "epoch": 0.7871766580850803, "grad_norm": 1.2566769594106513, "learning_rate": 1.1414991508817713e-06, "loss": 0.6442, "step": 25684 }, { "epoch": 0.7872073066078215, "grad_norm": 1.398065839978651, "learning_rate": 1.1411835184390569e-06, "loss": 0.6198, "step": 25685 }, { "epoch": 0.7872379551305627, "grad_norm": 1.4081073687003138, "learning_rate": 1.1408679240182485e-06, "loss": 0.5357, "step": 25686 }, { "epoch": 0.7872686036533039, "grad_norm": 1.3479922202060572, "learning_rate": 1.1405523676224551e-06, "loss": 0.5389, "step": 25687 }, { "epoch": 0.7872992521760451, "grad_norm": 1.4213553622542514, "learning_rate": 1.140236849254786e-06, "loss": 0.688, "step": 25688 }, { "epoch": 0.7873299006987863, "grad_norm": 1.3403246533822446, "learning_rate": 1.1399213689183509e-06, "loss": 0.532, "step": 25689 }, { "epoch": 0.7873605492215275, "grad_norm": 1.4237471750345505, "learning_rate": 1.1396059266162596e-06, "loss": 0.7268, "step": 25690 }, { "epoch": 0.7873911977442687, "grad_norm": 0.46787672484018356, "learning_rate": 1.1392905223516175e-06, "loss": 0.413, "step": 25691 }, { "epoch": 0.78742184626701, "grad_norm": 1.2514823860590285, "learning_rate": 1.1389751561275336e-06, "loss": 0.5726, "step": 25692 }, { "epoch": 0.7874524947897511, "grad_norm": 1.519991653021038, "learning_rate": 1.1386598279471174e-06, "loss": 0.6918, "step": 25693 }, { "epoch": 0.7874831433124924, "grad_norm": 1.4508418074497338, "learning_rate": 1.1383445378134734e-06, "loss": 0.6021, "step": 25694 }, { "epoch": 0.7875137918352335, "grad_norm": 1.2740702493449187, "learning_rate": 1.1380292857297053e-06, "loss": 0.6328, "step": 25695 }, { "epoch": 0.7875444403579748, "grad_norm": 0.44873974886036705, "learning_rate": 1.1377140716989265e-06, "loss": 0.3985, "step": 25696 }, { "epoch": 0.7875750888807159, "grad_norm": 1.3259886423729843, "learning_rate": 1.1373988957242388e-06, "loss": 0.6536, "step": 25697 }, { "epoch": 0.7876057374034572, "grad_norm": 1.3717682909048095, "learning_rate": 1.1370837578087468e-06, "loss": 0.5254, "step": 25698 }, { "epoch": 0.7876363859261983, "grad_norm": 1.3301098091641683, "learning_rate": 1.1367686579555565e-06, "loss": 0.6192, "step": 25699 }, { "epoch": 0.7876670344489396, "grad_norm": 1.3781804508631368, "learning_rate": 1.1364535961677736e-06, "loss": 0.4857, "step": 25700 }, { "epoch": 0.7876976829716807, "grad_norm": 1.313907868368266, "learning_rate": 1.136138572448503e-06, "loss": 0.6316, "step": 25701 }, { "epoch": 0.787728331494422, "grad_norm": 1.319620963504209, "learning_rate": 1.1358235868008466e-06, "loss": 0.6319, "step": 25702 }, { "epoch": 0.7877589800171632, "grad_norm": 1.4190432952340366, "learning_rate": 1.1355086392279085e-06, "loss": 0.5865, "step": 25703 }, { "epoch": 0.7877896285399044, "grad_norm": 1.3880788873652927, "learning_rate": 1.1351937297327942e-06, "loss": 0.5852, "step": 25704 }, { "epoch": 0.7878202770626456, "grad_norm": 1.3918306504909594, "learning_rate": 1.1348788583186054e-06, "loss": 0.6367, "step": 25705 }, { "epoch": 0.7878509255853868, "grad_norm": 1.3895522066930295, "learning_rate": 1.134564024988441e-06, "loss": 0.6073, "step": 25706 }, { "epoch": 0.787881574108128, "grad_norm": 1.3641485087781993, "learning_rate": 1.134249229745409e-06, "loss": 0.6412, "step": 25707 }, { "epoch": 0.7879122226308692, "grad_norm": 1.4239123103278422, "learning_rate": 1.133934472592607e-06, "loss": 0.6085, "step": 25708 }, { "epoch": 0.7879428711536104, "grad_norm": 1.3730289117625016, "learning_rate": 1.1336197535331395e-06, "loss": 0.6199, "step": 25709 }, { "epoch": 0.7879735196763517, "grad_norm": 1.1351989401147082, "learning_rate": 1.133305072570104e-06, "loss": 0.6239, "step": 25710 }, { "epoch": 0.7880041681990928, "grad_norm": 1.377320297821452, "learning_rate": 1.132990429706603e-06, "loss": 0.5349, "step": 25711 }, { "epoch": 0.788034816721834, "grad_norm": 1.2731280595796082, "learning_rate": 1.1326758249457387e-06, "loss": 0.6289, "step": 25712 }, { "epoch": 0.7880654652445752, "grad_norm": 1.442671957450259, "learning_rate": 1.1323612582906069e-06, "loss": 0.5762, "step": 25713 }, { "epoch": 0.7880961137673164, "grad_norm": 1.1445584210024689, "learning_rate": 1.1320467297443094e-06, "loss": 0.492, "step": 25714 }, { "epoch": 0.7881267622900576, "grad_norm": 1.3782431646689568, "learning_rate": 1.1317322393099468e-06, "loss": 0.5779, "step": 25715 }, { "epoch": 0.7881574108127988, "grad_norm": 1.2343250825665584, "learning_rate": 1.1314177869906163e-06, "loss": 0.6539, "step": 25716 }, { "epoch": 0.78818805933554, "grad_norm": 1.3162050311032565, "learning_rate": 1.1311033727894144e-06, "loss": 0.5927, "step": 25717 }, { "epoch": 0.7882187078582812, "grad_norm": 1.3521218099009298, "learning_rate": 1.130788996709441e-06, "loss": 0.5979, "step": 25718 }, { "epoch": 0.7882493563810224, "grad_norm": 1.2947565519736701, "learning_rate": 1.1304746587537935e-06, "loss": 0.5771, "step": 25719 }, { "epoch": 0.7882800049037636, "grad_norm": 1.236306168303927, "learning_rate": 1.1301603589255705e-06, "loss": 0.553, "step": 25720 }, { "epoch": 0.7883106534265049, "grad_norm": 1.3565841365041198, "learning_rate": 1.1298460972278663e-06, "loss": 0.5414, "step": 25721 }, { "epoch": 0.788341301949246, "grad_norm": 1.237674861247166, "learning_rate": 1.129531873663779e-06, "loss": 0.5765, "step": 25722 }, { "epoch": 0.7883719504719873, "grad_norm": 0.46075992415767586, "learning_rate": 1.129217688236406e-06, "loss": 0.3982, "step": 25723 }, { "epoch": 0.7884025989947284, "grad_norm": 1.3625362877795058, "learning_rate": 1.1289035409488391e-06, "loss": 0.4616, "step": 25724 }, { "epoch": 0.7884332475174697, "grad_norm": 0.452565904385474, "learning_rate": 1.1285894318041769e-06, "loss": 0.4121, "step": 25725 }, { "epoch": 0.7884638960402108, "grad_norm": 1.2208324474164973, "learning_rate": 1.1282753608055152e-06, "loss": 0.6054, "step": 25726 }, { "epoch": 0.7884945445629521, "grad_norm": 1.2402940963936526, "learning_rate": 1.127961327955945e-06, "loss": 0.5668, "step": 25727 }, { "epoch": 0.7885251930856932, "grad_norm": 1.3192455755026562, "learning_rate": 1.127647333258564e-06, "loss": 0.4852, "step": 25728 }, { "epoch": 0.7885558416084345, "grad_norm": 1.3247544441250505, "learning_rate": 1.1273333767164634e-06, "loss": 0.5756, "step": 25729 }, { "epoch": 0.7885864901311757, "grad_norm": 1.4917822006535697, "learning_rate": 1.127019458332738e-06, "loss": 0.6178, "step": 25730 }, { "epoch": 0.7886171386539169, "grad_norm": 1.2974013824523105, "learning_rate": 1.126705578110482e-06, "loss": 0.6804, "step": 25731 }, { "epoch": 0.7886477871766581, "grad_norm": 1.5314575429375388, "learning_rate": 1.126391736052786e-06, "loss": 0.6169, "step": 25732 }, { "epoch": 0.7886784356993993, "grad_norm": 1.4325338950751576, "learning_rate": 1.1260779321627429e-06, "loss": 0.6223, "step": 25733 }, { "epoch": 0.7887090842221405, "grad_norm": 1.3882289249199837, "learning_rate": 1.1257641664434466e-06, "loss": 0.7299, "step": 25734 }, { "epoch": 0.7887397327448817, "grad_norm": 1.3975443423314506, "learning_rate": 1.1254504388979859e-06, "loss": 0.6637, "step": 25735 }, { "epoch": 0.7887703812676229, "grad_norm": 1.4500127293031888, "learning_rate": 1.125136749529453e-06, "loss": 0.6187, "step": 25736 }, { "epoch": 0.7888010297903641, "grad_norm": 1.3510324264679263, "learning_rate": 1.1248230983409409e-06, "loss": 0.563, "step": 25737 }, { "epoch": 0.7888316783131053, "grad_norm": 1.3303181268066995, "learning_rate": 1.124509485335537e-06, "loss": 0.5997, "step": 25738 }, { "epoch": 0.7888623268358466, "grad_norm": 1.4610204130446995, "learning_rate": 1.124195910516334e-06, "loss": 0.5564, "step": 25739 }, { "epoch": 0.7888929753585877, "grad_norm": 1.198158650277385, "learning_rate": 1.123882373886419e-06, "loss": 0.5968, "step": 25740 }, { "epoch": 0.788923623881329, "grad_norm": 0.46223202961498105, "learning_rate": 1.1235688754488828e-06, "loss": 0.397, "step": 25741 }, { "epoch": 0.7889542724040701, "grad_norm": 1.6410875845395292, "learning_rate": 1.1232554152068154e-06, "loss": 0.6012, "step": 25742 }, { "epoch": 0.7889849209268113, "grad_norm": 0.44979825725289546, "learning_rate": 1.122941993163303e-06, "loss": 0.3825, "step": 25743 }, { "epoch": 0.7890155694495525, "grad_norm": 1.2515761230226394, "learning_rate": 1.122628609321435e-06, "loss": 0.6421, "step": 25744 }, { "epoch": 0.7890462179722937, "grad_norm": 0.46087666497799934, "learning_rate": 1.1223152636843016e-06, "loss": 0.4175, "step": 25745 }, { "epoch": 0.789076866495035, "grad_norm": 1.3363344096187713, "learning_rate": 1.1220019562549856e-06, "loss": 0.5834, "step": 25746 }, { "epoch": 0.7891075150177761, "grad_norm": 1.5600295501913406, "learning_rate": 1.1216886870365774e-06, "loss": 0.6461, "step": 25747 }, { "epoch": 0.7891381635405174, "grad_norm": 1.2206958616919787, "learning_rate": 1.1213754560321638e-06, "loss": 0.6084, "step": 25748 }, { "epoch": 0.7891688120632585, "grad_norm": 1.3482756646550826, "learning_rate": 1.1210622632448287e-06, "loss": 0.5355, "step": 25749 }, { "epoch": 0.7891994605859998, "grad_norm": 1.512300300237616, "learning_rate": 1.1207491086776613e-06, "loss": 0.5249, "step": 25750 }, { "epoch": 0.7892301091087409, "grad_norm": 1.3610514816894423, "learning_rate": 1.1204359923337437e-06, "loss": 0.5741, "step": 25751 }, { "epoch": 0.7892607576314822, "grad_norm": 0.44728593534330324, "learning_rate": 1.1201229142161634e-06, "loss": 0.3962, "step": 25752 }, { "epoch": 0.7892914061542233, "grad_norm": 1.4195503770206575, "learning_rate": 1.1198098743280056e-06, "loss": 0.5903, "step": 25753 }, { "epoch": 0.7893220546769646, "grad_norm": 1.4003484356628522, "learning_rate": 1.1194968726723533e-06, "loss": 0.6157, "step": 25754 }, { "epoch": 0.7893527031997057, "grad_norm": 1.4626719109533104, "learning_rate": 1.1191839092522904e-06, "loss": 0.6477, "step": 25755 }, { "epoch": 0.789383351722447, "grad_norm": 1.2068103357223585, "learning_rate": 1.1188709840709028e-06, "loss": 0.4677, "step": 25756 }, { "epoch": 0.7894140002451882, "grad_norm": 1.3600077874389618, "learning_rate": 1.1185580971312716e-06, "loss": 0.5985, "step": 25757 }, { "epoch": 0.7894446487679294, "grad_norm": 1.3518495350281274, "learning_rate": 1.1182452484364803e-06, "loss": 0.6463, "step": 25758 }, { "epoch": 0.7894752972906706, "grad_norm": 1.2809963738680092, "learning_rate": 1.117932437989613e-06, "loss": 0.6327, "step": 25759 }, { "epoch": 0.7895059458134118, "grad_norm": 1.351852473535883, "learning_rate": 1.1176196657937495e-06, "loss": 0.6436, "step": 25760 }, { "epoch": 0.789536594336153, "grad_norm": 1.5237925863638402, "learning_rate": 1.117306931851974e-06, "loss": 0.5966, "step": 25761 }, { "epoch": 0.7895672428588942, "grad_norm": 1.4640133458174163, "learning_rate": 1.1169942361673651e-06, "loss": 0.6014, "step": 25762 }, { "epoch": 0.7895978913816354, "grad_norm": 1.384221007049684, "learning_rate": 1.1166815787430062e-06, "loss": 0.5836, "step": 25763 }, { "epoch": 0.7896285399043766, "grad_norm": 1.1061030901858235, "learning_rate": 1.1163689595819783e-06, "loss": 0.5834, "step": 25764 }, { "epoch": 0.7896591884271178, "grad_norm": 0.4583451214984145, "learning_rate": 1.1160563786873591e-06, "loss": 0.3999, "step": 25765 }, { "epoch": 0.7896898369498591, "grad_norm": 1.2569406404696932, "learning_rate": 1.1157438360622302e-06, "loss": 0.5329, "step": 25766 }, { "epoch": 0.7897204854726002, "grad_norm": 1.329876110324474, "learning_rate": 1.1154313317096726e-06, "loss": 0.5783, "step": 25767 }, { "epoch": 0.7897511339953415, "grad_norm": 1.450184779143599, "learning_rate": 1.1151188656327627e-06, "loss": 0.6641, "step": 25768 }, { "epoch": 0.7897817825180826, "grad_norm": 1.4903742251561616, "learning_rate": 1.1148064378345825e-06, "loss": 0.6368, "step": 25769 }, { "epoch": 0.7898124310408239, "grad_norm": 0.43454656725730734, "learning_rate": 1.1144940483182064e-06, "loss": 0.3967, "step": 25770 }, { "epoch": 0.789843079563565, "grad_norm": 1.3425169089589946, "learning_rate": 1.1141816970867148e-06, "loss": 0.5946, "step": 25771 }, { "epoch": 0.7898737280863063, "grad_norm": 1.5333673014290436, "learning_rate": 1.1138693841431864e-06, "loss": 0.6582, "step": 25772 }, { "epoch": 0.7899043766090474, "grad_norm": 1.4096893990558552, "learning_rate": 1.1135571094906961e-06, "loss": 0.6674, "step": 25773 }, { "epoch": 0.7899350251317886, "grad_norm": 1.2212830303050697, "learning_rate": 1.113244873132322e-06, "loss": 0.6038, "step": 25774 }, { "epoch": 0.7899656736545299, "grad_norm": 1.8415215138499796, "learning_rate": 1.1129326750711417e-06, "loss": 0.6483, "step": 25775 }, { "epoch": 0.789996322177271, "grad_norm": 1.3306742984137017, "learning_rate": 1.1126205153102293e-06, "loss": 0.5597, "step": 25776 }, { "epoch": 0.7900269707000123, "grad_norm": 1.4100430358487324, "learning_rate": 1.1123083938526612e-06, "loss": 0.6167, "step": 25777 }, { "epoch": 0.7900576192227534, "grad_norm": 1.5239311350784726, "learning_rate": 1.1119963107015153e-06, "loss": 0.6215, "step": 25778 }, { "epoch": 0.7900882677454947, "grad_norm": 0.434964905999736, "learning_rate": 1.1116842658598626e-06, "loss": 0.3702, "step": 25779 }, { "epoch": 0.7901189162682358, "grad_norm": 1.2312772254936886, "learning_rate": 1.1113722593307813e-06, "loss": 0.6031, "step": 25780 }, { "epoch": 0.7901495647909771, "grad_norm": 0.45931435409766636, "learning_rate": 1.1110602911173413e-06, "loss": 0.3835, "step": 25781 }, { "epoch": 0.7901802133137182, "grad_norm": 1.4505478455741254, "learning_rate": 1.1107483612226216e-06, "loss": 0.5967, "step": 25782 }, { "epoch": 0.7902108618364595, "grad_norm": 1.3239473402107127, "learning_rate": 1.1104364696496938e-06, "loss": 0.5672, "step": 25783 }, { "epoch": 0.7902415103592006, "grad_norm": 1.380111210166379, "learning_rate": 1.1101246164016295e-06, "loss": 0.6636, "step": 25784 }, { "epoch": 0.7902721588819419, "grad_norm": 1.206871195027227, "learning_rate": 1.109812801481502e-06, "loss": 0.4846, "step": 25785 }, { "epoch": 0.7903028074046831, "grad_norm": 1.2879980094903567, "learning_rate": 1.1095010248923859e-06, "loss": 0.6015, "step": 25786 }, { "epoch": 0.7903334559274243, "grad_norm": 1.4420533095114516, "learning_rate": 1.1091892866373506e-06, "loss": 0.5962, "step": 25787 }, { "epoch": 0.7903641044501655, "grad_norm": 1.266605159655363, "learning_rate": 1.1088775867194684e-06, "loss": 0.5944, "step": 25788 }, { "epoch": 0.7903947529729067, "grad_norm": 1.3815353528721581, "learning_rate": 1.1085659251418113e-06, "loss": 0.6535, "step": 25789 }, { "epoch": 0.7904254014956479, "grad_norm": 1.511472275121162, "learning_rate": 1.108254301907451e-06, "loss": 0.7092, "step": 25790 }, { "epoch": 0.7904560500183891, "grad_norm": 1.3047018261988261, "learning_rate": 1.1079427170194568e-06, "loss": 0.5425, "step": 25791 }, { "epoch": 0.7904866985411303, "grad_norm": 1.4610745548959425, "learning_rate": 1.1076311704808957e-06, "loss": 0.6486, "step": 25792 }, { "epoch": 0.7905173470638716, "grad_norm": 1.2882883930398914, "learning_rate": 1.107319662294844e-06, "loss": 0.6051, "step": 25793 }, { "epoch": 0.7905479955866127, "grad_norm": 1.2361629364170017, "learning_rate": 1.1070081924643672e-06, "loss": 0.5613, "step": 25794 }, { "epoch": 0.790578644109354, "grad_norm": 1.2319978934277587, "learning_rate": 1.1066967609925333e-06, "loss": 0.6577, "step": 25795 }, { "epoch": 0.7906092926320951, "grad_norm": 1.2668293903103303, "learning_rate": 1.1063853678824127e-06, "loss": 0.5624, "step": 25796 }, { "epoch": 0.7906399411548364, "grad_norm": 1.3144328637109148, "learning_rate": 1.1060740131370744e-06, "loss": 0.6597, "step": 25797 }, { "epoch": 0.7906705896775775, "grad_norm": 1.469619322084336, "learning_rate": 1.105762696759584e-06, "loss": 0.6445, "step": 25798 }, { "epoch": 0.7907012382003188, "grad_norm": 1.6170297378303902, "learning_rate": 1.1054514187530102e-06, "loss": 0.6335, "step": 25799 }, { "epoch": 0.7907318867230599, "grad_norm": 1.4927734386449973, "learning_rate": 1.1051401791204197e-06, "loss": 0.6602, "step": 25800 }, { "epoch": 0.7907625352458012, "grad_norm": 1.4787984028462442, "learning_rate": 1.1048289778648814e-06, "loss": 0.5785, "step": 25801 }, { "epoch": 0.7907931837685424, "grad_norm": 1.3545612406090097, "learning_rate": 1.1045178149894592e-06, "loss": 0.6779, "step": 25802 }, { "epoch": 0.7908238322912836, "grad_norm": 1.381078365981946, "learning_rate": 1.104206690497217e-06, "loss": 0.544, "step": 25803 }, { "epoch": 0.7908544808140248, "grad_norm": 1.3926841608694664, "learning_rate": 1.1038956043912264e-06, "loss": 0.5858, "step": 25804 }, { "epoch": 0.7908851293367659, "grad_norm": 0.44898736203801454, "learning_rate": 1.103584556674549e-06, "loss": 0.395, "step": 25805 }, { "epoch": 0.7909157778595072, "grad_norm": 1.35565596786305, "learning_rate": 1.103273547350248e-06, "loss": 0.5361, "step": 25806 }, { "epoch": 0.7909464263822483, "grad_norm": 1.4036574349656543, "learning_rate": 1.1029625764213903e-06, "loss": 0.6162, "step": 25807 }, { "epoch": 0.7909770749049896, "grad_norm": 0.4540369579836991, "learning_rate": 1.1026516438910396e-06, "loss": 0.3898, "step": 25808 }, { "epoch": 0.7910077234277307, "grad_norm": 1.2079472854031974, "learning_rate": 1.1023407497622601e-06, "loss": 0.5821, "step": 25809 }, { "epoch": 0.791038371950472, "grad_norm": 1.4198369209041695, "learning_rate": 1.1020298940381135e-06, "loss": 0.5576, "step": 25810 }, { "epoch": 0.7910690204732131, "grad_norm": 1.5159047394024179, "learning_rate": 1.101719076721664e-06, "loss": 0.6198, "step": 25811 }, { "epoch": 0.7910996689959544, "grad_norm": 1.1827017161926547, "learning_rate": 1.101408297815975e-06, "loss": 0.5738, "step": 25812 }, { "epoch": 0.7911303175186956, "grad_norm": 0.4610535094866003, "learning_rate": 1.1010975573241073e-06, "loss": 0.4007, "step": 25813 }, { "epoch": 0.7911609660414368, "grad_norm": 1.1453554554221512, "learning_rate": 1.1007868552491219e-06, "loss": 0.5249, "step": 25814 }, { "epoch": 0.791191614564178, "grad_norm": 1.2054122230913165, "learning_rate": 1.100476191594081e-06, "loss": 0.5139, "step": 25815 }, { "epoch": 0.7912222630869192, "grad_norm": 1.2385735582605106, "learning_rate": 1.1001655663620463e-06, "loss": 0.6329, "step": 25816 }, { "epoch": 0.7912529116096604, "grad_norm": 1.3722871902139426, "learning_rate": 1.0998549795560791e-06, "loss": 0.6487, "step": 25817 }, { "epoch": 0.7912835601324016, "grad_norm": 1.2461407073486668, "learning_rate": 1.0995444311792374e-06, "loss": 0.5988, "step": 25818 }, { "epoch": 0.7913142086551428, "grad_norm": 1.4277889313092573, "learning_rate": 1.0992339212345827e-06, "loss": 0.552, "step": 25819 }, { "epoch": 0.791344857177884, "grad_norm": 1.393218124474093, "learning_rate": 1.098923449725175e-06, "loss": 0.5991, "step": 25820 }, { "epoch": 0.7913755057006252, "grad_norm": 0.4625944460371546, "learning_rate": 1.098613016654073e-06, "loss": 0.3867, "step": 25821 }, { "epoch": 0.7914061542233665, "grad_norm": 1.3570896602013889, "learning_rate": 1.0983026220243326e-06, "loss": 0.5727, "step": 25822 }, { "epoch": 0.7914368027461076, "grad_norm": 1.3404613652648218, "learning_rate": 1.097992265839017e-06, "loss": 0.5994, "step": 25823 }, { "epoch": 0.7914674512688489, "grad_norm": 1.417925911163351, "learning_rate": 1.0976819481011824e-06, "loss": 0.6226, "step": 25824 }, { "epoch": 0.79149809979159, "grad_norm": 0.4418459801722978, "learning_rate": 1.0973716688138847e-06, "loss": 0.3799, "step": 25825 }, { "epoch": 0.7915287483143313, "grad_norm": 1.5148473546938244, "learning_rate": 1.0970614279801823e-06, "loss": 0.5987, "step": 25826 }, { "epoch": 0.7915593968370724, "grad_norm": 1.2864879598875645, "learning_rate": 1.0967512256031322e-06, "loss": 0.6024, "step": 25827 }, { "epoch": 0.7915900453598137, "grad_norm": 1.404184519438241, "learning_rate": 1.0964410616857924e-06, "loss": 0.5991, "step": 25828 }, { "epoch": 0.7916206938825548, "grad_norm": 1.337317484931882, "learning_rate": 1.0961309362312162e-06, "loss": 0.6594, "step": 25829 }, { "epoch": 0.7916513424052961, "grad_norm": 1.1836709648940094, "learning_rate": 1.0958208492424605e-06, "loss": 0.5385, "step": 25830 }, { "epoch": 0.7916819909280373, "grad_norm": 1.3474113364784783, "learning_rate": 1.0955108007225828e-06, "loss": 0.6038, "step": 25831 }, { "epoch": 0.7917126394507785, "grad_norm": 1.3161627589380112, "learning_rate": 1.095200790674636e-06, "loss": 0.6526, "step": 25832 }, { "epoch": 0.7917432879735197, "grad_norm": 1.3454969084547435, "learning_rate": 1.0948908191016716e-06, "loss": 0.6349, "step": 25833 }, { "epoch": 0.7917739364962609, "grad_norm": 1.372507391609629, "learning_rate": 1.0945808860067502e-06, "loss": 0.5984, "step": 25834 }, { "epoch": 0.7918045850190021, "grad_norm": 1.3748529683783086, "learning_rate": 1.0942709913929216e-06, "loss": 0.5655, "step": 25835 }, { "epoch": 0.7918352335417432, "grad_norm": 1.358096344965067, "learning_rate": 1.0939611352632413e-06, "loss": 0.6229, "step": 25836 }, { "epoch": 0.7918658820644845, "grad_norm": 1.46126747510998, "learning_rate": 1.0936513176207603e-06, "loss": 0.607, "step": 25837 }, { "epoch": 0.7918965305872256, "grad_norm": 1.3542701261830608, "learning_rate": 1.093341538468532e-06, "loss": 0.6917, "step": 25838 }, { "epoch": 0.7919271791099669, "grad_norm": 1.264519938210022, "learning_rate": 1.0930317978096106e-06, "loss": 0.6216, "step": 25839 }, { "epoch": 0.7919578276327081, "grad_norm": 1.204577164596669, "learning_rate": 1.0927220956470446e-06, "loss": 0.583, "step": 25840 }, { "epoch": 0.7919884761554493, "grad_norm": 0.4444176742977897, "learning_rate": 1.092412431983888e-06, "loss": 0.3935, "step": 25841 }, { "epoch": 0.7920191246781905, "grad_norm": 1.45198999638819, "learning_rate": 1.092102806823193e-06, "loss": 0.5812, "step": 25842 }, { "epoch": 0.7920497732009317, "grad_norm": 1.2230370141567406, "learning_rate": 1.0917932201680075e-06, "loss": 0.6504, "step": 25843 }, { "epoch": 0.7920804217236729, "grad_norm": 1.5577321846759244, "learning_rate": 1.0914836720213835e-06, "loss": 0.6645, "step": 25844 }, { "epoch": 0.7921110702464141, "grad_norm": 1.4002651970031978, "learning_rate": 1.091174162386372e-06, "loss": 0.5836, "step": 25845 }, { "epoch": 0.7921417187691553, "grad_norm": 1.3379309429992396, "learning_rate": 1.0908646912660204e-06, "loss": 0.6443, "step": 25846 }, { "epoch": 0.7921723672918966, "grad_norm": 1.453094454587672, "learning_rate": 1.0905552586633804e-06, "loss": 0.5573, "step": 25847 }, { "epoch": 0.7922030158146377, "grad_norm": 1.3812191905113558, "learning_rate": 1.0902458645814984e-06, "loss": 0.677, "step": 25848 }, { "epoch": 0.792233664337379, "grad_norm": 1.2614340342860049, "learning_rate": 1.0899365090234244e-06, "loss": 0.6278, "step": 25849 }, { "epoch": 0.7922643128601201, "grad_norm": 1.4410038387418944, "learning_rate": 1.0896271919922074e-06, "loss": 0.5425, "step": 25850 }, { "epoch": 0.7922949613828614, "grad_norm": 1.2952357937100083, "learning_rate": 1.0893179134908932e-06, "loss": 0.6008, "step": 25851 }, { "epoch": 0.7923256099056025, "grad_norm": 1.2549095495597578, "learning_rate": 1.0890086735225296e-06, "loss": 0.604, "step": 25852 }, { "epoch": 0.7923562584283438, "grad_norm": 1.4512627645141418, "learning_rate": 1.088699472090166e-06, "loss": 0.5327, "step": 25853 }, { "epoch": 0.7923869069510849, "grad_norm": 1.4349357250447212, "learning_rate": 1.0883903091968461e-06, "loss": 0.6796, "step": 25854 }, { "epoch": 0.7924175554738262, "grad_norm": 1.5040553912897312, "learning_rate": 1.0880811848456169e-06, "loss": 0.5604, "step": 25855 }, { "epoch": 0.7924482039965673, "grad_norm": 1.626940682136973, "learning_rate": 1.0877720990395263e-06, "loss": 0.6887, "step": 25856 }, { "epoch": 0.7924788525193086, "grad_norm": 1.3931407352559422, "learning_rate": 1.087463051781617e-06, "loss": 0.6237, "step": 25857 }, { "epoch": 0.7925095010420498, "grad_norm": 1.3950538981551708, "learning_rate": 1.0871540430749366e-06, "loss": 0.5532, "step": 25858 }, { "epoch": 0.792540149564791, "grad_norm": 1.527229354183288, "learning_rate": 1.0868450729225272e-06, "loss": 0.7021, "step": 25859 }, { "epoch": 0.7925707980875322, "grad_norm": 1.3923596769962867, "learning_rate": 1.086536141327434e-06, "loss": 0.6049, "step": 25860 }, { "epoch": 0.7926014466102734, "grad_norm": 1.4841825043089014, "learning_rate": 1.0862272482927033e-06, "loss": 0.645, "step": 25861 }, { "epoch": 0.7926320951330146, "grad_norm": 1.2574763929265502, "learning_rate": 1.0859183938213759e-06, "loss": 0.5173, "step": 25862 }, { "epoch": 0.7926627436557558, "grad_norm": 1.344872678544502, "learning_rate": 1.0856095779164955e-06, "loss": 0.5127, "step": 25863 }, { "epoch": 0.792693392178497, "grad_norm": 1.492778868186668, "learning_rate": 1.0853008005811067e-06, "loss": 0.57, "step": 25864 }, { "epoch": 0.7927240407012383, "grad_norm": 1.3154018361748059, "learning_rate": 1.0849920618182496e-06, "loss": 0.5819, "step": 25865 }, { "epoch": 0.7927546892239794, "grad_norm": 1.2309937327598757, "learning_rate": 1.0846833616309687e-06, "loss": 0.5284, "step": 25866 }, { "epoch": 0.7927853377467206, "grad_norm": 1.3476913400787676, "learning_rate": 1.084374700022303e-06, "loss": 0.6259, "step": 25867 }, { "epoch": 0.7928159862694618, "grad_norm": 1.2626570785070974, "learning_rate": 1.084066076995296e-06, "loss": 0.5757, "step": 25868 }, { "epoch": 0.792846634792203, "grad_norm": 1.393381615589503, "learning_rate": 1.0837574925529887e-06, "loss": 0.5568, "step": 25869 }, { "epoch": 0.7928772833149442, "grad_norm": 1.40325678174174, "learning_rate": 1.0834489466984193e-06, "loss": 0.5909, "step": 25870 }, { "epoch": 0.7929079318376854, "grad_norm": 1.3452883468726495, "learning_rate": 1.0831404394346294e-06, "loss": 0.6334, "step": 25871 }, { "epoch": 0.7929385803604266, "grad_norm": 1.4772397134678437, "learning_rate": 1.0828319707646606e-06, "loss": 0.6158, "step": 25872 }, { "epoch": 0.7929692288831678, "grad_norm": 1.534236758968482, "learning_rate": 1.082523540691549e-06, "loss": 0.7378, "step": 25873 }, { "epoch": 0.792999877405909, "grad_norm": 0.44318089942834216, "learning_rate": 1.0822151492183359e-06, "loss": 0.3991, "step": 25874 }, { "epoch": 0.7930305259286502, "grad_norm": 1.2554781607134395, "learning_rate": 1.08190679634806e-06, "loss": 0.6291, "step": 25875 }, { "epoch": 0.7930611744513915, "grad_norm": 0.46137309908685886, "learning_rate": 1.0815984820837577e-06, "loss": 0.3963, "step": 25876 }, { "epoch": 0.7930918229741326, "grad_norm": 1.2349762985402226, "learning_rate": 1.0812902064284697e-06, "loss": 0.5175, "step": 25877 }, { "epoch": 0.7931224714968739, "grad_norm": 1.3072617559523696, "learning_rate": 1.0809819693852308e-06, "loss": 0.6248, "step": 25878 }, { "epoch": 0.793153120019615, "grad_norm": 0.45505491112314156, "learning_rate": 1.0806737709570786e-06, "loss": 0.3906, "step": 25879 }, { "epoch": 0.7931837685423563, "grad_norm": 1.4696127413612978, "learning_rate": 1.0803656111470523e-06, "loss": 0.7094, "step": 25880 }, { "epoch": 0.7932144170650974, "grad_norm": 0.4514939483584638, "learning_rate": 1.080057489958185e-06, "loss": 0.3908, "step": 25881 }, { "epoch": 0.7932450655878387, "grad_norm": 1.3474924487888378, "learning_rate": 1.0797494073935143e-06, "loss": 0.5443, "step": 25882 }, { "epoch": 0.7932757141105798, "grad_norm": 1.3641838680563798, "learning_rate": 1.0794413634560775e-06, "loss": 0.614, "step": 25883 }, { "epoch": 0.7933063626333211, "grad_norm": 1.3791263641059994, "learning_rate": 1.0791333581489061e-06, "loss": 0.5896, "step": 25884 }, { "epoch": 0.7933370111560623, "grad_norm": 1.3666966827805909, "learning_rate": 1.0788253914750373e-06, "loss": 0.5437, "step": 25885 }, { "epoch": 0.7933676596788035, "grad_norm": 1.176799893679123, "learning_rate": 1.0785174634375068e-06, "loss": 0.534, "step": 25886 }, { "epoch": 0.7933983082015447, "grad_norm": 1.4651101521176717, "learning_rate": 1.078209574039345e-06, "loss": 0.6081, "step": 25887 }, { "epoch": 0.7934289567242859, "grad_norm": 1.4200071292284997, "learning_rate": 1.0779017232835893e-06, "loss": 0.5609, "step": 25888 }, { "epoch": 0.7934596052470271, "grad_norm": 1.3349562937465902, "learning_rate": 1.0775939111732692e-06, "loss": 0.6217, "step": 25889 }, { "epoch": 0.7934902537697683, "grad_norm": 1.3115603461943857, "learning_rate": 1.077286137711422e-06, "loss": 0.5998, "step": 25890 }, { "epoch": 0.7935209022925095, "grad_norm": 1.4034007358017178, "learning_rate": 1.0769784029010783e-06, "loss": 0.6317, "step": 25891 }, { "epoch": 0.7935515508152508, "grad_norm": 1.2865496480220409, "learning_rate": 1.0766707067452692e-06, "loss": 0.479, "step": 25892 }, { "epoch": 0.7935821993379919, "grad_norm": 1.3988872338107396, "learning_rate": 1.0763630492470267e-06, "loss": 0.5701, "step": 25893 }, { "epoch": 0.7936128478607332, "grad_norm": 1.4043924147614033, "learning_rate": 1.076055430409385e-06, "loss": 0.6158, "step": 25894 }, { "epoch": 0.7936434963834743, "grad_norm": 1.177652294531899, "learning_rate": 1.0757478502353713e-06, "loss": 0.4971, "step": 25895 }, { "epoch": 0.7936741449062156, "grad_norm": 1.3097449494650752, "learning_rate": 1.0754403087280185e-06, "loss": 0.5895, "step": 25896 }, { "epoch": 0.7937047934289567, "grad_norm": 1.378076012082876, "learning_rate": 1.0751328058903576e-06, "loss": 0.5706, "step": 25897 }, { "epoch": 0.7937354419516979, "grad_norm": 1.3672827586390603, "learning_rate": 1.0748253417254162e-06, "loss": 0.5836, "step": 25898 }, { "epoch": 0.7937660904744391, "grad_norm": 1.362687144377657, "learning_rate": 1.0745179162362263e-06, "loss": 0.5763, "step": 25899 }, { "epoch": 0.7937967389971803, "grad_norm": 1.256496724337974, "learning_rate": 1.0742105294258126e-06, "loss": 0.5713, "step": 25900 }, { "epoch": 0.7938273875199215, "grad_norm": 1.4675841422442695, "learning_rate": 1.07390318129721e-06, "loss": 0.6454, "step": 25901 }, { "epoch": 0.7938580360426627, "grad_norm": 1.319650103447533, "learning_rate": 1.0735958718534445e-06, "loss": 0.5463, "step": 25902 }, { "epoch": 0.793888684565404, "grad_norm": 1.311310445910216, "learning_rate": 1.0732886010975414e-06, "loss": 0.6025, "step": 25903 }, { "epoch": 0.7939193330881451, "grad_norm": 1.369174777526594, "learning_rate": 1.0729813690325307e-06, "loss": 0.668, "step": 25904 }, { "epoch": 0.7939499816108864, "grad_norm": 1.4755914549270737, "learning_rate": 1.0726741756614406e-06, "loss": 0.7466, "step": 25905 }, { "epoch": 0.7939806301336275, "grad_norm": 1.3953587477287774, "learning_rate": 1.0723670209872956e-06, "loss": 0.5755, "step": 25906 }, { "epoch": 0.7940112786563688, "grad_norm": 0.4325375649293084, "learning_rate": 1.0720599050131225e-06, "loss": 0.3818, "step": 25907 }, { "epoch": 0.7940419271791099, "grad_norm": 1.3444378846156748, "learning_rate": 1.0717528277419491e-06, "loss": 0.6626, "step": 25908 }, { "epoch": 0.7940725757018512, "grad_norm": 1.1710042941575798, "learning_rate": 1.071445789176801e-06, "loss": 0.5869, "step": 25909 }, { "epoch": 0.7941032242245923, "grad_norm": 1.0893423462844396, "learning_rate": 1.0711387893207026e-06, "loss": 0.5336, "step": 25910 }, { "epoch": 0.7941338727473336, "grad_norm": 1.5021148771313477, "learning_rate": 1.0708318281766784e-06, "loss": 0.6696, "step": 25911 }, { "epoch": 0.7941645212700748, "grad_norm": 0.45111172205164785, "learning_rate": 1.0705249057477524e-06, "loss": 0.3944, "step": 25912 }, { "epoch": 0.794195169792816, "grad_norm": 1.388281955465662, "learning_rate": 1.070218022036952e-06, "loss": 0.6501, "step": 25913 }, { "epoch": 0.7942258183155572, "grad_norm": 1.4897918540636748, "learning_rate": 1.0699111770472976e-06, "loss": 0.7548, "step": 25914 }, { "epoch": 0.7942564668382984, "grad_norm": 1.3757093328442882, "learning_rate": 1.0696043707818132e-06, "loss": 0.688, "step": 25915 }, { "epoch": 0.7942871153610396, "grad_norm": 1.2630955460578808, "learning_rate": 1.0692976032435232e-06, "loss": 0.6145, "step": 25916 }, { "epoch": 0.7943177638837808, "grad_norm": 0.4363100743520718, "learning_rate": 1.068990874435451e-06, "loss": 0.3883, "step": 25917 }, { "epoch": 0.794348412406522, "grad_norm": 0.4495683859264894, "learning_rate": 1.0686841843606171e-06, "loss": 0.3997, "step": 25918 }, { "epoch": 0.7943790609292632, "grad_norm": 1.4441106081959334, "learning_rate": 1.0683775330220414e-06, "loss": 0.5964, "step": 25919 }, { "epoch": 0.7944097094520044, "grad_norm": 1.2446197452144339, "learning_rate": 1.0680709204227507e-06, "loss": 0.6501, "step": 25920 }, { "epoch": 0.7944403579747457, "grad_norm": 1.3024124778911264, "learning_rate": 1.0677643465657628e-06, "loss": 0.5845, "step": 25921 }, { "epoch": 0.7944710064974868, "grad_norm": 0.44420295016188993, "learning_rate": 1.0674578114540979e-06, "loss": 0.3916, "step": 25922 }, { "epoch": 0.7945016550202281, "grad_norm": 1.2043412035699008, "learning_rate": 1.0671513150907776e-06, "loss": 0.5697, "step": 25923 }, { "epoch": 0.7945323035429692, "grad_norm": 1.3171176953599852, "learning_rate": 1.066844857478822e-06, "loss": 0.5754, "step": 25924 }, { "epoch": 0.7945629520657105, "grad_norm": 1.9421224756583562, "learning_rate": 1.0665384386212501e-06, "loss": 0.6278, "step": 25925 }, { "epoch": 0.7945936005884516, "grad_norm": 1.3345130860375067, "learning_rate": 1.0662320585210806e-06, "loss": 0.6186, "step": 25926 }, { "epoch": 0.7946242491111929, "grad_norm": 1.6029503991538927, "learning_rate": 1.0659257171813337e-06, "loss": 0.6676, "step": 25927 }, { "epoch": 0.794654897633934, "grad_norm": 1.2749528502021616, "learning_rate": 1.0656194146050281e-06, "loss": 0.5271, "step": 25928 }, { "epoch": 0.7946855461566752, "grad_norm": 1.2221303778657822, "learning_rate": 1.0653131507951814e-06, "loss": 0.51, "step": 25929 }, { "epoch": 0.7947161946794165, "grad_norm": 1.310620688033817, "learning_rate": 1.0650069257548084e-06, "loss": 0.6748, "step": 25930 }, { "epoch": 0.7947468432021576, "grad_norm": 1.1654316026853757, "learning_rate": 1.0647007394869313e-06, "loss": 0.5375, "step": 25931 }, { "epoch": 0.7947774917248989, "grad_norm": 1.1496969962132204, "learning_rate": 1.0643945919945652e-06, "loss": 0.5167, "step": 25932 }, { "epoch": 0.79480814024764, "grad_norm": 1.248745030429711, "learning_rate": 1.064088483280724e-06, "loss": 0.5769, "step": 25933 }, { "epoch": 0.7948387887703813, "grad_norm": 1.3762894151286318, "learning_rate": 1.063782413348427e-06, "loss": 0.5659, "step": 25934 }, { "epoch": 0.7948694372931224, "grad_norm": 0.45217490481183265, "learning_rate": 1.0634763822006883e-06, "loss": 0.4048, "step": 25935 }, { "epoch": 0.7949000858158637, "grad_norm": 1.323175370158012, "learning_rate": 1.0631703898405255e-06, "loss": 0.6091, "step": 25936 }, { "epoch": 0.7949307343386048, "grad_norm": 1.308888802163555, "learning_rate": 1.0628644362709512e-06, "loss": 0.5223, "step": 25937 }, { "epoch": 0.7949613828613461, "grad_norm": 1.3163160091042592, "learning_rate": 1.0625585214949802e-06, "loss": 0.6343, "step": 25938 }, { "epoch": 0.7949920313840872, "grad_norm": 1.4843706605723892, "learning_rate": 1.0622526455156297e-06, "loss": 0.6148, "step": 25939 }, { "epoch": 0.7950226799068285, "grad_norm": 1.2776340652014837, "learning_rate": 1.0619468083359107e-06, "loss": 0.571, "step": 25940 }, { "epoch": 0.7950533284295697, "grad_norm": 0.4409848887680531, "learning_rate": 1.0616410099588349e-06, "loss": 0.3896, "step": 25941 }, { "epoch": 0.7950839769523109, "grad_norm": 0.44582522240515904, "learning_rate": 1.0613352503874209e-06, "loss": 0.3956, "step": 25942 }, { "epoch": 0.7951146254750521, "grad_norm": 0.4546378349739301, "learning_rate": 1.061029529624677e-06, "loss": 0.3889, "step": 25943 }, { "epoch": 0.7951452739977933, "grad_norm": 1.1245003000429787, "learning_rate": 1.060723847673618e-06, "loss": 0.6038, "step": 25944 }, { "epoch": 0.7951759225205345, "grad_norm": 1.2395294650679805, "learning_rate": 1.0604182045372535e-06, "loss": 0.516, "step": 25945 }, { "epoch": 0.7952065710432757, "grad_norm": 1.2671841308332126, "learning_rate": 1.060112600218597e-06, "loss": 0.6031, "step": 25946 }, { "epoch": 0.7952372195660169, "grad_norm": 1.3590152106049167, "learning_rate": 1.0598070347206595e-06, "loss": 0.5675, "step": 25947 }, { "epoch": 0.7952678680887582, "grad_norm": 0.44811392014001483, "learning_rate": 1.059501508046451e-06, "loss": 0.4099, "step": 25948 }, { "epoch": 0.7952985166114993, "grad_norm": 0.4526072666354316, "learning_rate": 1.0591960201989821e-06, "loss": 0.3781, "step": 25949 }, { "epoch": 0.7953291651342406, "grad_norm": 1.4283902242390343, "learning_rate": 1.0588905711812641e-06, "loss": 0.6113, "step": 25950 }, { "epoch": 0.7953598136569817, "grad_norm": 0.4601483709136035, "learning_rate": 1.058585160996306e-06, "loss": 0.41, "step": 25951 }, { "epoch": 0.795390462179723, "grad_norm": 1.3411677053787308, "learning_rate": 1.0582797896471143e-06, "loss": 0.6357, "step": 25952 }, { "epoch": 0.7954211107024641, "grad_norm": 1.1969632462566075, "learning_rate": 1.0579744571367023e-06, "loss": 0.573, "step": 25953 }, { "epoch": 0.7954517592252054, "grad_norm": 1.5445682180481626, "learning_rate": 1.0576691634680758e-06, "loss": 0.6569, "step": 25954 }, { "epoch": 0.7954824077479465, "grad_norm": 1.4567234779896112, "learning_rate": 1.057363908644245e-06, "loss": 0.6512, "step": 25955 }, { "epoch": 0.7955130562706878, "grad_norm": 1.2727832508376518, "learning_rate": 1.0570586926682153e-06, "loss": 0.5574, "step": 25956 }, { "epoch": 0.795543704793429, "grad_norm": 0.4391719810570889, "learning_rate": 1.0567535155429947e-06, "loss": 0.3859, "step": 25957 }, { "epoch": 0.7955743533161702, "grad_norm": 1.199383870482794, "learning_rate": 1.0564483772715922e-06, "loss": 0.5426, "step": 25958 }, { "epoch": 0.7956050018389114, "grad_norm": 1.4192530739053428, "learning_rate": 1.0561432778570113e-06, "loss": 0.63, "step": 25959 }, { "epoch": 0.7956356503616525, "grad_norm": 1.2473903004605584, "learning_rate": 1.0558382173022596e-06, "loss": 0.6191, "step": 25960 }, { "epoch": 0.7956662988843938, "grad_norm": 1.306946061973518, "learning_rate": 1.0555331956103448e-06, "loss": 0.7236, "step": 25961 }, { "epoch": 0.7956969474071349, "grad_norm": 1.3553398775210792, "learning_rate": 1.0552282127842694e-06, "loss": 0.6727, "step": 25962 }, { "epoch": 0.7957275959298762, "grad_norm": 1.4850610127478259, "learning_rate": 1.054923268827041e-06, "loss": 0.4823, "step": 25963 }, { "epoch": 0.7957582444526173, "grad_norm": 1.1864674391350536, "learning_rate": 1.0546183637416612e-06, "loss": 0.5719, "step": 25964 }, { "epoch": 0.7957888929753586, "grad_norm": 0.4635367150535194, "learning_rate": 1.054313497531137e-06, "loss": 0.3918, "step": 25965 }, { "epoch": 0.7958195414980997, "grad_norm": 1.354577776833168, "learning_rate": 1.054008670198472e-06, "loss": 0.573, "step": 25966 }, { "epoch": 0.795850190020841, "grad_norm": 1.702445463372357, "learning_rate": 1.0537038817466682e-06, "loss": 0.5721, "step": 25967 }, { "epoch": 0.7958808385435822, "grad_norm": 1.3575886110687077, "learning_rate": 1.05339913217873e-06, "loss": 0.6366, "step": 25968 }, { "epoch": 0.7959114870663234, "grad_norm": 0.44042459100030623, "learning_rate": 1.0530944214976608e-06, "loss": 0.3965, "step": 25969 }, { "epoch": 0.7959421355890646, "grad_norm": 1.3028811621199474, "learning_rate": 1.052789749706461e-06, "loss": 0.6565, "step": 25970 }, { "epoch": 0.7959727841118058, "grad_norm": 1.2095757669164706, "learning_rate": 1.0524851168081341e-06, "loss": 0.582, "step": 25971 }, { "epoch": 0.796003432634547, "grad_norm": 1.3229519992057417, "learning_rate": 1.052180522805682e-06, "loss": 0.6651, "step": 25972 }, { "epoch": 0.7960340811572882, "grad_norm": 1.2855510594376842, "learning_rate": 1.0518759677021046e-06, "loss": 0.6439, "step": 25973 }, { "epoch": 0.7960647296800294, "grad_norm": 1.5347609234066244, "learning_rate": 1.0515714515004043e-06, "loss": 0.7115, "step": 25974 }, { "epoch": 0.7960953782027707, "grad_norm": 2.7803078777010315, "learning_rate": 1.0512669742035798e-06, "loss": 0.5293, "step": 25975 }, { "epoch": 0.7961260267255118, "grad_norm": 1.358754759316276, "learning_rate": 1.050962535814632e-06, "loss": 0.6667, "step": 25976 }, { "epoch": 0.7961566752482531, "grad_norm": 1.2485080662129615, "learning_rate": 1.0506581363365615e-06, "loss": 0.6761, "step": 25977 }, { "epoch": 0.7961873237709942, "grad_norm": 1.4224839324974698, "learning_rate": 1.0503537757723664e-06, "loss": 0.653, "step": 25978 }, { "epoch": 0.7962179722937355, "grad_norm": 1.2354880781087847, "learning_rate": 1.0500494541250455e-06, "loss": 0.5291, "step": 25979 }, { "epoch": 0.7962486208164766, "grad_norm": 1.4356998698044967, "learning_rate": 1.0497451713975997e-06, "loss": 0.5702, "step": 25980 }, { "epoch": 0.7962792693392179, "grad_norm": 1.3964466049694564, "learning_rate": 1.049440927593024e-06, "loss": 0.5573, "step": 25981 }, { "epoch": 0.796309917861959, "grad_norm": 1.33000356950097, "learning_rate": 1.0491367227143173e-06, "loss": 0.698, "step": 25982 }, { "epoch": 0.7963405663847003, "grad_norm": 0.4445806655612377, "learning_rate": 1.0488325567644792e-06, "loss": 0.4196, "step": 25983 }, { "epoch": 0.7963712149074414, "grad_norm": 1.3735001907907645, "learning_rate": 1.0485284297465032e-06, "loss": 0.6553, "step": 25984 }, { "epoch": 0.7964018634301827, "grad_norm": 1.4434166306564258, "learning_rate": 1.048224341663389e-06, "loss": 0.5722, "step": 25985 }, { "epoch": 0.7964325119529239, "grad_norm": 1.3190445998052724, "learning_rate": 1.0479202925181303e-06, "loss": 0.6207, "step": 25986 }, { "epoch": 0.7964631604756651, "grad_norm": 1.2093113654206848, "learning_rate": 1.0476162823137238e-06, "loss": 0.5596, "step": 25987 }, { "epoch": 0.7964938089984063, "grad_norm": 1.1197017618467426, "learning_rate": 1.047312311053167e-06, "loss": 0.5658, "step": 25988 }, { "epoch": 0.7965244575211475, "grad_norm": 1.4387208937739024, "learning_rate": 1.047008378739452e-06, "loss": 0.707, "step": 25989 }, { "epoch": 0.7965551060438887, "grad_norm": 0.44026952253719803, "learning_rate": 1.0467044853755741e-06, "loss": 0.4039, "step": 25990 }, { "epoch": 0.7965857545666298, "grad_norm": 0.43967561717831416, "learning_rate": 1.0464006309645308e-06, "loss": 0.3892, "step": 25991 }, { "epoch": 0.7966164030893711, "grad_norm": 1.2432201873285929, "learning_rate": 1.046096815509312e-06, "loss": 0.6415, "step": 25992 }, { "epoch": 0.7966470516121122, "grad_norm": 1.621212617904962, "learning_rate": 1.0457930390129129e-06, "loss": 0.7225, "step": 25993 }, { "epoch": 0.7966777001348535, "grad_norm": 0.4665694792992582, "learning_rate": 1.045489301478328e-06, "loss": 0.4189, "step": 25994 }, { "epoch": 0.7967083486575947, "grad_norm": 0.44068267703002456, "learning_rate": 1.0451856029085473e-06, "loss": 0.3894, "step": 25995 }, { "epoch": 0.7967389971803359, "grad_norm": 1.2876918162572757, "learning_rate": 1.044881943306566e-06, "loss": 0.5967, "step": 25996 }, { "epoch": 0.7967696457030771, "grad_norm": 1.367013289535944, "learning_rate": 1.0445783226753725e-06, "loss": 0.5625, "step": 25997 }, { "epoch": 0.7968002942258183, "grad_norm": 1.0694205499751037, "learning_rate": 1.0442747410179633e-06, "loss": 0.5208, "step": 25998 }, { "epoch": 0.7968309427485595, "grad_norm": 1.3313957818636495, "learning_rate": 1.0439711983373275e-06, "loss": 0.678, "step": 25999 }, { "epoch": 0.7968615912713007, "grad_norm": 1.362959580457042, "learning_rate": 1.0436676946364544e-06, "loss": 0.6064, "step": 26000 }, { "epoch": 0.7968922397940419, "grad_norm": 1.5457754262473897, "learning_rate": 1.0433642299183355e-06, "loss": 0.6477, "step": 26001 }, { "epoch": 0.7969228883167832, "grad_norm": 1.419906757689801, "learning_rate": 1.0430608041859624e-06, "loss": 0.5942, "step": 26002 }, { "epoch": 0.7969535368395243, "grad_norm": 1.3294146541135328, "learning_rate": 1.042757417442322e-06, "loss": 0.601, "step": 26003 }, { "epoch": 0.7969841853622656, "grad_norm": 1.1852569851749084, "learning_rate": 1.042454069690406e-06, "loss": 0.6544, "step": 26004 }, { "epoch": 0.7970148338850067, "grad_norm": 1.3289308338895838, "learning_rate": 1.0421507609332038e-06, "loss": 0.5739, "step": 26005 }, { "epoch": 0.797045482407748, "grad_norm": 1.2795056937780631, "learning_rate": 1.0418474911737014e-06, "loss": 0.6103, "step": 26006 }, { "epoch": 0.7970761309304891, "grad_norm": 1.3139921179849259, "learning_rate": 1.0415442604148896e-06, "loss": 0.5349, "step": 26007 }, { "epoch": 0.7971067794532304, "grad_norm": 1.381276206009072, "learning_rate": 1.0412410686597542e-06, "loss": 0.6058, "step": 26008 }, { "epoch": 0.7971374279759715, "grad_norm": 1.2519699405542053, "learning_rate": 1.0409379159112826e-06, "loss": 0.6766, "step": 26009 }, { "epoch": 0.7971680764987128, "grad_norm": 1.2260322771865784, "learning_rate": 1.0406348021724645e-06, "loss": 0.6135, "step": 26010 }, { "epoch": 0.797198725021454, "grad_norm": 1.3076862622287049, "learning_rate": 1.0403317274462833e-06, "loss": 0.5519, "step": 26011 }, { "epoch": 0.7972293735441952, "grad_norm": 1.2813795297872372, "learning_rate": 1.0400286917357267e-06, "loss": 0.6243, "step": 26012 }, { "epoch": 0.7972600220669364, "grad_norm": 1.3009769882418598, "learning_rate": 1.0397256950437822e-06, "loss": 0.5341, "step": 26013 }, { "epoch": 0.7972906705896776, "grad_norm": 1.3925533691138974, "learning_rate": 1.0394227373734322e-06, "loss": 0.615, "step": 26014 }, { "epoch": 0.7973213191124188, "grad_norm": 1.2335947766531015, "learning_rate": 1.0391198187276646e-06, "loss": 0.5198, "step": 26015 }, { "epoch": 0.79735196763516, "grad_norm": 1.4871102610699445, "learning_rate": 1.03881693910946e-06, "loss": 0.6167, "step": 26016 }, { "epoch": 0.7973826161579012, "grad_norm": 1.4277536384764775, "learning_rate": 1.0385140985218085e-06, "loss": 0.6589, "step": 26017 }, { "epoch": 0.7974132646806424, "grad_norm": 1.6696520846301501, "learning_rate": 1.038211296967691e-06, "loss": 0.6391, "step": 26018 }, { "epoch": 0.7974439132033836, "grad_norm": 0.49285137285602043, "learning_rate": 1.0379085344500905e-06, "loss": 0.3978, "step": 26019 }, { "epoch": 0.7974745617261249, "grad_norm": 1.200899413130172, "learning_rate": 1.0376058109719906e-06, "loss": 0.6809, "step": 26020 }, { "epoch": 0.797505210248866, "grad_norm": 1.191043256776814, "learning_rate": 1.037303126536376e-06, "loss": 0.5723, "step": 26021 }, { "epoch": 0.7975358587716072, "grad_norm": 1.3517749236290502, "learning_rate": 1.0370004811462258e-06, "loss": 0.6149, "step": 26022 }, { "epoch": 0.7975665072943484, "grad_norm": 1.3053650165212622, "learning_rate": 1.0366978748045236e-06, "loss": 0.6024, "step": 26023 }, { "epoch": 0.7975971558170896, "grad_norm": 2.163704076656925, "learning_rate": 1.0363953075142519e-06, "loss": 0.5788, "step": 26024 }, { "epoch": 0.7976278043398308, "grad_norm": 1.4384312341176575, "learning_rate": 1.0360927792783925e-06, "loss": 0.6971, "step": 26025 }, { "epoch": 0.797658452862572, "grad_norm": 1.4085294502797596, "learning_rate": 1.0357902900999256e-06, "loss": 0.6579, "step": 26026 }, { "epoch": 0.7976891013853132, "grad_norm": 1.290889512554438, "learning_rate": 1.035487839981828e-06, "loss": 0.629, "step": 26027 }, { "epoch": 0.7977197499080544, "grad_norm": 1.2721206968030339, "learning_rate": 1.0351854289270857e-06, "loss": 0.5876, "step": 26028 }, { "epoch": 0.7977503984307956, "grad_norm": 1.166870423509128, "learning_rate": 1.0348830569386764e-06, "loss": 0.547, "step": 26029 }, { "epoch": 0.7977810469535368, "grad_norm": 1.4278780754623155, "learning_rate": 1.034580724019577e-06, "loss": 0.6737, "step": 26030 }, { "epoch": 0.7978116954762781, "grad_norm": 1.1932435298719573, "learning_rate": 1.0342784301727688e-06, "loss": 0.5395, "step": 26031 }, { "epoch": 0.7978423439990192, "grad_norm": 0.45288785975371143, "learning_rate": 1.0339761754012307e-06, "loss": 0.3829, "step": 26032 }, { "epoch": 0.7978729925217605, "grad_norm": 1.3410473050025051, "learning_rate": 1.0336739597079387e-06, "loss": 0.7072, "step": 26033 }, { "epoch": 0.7979036410445016, "grad_norm": 1.3533958489395834, "learning_rate": 1.0333717830958729e-06, "loss": 0.5154, "step": 26034 }, { "epoch": 0.7979342895672429, "grad_norm": 1.5607028555406603, "learning_rate": 1.0330696455680089e-06, "loss": 0.6644, "step": 26035 }, { "epoch": 0.797964938089984, "grad_norm": 1.214666967878608, "learning_rate": 1.0327675471273262e-06, "loss": 0.579, "step": 26036 }, { "epoch": 0.7979955866127253, "grad_norm": 1.3505755680007323, "learning_rate": 1.0324654877768003e-06, "loss": 0.5201, "step": 26037 }, { "epoch": 0.7980262351354664, "grad_norm": 1.3468707638357207, "learning_rate": 1.0321634675194042e-06, "loss": 0.5824, "step": 26038 }, { "epoch": 0.7980568836582077, "grad_norm": 1.406803010519991, "learning_rate": 1.0318614863581188e-06, "loss": 0.6413, "step": 26039 }, { "epoch": 0.7980875321809489, "grad_norm": 1.2581671989091923, "learning_rate": 1.0315595442959182e-06, "loss": 0.4953, "step": 26040 }, { "epoch": 0.7981181807036901, "grad_norm": 1.55944418552084, "learning_rate": 1.0312576413357755e-06, "loss": 0.602, "step": 26041 }, { "epoch": 0.7981488292264313, "grad_norm": 1.0942000616654448, "learning_rate": 1.0309557774806662e-06, "loss": 0.5769, "step": 26042 }, { "epoch": 0.7981794777491725, "grad_norm": 1.3035502580303715, "learning_rate": 1.0306539527335652e-06, "loss": 0.6358, "step": 26043 }, { "epoch": 0.7982101262719137, "grad_norm": 1.3901522360438008, "learning_rate": 1.0303521670974481e-06, "loss": 0.5319, "step": 26044 }, { "epoch": 0.7982407747946549, "grad_norm": 0.44826075480990635, "learning_rate": 1.0300504205752853e-06, "loss": 0.4088, "step": 26045 }, { "epoch": 0.7982714233173961, "grad_norm": 1.434224747749744, "learning_rate": 1.0297487131700512e-06, "loss": 0.6595, "step": 26046 }, { "epoch": 0.7983020718401374, "grad_norm": 0.4305166410533537, "learning_rate": 1.0294470448847204e-06, "loss": 0.4021, "step": 26047 }, { "epoch": 0.7983327203628785, "grad_norm": 1.3030423252192322, "learning_rate": 1.029145415722264e-06, "loss": 0.5847, "step": 26048 }, { "epoch": 0.7983633688856198, "grad_norm": 1.4018115742922872, "learning_rate": 1.028843825685651e-06, "loss": 0.6476, "step": 26049 }, { "epoch": 0.7983940174083609, "grad_norm": 1.4379202075085253, "learning_rate": 1.0285422747778583e-06, "loss": 0.5913, "step": 26050 }, { "epoch": 0.7984246659311022, "grad_norm": 1.1926050844549023, "learning_rate": 1.028240763001855e-06, "loss": 0.581, "step": 26051 }, { "epoch": 0.7984553144538433, "grad_norm": 1.2485823833293719, "learning_rate": 1.02793929036061e-06, "loss": 0.6234, "step": 26052 }, { "epoch": 0.7984859629765845, "grad_norm": 1.3726415640207899, "learning_rate": 1.0276378568570955e-06, "loss": 0.6961, "step": 26053 }, { "epoch": 0.7985166114993257, "grad_norm": 1.4127167099704958, "learning_rate": 1.0273364624942816e-06, "loss": 0.5953, "step": 26054 }, { "epoch": 0.7985472600220669, "grad_norm": 1.5126837326109202, "learning_rate": 1.0270351072751394e-06, "loss": 0.6059, "step": 26055 }, { "epoch": 0.7985779085448081, "grad_norm": 1.3796491807895082, "learning_rate": 1.026733791202636e-06, "loss": 0.5308, "step": 26056 }, { "epoch": 0.7986085570675493, "grad_norm": 1.643290167514896, "learning_rate": 1.0264325142797405e-06, "loss": 0.6681, "step": 26057 }, { "epoch": 0.7986392055902906, "grad_norm": 1.4158102623002422, "learning_rate": 1.026131276509424e-06, "loss": 0.5458, "step": 26058 }, { "epoch": 0.7986698541130317, "grad_norm": 1.325647370584118, "learning_rate": 1.0258300778946522e-06, "loss": 0.639, "step": 26059 }, { "epoch": 0.798700502635773, "grad_norm": 1.3897184512212384, "learning_rate": 1.0255289184383921e-06, "loss": 0.5561, "step": 26060 }, { "epoch": 0.7987311511585141, "grad_norm": 1.3417954556925862, "learning_rate": 1.0252277981436131e-06, "loss": 0.6213, "step": 26061 }, { "epoch": 0.7987617996812554, "grad_norm": 1.238485280395751, "learning_rate": 1.024926717013281e-06, "loss": 0.6288, "step": 26062 }, { "epoch": 0.7987924482039965, "grad_norm": 1.4526332475339352, "learning_rate": 1.0246256750503648e-06, "loss": 0.5773, "step": 26063 }, { "epoch": 0.7988230967267378, "grad_norm": 0.4365040366659157, "learning_rate": 1.024324672257827e-06, "loss": 0.382, "step": 26064 }, { "epoch": 0.7988537452494789, "grad_norm": 1.3238460209635312, "learning_rate": 1.0240237086386363e-06, "loss": 0.6046, "step": 26065 }, { "epoch": 0.7988843937722202, "grad_norm": 1.3237071123682616, "learning_rate": 1.023722784195758e-06, "loss": 0.5649, "step": 26066 }, { "epoch": 0.7989150422949614, "grad_norm": 1.5974868059491203, "learning_rate": 1.0234218989321564e-06, "loss": 0.5726, "step": 26067 }, { "epoch": 0.7989456908177026, "grad_norm": 0.43515555129426425, "learning_rate": 1.023121052850794e-06, "loss": 0.3938, "step": 26068 }, { "epoch": 0.7989763393404438, "grad_norm": 1.1890766843649463, "learning_rate": 1.0228202459546398e-06, "loss": 0.5769, "step": 26069 }, { "epoch": 0.799006987863185, "grad_norm": 1.3880752970678043, "learning_rate": 1.0225194782466546e-06, "loss": 0.6456, "step": 26070 }, { "epoch": 0.7990376363859262, "grad_norm": 1.3844172567670623, "learning_rate": 1.0222187497298037e-06, "loss": 0.6326, "step": 26071 }, { "epoch": 0.7990682849086674, "grad_norm": 1.496625200639839, "learning_rate": 1.0219180604070472e-06, "loss": 0.639, "step": 26072 }, { "epoch": 0.7990989334314086, "grad_norm": 1.3087520504114676, "learning_rate": 1.0216174102813504e-06, "loss": 0.4959, "step": 26073 }, { "epoch": 0.7991295819541498, "grad_norm": 1.44055726635423, "learning_rate": 1.0213167993556767e-06, "loss": 0.6763, "step": 26074 }, { "epoch": 0.799160230476891, "grad_norm": 1.3158731775295915, "learning_rate": 1.021016227632985e-06, "loss": 0.586, "step": 26075 }, { "epoch": 0.7991908789996323, "grad_norm": 1.4012357816999734, "learning_rate": 1.0207156951162384e-06, "loss": 0.6184, "step": 26076 }, { "epoch": 0.7992215275223734, "grad_norm": 1.2858364192749898, "learning_rate": 1.0204152018083995e-06, "loss": 0.6237, "step": 26077 }, { "epoch": 0.7992521760451147, "grad_norm": 1.4671657606429527, "learning_rate": 1.0201147477124284e-06, "loss": 0.5836, "step": 26078 }, { "epoch": 0.7992828245678558, "grad_norm": 1.3373789738031114, "learning_rate": 1.0198143328312816e-06, "loss": 0.5403, "step": 26079 }, { "epoch": 0.7993134730905971, "grad_norm": 0.45277925870861707, "learning_rate": 1.0195139571679258e-06, "loss": 0.3888, "step": 26080 }, { "epoch": 0.7993441216133382, "grad_norm": 1.2860925594411452, "learning_rate": 1.0192136207253156e-06, "loss": 0.624, "step": 26081 }, { "epoch": 0.7993747701360795, "grad_norm": 1.3758275867698688, "learning_rate": 1.0189133235064135e-06, "loss": 0.5568, "step": 26082 }, { "epoch": 0.7994054186588206, "grad_norm": 1.4763237586098314, "learning_rate": 1.0186130655141763e-06, "loss": 0.5943, "step": 26083 }, { "epoch": 0.7994360671815618, "grad_norm": 1.4249637694959933, "learning_rate": 1.0183128467515625e-06, "loss": 0.6412, "step": 26084 }, { "epoch": 0.799466715704303, "grad_norm": 1.2864357274997176, "learning_rate": 1.018012667221533e-06, "loss": 0.6284, "step": 26085 }, { "epoch": 0.7994973642270442, "grad_norm": 1.4129247484429532, "learning_rate": 1.0177125269270415e-06, "loss": 0.5578, "step": 26086 }, { "epoch": 0.7995280127497855, "grad_norm": 1.3450695489434186, "learning_rate": 1.0174124258710477e-06, "loss": 0.5998, "step": 26087 }, { "epoch": 0.7995586612725266, "grad_norm": 1.2504559988541477, "learning_rate": 1.0171123640565095e-06, "loss": 0.5831, "step": 26088 }, { "epoch": 0.7995893097952679, "grad_norm": 1.4992330207768092, "learning_rate": 1.0168123414863813e-06, "loss": 0.6436, "step": 26089 }, { "epoch": 0.799619958318009, "grad_norm": 1.2407868400973496, "learning_rate": 1.01651235816362e-06, "loss": 0.6287, "step": 26090 }, { "epoch": 0.7996506068407503, "grad_norm": 1.3153236302499587, "learning_rate": 1.0162124140911827e-06, "loss": 0.6184, "step": 26091 }, { "epoch": 0.7996812553634914, "grad_norm": 0.47889511416922337, "learning_rate": 1.015912509272023e-06, "loss": 0.3865, "step": 26092 }, { "epoch": 0.7997119038862327, "grad_norm": 1.2422164923930998, "learning_rate": 1.0156126437090973e-06, "loss": 0.7086, "step": 26093 }, { "epoch": 0.7997425524089739, "grad_norm": 1.4189790556819106, "learning_rate": 1.015312817405359e-06, "loss": 0.5834, "step": 26094 }, { "epoch": 0.7997732009317151, "grad_norm": 1.3304481230544174, "learning_rate": 1.0150130303637628e-06, "loss": 0.6141, "step": 26095 }, { "epoch": 0.7998038494544563, "grad_norm": 1.3232126658490833, "learning_rate": 1.0147132825872641e-06, "loss": 0.6411, "step": 26096 }, { "epoch": 0.7998344979771975, "grad_norm": 1.4235403527031416, "learning_rate": 1.0144135740788142e-06, "loss": 0.6063, "step": 26097 }, { "epoch": 0.7998651464999387, "grad_norm": 1.386828667409563, "learning_rate": 1.014113904841366e-06, "loss": 0.589, "step": 26098 }, { "epoch": 0.7998957950226799, "grad_norm": 1.413034884390436, "learning_rate": 1.0138142748778756e-06, "loss": 0.6745, "step": 26099 }, { "epoch": 0.7999264435454211, "grad_norm": 1.4368072003455203, "learning_rate": 1.0135146841912918e-06, "loss": 0.5766, "step": 26100 }, { "epoch": 0.7999570920681623, "grad_norm": 1.373606843290622, "learning_rate": 1.0132151327845674e-06, "loss": 0.593, "step": 26101 }, { "epoch": 0.7999877405909035, "grad_norm": 1.5094730398722398, "learning_rate": 1.012915620660656e-06, "loss": 0.5491, "step": 26102 }, { "epoch": 0.8000183891136448, "grad_norm": 1.286056820559514, "learning_rate": 1.0126161478225055e-06, "loss": 0.5076, "step": 26103 }, { "epoch": 0.8000490376363859, "grad_norm": 0.4599527604106152, "learning_rate": 1.01231671427307e-06, "loss": 0.3932, "step": 26104 }, { "epoch": 0.8000796861591272, "grad_norm": 1.537740666016933, "learning_rate": 1.0120173200152967e-06, "loss": 0.5892, "step": 26105 }, { "epoch": 0.8001103346818683, "grad_norm": 1.3790718668481028, "learning_rate": 1.011717965052137e-06, "loss": 0.6128, "step": 26106 }, { "epoch": 0.8001409832046096, "grad_norm": 1.2392162798597328, "learning_rate": 1.011418649386542e-06, "loss": 0.596, "step": 26107 }, { "epoch": 0.8001716317273507, "grad_norm": 0.45225291058184136, "learning_rate": 1.0111193730214581e-06, "loss": 0.4016, "step": 26108 }, { "epoch": 0.800202280250092, "grad_norm": 1.2071764988573606, "learning_rate": 1.0108201359598357e-06, "loss": 0.536, "step": 26109 }, { "epoch": 0.8002329287728331, "grad_norm": 1.479011623836651, "learning_rate": 1.0105209382046244e-06, "loss": 0.6036, "step": 26110 }, { "epoch": 0.8002635772955744, "grad_norm": 1.2931650997212767, "learning_rate": 1.01022177975877e-06, "loss": 0.5551, "step": 26111 }, { "epoch": 0.8002942258183156, "grad_norm": 1.2208860588152755, "learning_rate": 1.0099226606252226e-06, "loss": 0.5032, "step": 26112 }, { "epoch": 0.8003248743410568, "grad_norm": 1.3075503087884868, "learning_rate": 1.0096235808069266e-06, "loss": 0.6243, "step": 26113 }, { "epoch": 0.800355522863798, "grad_norm": 1.260052169632096, "learning_rate": 1.0093245403068308e-06, "loss": 0.646, "step": 26114 }, { "epoch": 0.8003861713865391, "grad_norm": 1.2618493365214092, "learning_rate": 1.0090255391278819e-06, "loss": 0.5934, "step": 26115 }, { "epoch": 0.8004168199092804, "grad_norm": 1.2862757161614047, "learning_rate": 1.008726577273025e-06, "loss": 0.505, "step": 26116 }, { "epoch": 0.8004474684320215, "grad_norm": 1.294960309761468, "learning_rate": 1.008427654745206e-06, "loss": 0.5791, "step": 26117 }, { "epoch": 0.8004781169547628, "grad_norm": 1.4705702122070161, "learning_rate": 1.008128771547372e-06, "loss": 0.6267, "step": 26118 }, { "epoch": 0.8005087654775039, "grad_norm": 1.521700775230297, "learning_rate": 1.007829927682465e-06, "loss": 0.6192, "step": 26119 }, { "epoch": 0.8005394140002452, "grad_norm": 1.3568049547021777, "learning_rate": 1.0075311231534314e-06, "loss": 0.5283, "step": 26120 }, { "epoch": 0.8005700625229863, "grad_norm": 1.3054988081785108, "learning_rate": 1.0072323579632165e-06, "loss": 0.6592, "step": 26121 }, { "epoch": 0.8006007110457276, "grad_norm": 1.325732929479909, "learning_rate": 1.0069336321147616e-06, "loss": 0.6242, "step": 26122 }, { "epoch": 0.8006313595684688, "grad_norm": 1.3749047939648755, "learning_rate": 1.006634945611012e-06, "loss": 0.6248, "step": 26123 }, { "epoch": 0.80066200809121, "grad_norm": 1.311530918970573, "learning_rate": 1.006336298454908e-06, "loss": 0.601, "step": 26124 }, { "epoch": 0.8006926566139512, "grad_norm": 0.4307115381263687, "learning_rate": 1.0060376906493968e-06, "loss": 0.3754, "step": 26125 }, { "epoch": 0.8007233051366924, "grad_norm": 1.3534860048130202, "learning_rate": 1.0057391221974178e-06, "loss": 0.5656, "step": 26126 }, { "epoch": 0.8007539536594336, "grad_norm": 1.2334561094602872, "learning_rate": 1.0054405931019124e-06, "loss": 0.5263, "step": 26127 }, { "epoch": 0.8007846021821748, "grad_norm": 0.44627771677557726, "learning_rate": 1.0051421033658226e-06, "loss": 0.3738, "step": 26128 }, { "epoch": 0.800815250704916, "grad_norm": 1.475214518631038, "learning_rate": 1.0048436529920908e-06, "loss": 0.6159, "step": 26129 }, { "epoch": 0.8008458992276573, "grad_norm": 1.185003835749406, "learning_rate": 1.004545241983656e-06, "loss": 0.6625, "step": 26130 }, { "epoch": 0.8008765477503984, "grad_norm": 1.3542011857744467, "learning_rate": 1.0042468703434588e-06, "loss": 0.6114, "step": 26131 }, { "epoch": 0.8009071962731397, "grad_norm": 1.3219435563074184, "learning_rate": 1.0039485380744408e-06, "loss": 0.6477, "step": 26132 }, { "epoch": 0.8009378447958808, "grad_norm": 1.3741072216909287, "learning_rate": 1.0036502451795393e-06, "loss": 0.5923, "step": 26133 }, { "epoch": 0.8009684933186221, "grad_norm": 1.3272943387981826, "learning_rate": 1.0033519916616958e-06, "loss": 0.6291, "step": 26134 }, { "epoch": 0.8009991418413632, "grad_norm": 1.2778089239934636, "learning_rate": 1.0030537775238447e-06, "loss": 0.6147, "step": 26135 }, { "epoch": 0.8010297903641045, "grad_norm": 1.287555048636982, "learning_rate": 1.00275560276893e-06, "loss": 0.6055, "step": 26136 }, { "epoch": 0.8010604388868456, "grad_norm": 1.2518307213005508, "learning_rate": 1.0024574673998871e-06, "loss": 0.5871, "step": 26137 }, { "epoch": 0.8010910874095869, "grad_norm": 1.3878256357721575, "learning_rate": 1.0021593714196525e-06, "loss": 0.6427, "step": 26138 }, { "epoch": 0.801121735932328, "grad_norm": 1.2280119545240737, "learning_rate": 1.0018613148311646e-06, "loss": 0.5373, "step": 26139 }, { "epoch": 0.8011523844550693, "grad_norm": 1.4455279275909612, "learning_rate": 1.0015632976373612e-06, "loss": 0.669, "step": 26140 }, { "epoch": 0.8011830329778105, "grad_norm": 0.4636544012312799, "learning_rate": 1.0012653198411765e-06, "loss": 0.3873, "step": 26141 }, { "epoch": 0.8012136815005517, "grad_norm": 1.3841085238630555, "learning_rate": 1.0009673814455478e-06, "loss": 0.5375, "step": 26142 }, { "epoch": 0.8012443300232929, "grad_norm": 1.489476205712825, "learning_rate": 1.0006694824534109e-06, "loss": 0.6416, "step": 26143 }, { "epoch": 0.8012749785460341, "grad_norm": 0.44074906938335384, "learning_rate": 1.0003716228677018e-06, "loss": 0.3973, "step": 26144 }, { "epoch": 0.8013056270687753, "grad_norm": 1.2565594300689176, "learning_rate": 1.0000738026913542e-06, "loss": 0.6225, "step": 26145 }, { "epoch": 0.8013362755915164, "grad_norm": 1.2794669633558506, "learning_rate": 9.997760219273006e-07, "loss": 0.5946, "step": 26146 }, { "epoch": 0.8013669241142577, "grad_norm": 1.3932819677402692, "learning_rate": 9.994782805784798e-07, "loss": 0.5665, "step": 26147 }, { "epoch": 0.8013975726369988, "grad_norm": 0.4657533621869864, "learning_rate": 9.99180578647823e-07, "loss": 0.4127, "step": 26148 }, { "epoch": 0.8014282211597401, "grad_norm": 1.2728283687330066, "learning_rate": 9.98882916138263e-07, "loss": 0.6488, "step": 26149 }, { "epoch": 0.8014588696824813, "grad_norm": 1.3133998451433164, "learning_rate": 9.985852930527329e-07, "loss": 0.6353, "step": 26150 }, { "epoch": 0.8014895182052225, "grad_norm": 1.265538125947871, "learning_rate": 9.982877093941655e-07, "loss": 0.5817, "step": 26151 }, { "epoch": 0.8015201667279637, "grad_norm": 0.45442515631180735, "learning_rate": 9.979901651654944e-07, "loss": 0.3846, "step": 26152 }, { "epoch": 0.8015508152507049, "grad_norm": 1.3830549216422876, "learning_rate": 9.97692660369649e-07, "loss": 0.6407, "step": 26153 }, { "epoch": 0.8015814637734461, "grad_norm": 1.293673353850775, "learning_rate": 9.973951950095624e-07, "loss": 0.6353, "step": 26154 }, { "epoch": 0.8016121122961873, "grad_norm": 0.4453524225560449, "learning_rate": 9.970977690881656e-07, "loss": 0.3945, "step": 26155 }, { "epoch": 0.8016427608189285, "grad_norm": 1.5562428651925198, "learning_rate": 9.968003826083889e-07, "loss": 0.4906, "step": 26156 }, { "epoch": 0.8016734093416698, "grad_norm": 1.232610115864175, "learning_rate": 9.965030355731614e-07, "loss": 0.5904, "step": 26157 }, { "epoch": 0.8017040578644109, "grad_norm": 1.348960783924823, "learning_rate": 9.962057279854132e-07, "loss": 0.623, "step": 26158 }, { "epoch": 0.8017347063871522, "grad_norm": 1.552645842071631, "learning_rate": 9.959084598480762e-07, "loss": 0.6844, "step": 26159 }, { "epoch": 0.8017653549098933, "grad_norm": 1.3024163852949047, "learning_rate": 9.956112311640758e-07, "loss": 0.6038, "step": 26160 }, { "epoch": 0.8017960034326346, "grad_norm": 1.2805575382747731, "learning_rate": 9.953140419363433e-07, "loss": 0.6317, "step": 26161 }, { "epoch": 0.8018266519553757, "grad_norm": 1.253997018525803, "learning_rate": 9.950168921678056e-07, "loss": 0.5871, "step": 26162 }, { "epoch": 0.801857300478117, "grad_norm": 0.42656787489953707, "learning_rate": 9.947197818613923e-07, "loss": 0.392, "step": 26163 }, { "epoch": 0.8018879490008581, "grad_norm": 1.2729468301840778, "learning_rate": 9.944227110200305e-07, "loss": 0.5368, "step": 26164 }, { "epoch": 0.8019185975235994, "grad_norm": 1.31215765549378, "learning_rate": 9.941256796466432e-07, "loss": 0.609, "step": 26165 }, { "epoch": 0.8019492460463405, "grad_norm": 1.4523740531901344, "learning_rate": 9.938286877441639e-07, "loss": 0.6101, "step": 26166 }, { "epoch": 0.8019798945690818, "grad_norm": 1.2484779175745455, "learning_rate": 9.93531735315515e-07, "loss": 0.5349, "step": 26167 }, { "epoch": 0.802010543091823, "grad_norm": 0.44876409896249503, "learning_rate": 9.932348223636217e-07, "loss": 0.4143, "step": 26168 }, { "epoch": 0.8020411916145642, "grad_norm": 0.4630682680768631, "learning_rate": 9.92937948891411e-07, "loss": 0.3921, "step": 26169 }, { "epoch": 0.8020718401373054, "grad_norm": 1.485612311660052, "learning_rate": 9.92641114901808e-07, "loss": 0.6841, "step": 26170 }, { "epoch": 0.8021024886600466, "grad_norm": 1.3192946931299858, "learning_rate": 9.92344320397739e-07, "loss": 0.595, "step": 26171 }, { "epoch": 0.8021331371827878, "grad_norm": 1.4000581675663448, "learning_rate": 9.920475653821248e-07, "loss": 0.5839, "step": 26172 }, { "epoch": 0.802163785705529, "grad_norm": 1.36645459037609, "learning_rate": 9.91750849857892e-07, "loss": 0.6392, "step": 26173 }, { "epoch": 0.8021944342282702, "grad_norm": 1.2647144352091366, "learning_rate": 9.914541738279648e-07, "loss": 0.5332, "step": 26174 }, { "epoch": 0.8022250827510115, "grad_norm": 1.3402378388056273, "learning_rate": 9.911575372952653e-07, "loss": 0.6636, "step": 26175 }, { "epoch": 0.8022557312737526, "grad_norm": 1.3460697497794667, "learning_rate": 9.908609402627135e-07, "loss": 0.6113, "step": 26176 }, { "epoch": 0.8022863797964938, "grad_norm": 1.3512329404360197, "learning_rate": 9.905643827332373e-07, "loss": 0.5649, "step": 26177 }, { "epoch": 0.802317028319235, "grad_norm": 1.2786165132155447, "learning_rate": 9.902678647097547e-07, "loss": 0.707, "step": 26178 }, { "epoch": 0.8023476768419762, "grad_norm": 1.371761948279537, "learning_rate": 9.899713861951905e-07, "loss": 0.6261, "step": 26179 }, { "epoch": 0.8023783253647174, "grad_norm": 1.2109510759585067, "learning_rate": 9.896749471924627e-07, "loss": 0.5583, "step": 26180 }, { "epoch": 0.8024089738874586, "grad_norm": 1.2923703094312402, "learning_rate": 9.893785477044936e-07, "loss": 0.5703, "step": 26181 }, { "epoch": 0.8024396224101998, "grad_norm": 1.3437261368365316, "learning_rate": 9.89082187734205e-07, "loss": 0.6187, "step": 26182 }, { "epoch": 0.802470270932941, "grad_norm": 1.3372989011845962, "learning_rate": 9.887858672845146e-07, "loss": 0.5596, "step": 26183 }, { "epoch": 0.8025009194556822, "grad_norm": 1.1985880812267242, "learning_rate": 9.884895863583437e-07, "loss": 0.4687, "step": 26184 }, { "epoch": 0.8025315679784234, "grad_norm": 1.4211996648293672, "learning_rate": 9.881933449586123e-07, "loss": 0.6129, "step": 26185 }, { "epoch": 0.8025622165011647, "grad_norm": 1.368312663681409, "learning_rate": 9.878971430882388e-07, "loss": 0.5354, "step": 26186 }, { "epoch": 0.8025928650239058, "grad_norm": 1.31958826019113, "learning_rate": 9.87600980750138e-07, "loss": 0.6535, "step": 26187 }, { "epoch": 0.8026235135466471, "grad_norm": 1.2190666858324053, "learning_rate": 9.873048579472344e-07, "loss": 0.5691, "step": 26188 }, { "epoch": 0.8026541620693882, "grad_norm": 1.226051257706415, "learning_rate": 9.870087746824414e-07, "loss": 0.5346, "step": 26189 }, { "epoch": 0.8026848105921295, "grad_norm": 1.3889149282326025, "learning_rate": 9.86712730958679e-07, "loss": 0.6264, "step": 26190 }, { "epoch": 0.8027154591148706, "grad_norm": 1.3116359184689041, "learning_rate": 9.864167267788615e-07, "loss": 0.5755, "step": 26191 }, { "epoch": 0.8027461076376119, "grad_norm": 1.417883620241957, "learning_rate": 9.861207621459068e-07, "loss": 0.6041, "step": 26192 }, { "epoch": 0.802776756160353, "grad_norm": 1.273210197396744, "learning_rate": 9.858248370627327e-07, "loss": 0.6394, "step": 26193 }, { "epoch": 0.8028074046830943, "grad_norm": 1.341040236551717, "learning_rate": 9.855289515322524e-07, "loss": 0.6336, "step": 26194 }, { "epoch": 0.8028380532058355, "grad_norm": 1.3284763032187918, "learning_rate": 9.852331055573822e-07, "loss": 0.6056, "step": 26195 }, { "epoch": 0.8028687017285767, "grad_norm": 1.2840018367567305, "learning_rate": 9.849372991410388e-07, "loss": 0.5899, "step": 26196 }, { "epoch": 0.8028993502513179, "grad_norm": 1.2759644988052716, "learning_rate": 9.84641532286134e-07, "loss": 0.5678, "step": 26197 }, { "epoch": 0.8029299987740591, "grad_norm": 1.365716310021518, "learning_rate": 9.843458049955839e-07, "loss": 0.6961, "step": 26198 }, { "epoch": 0.8029606472968003, "grad_norm": 1.6357447332377708, "learning_rate": 9.840501172723033e-07, "loss": 0.6188, "step": 26199 }, { "epoch": 0.8029912958195415, "grad_norm": 1.302728210441366, "learning_rate": 9.83754469119203e-07, "loss": 0.6331, "step": 26200 }, { "epoch": 0.8030219443422827, "grad_norm": 1.248446320820193, "learning_rate": 9.834588605391988e-07, "loss": 0.5753, "step": 26201 }, { "epoch": 0.803052592865024, "grad_norm": 1.3765339735117643, "learning_rate": 9.831632915352013e-07, "loss": 0.5987, "step": 26202 }, { "epoch": 0.8030832413877651, "grad_norm": 1.3887798089181862, "learning_rate": 9.828677621101229e-07, "loss": 0.6218, "step": 26203 }, { "epoch": 0.8031138899105064, "grad_norm": 1.35728521961736, "learning_rate": 9.82572272266878e-07, "loss": 0.6238, "step": 26204 }, { "epoch": 0.8031445384332475, "grad_norm": 1.4693385857436911, "learning_rate": 9.822768220083751e-07, "loss": 0.6328, "step": 26205 }, { "epoch": 0.8031751869559888, "grad_norm": 1.2800176055647852, "learning_rate": 9.819814113375264e-07, "loss": 0.5446, "step": 26206 }, { "epoch": 0.8032058354787299, "grad_norm": 1.369396948576716, "learning_rate": 9.816860402572442e-07, "loss": 0.5845, "step": 26207 }, { "epoch": 0.8032364840014711, "grad_norm": 1.2663712561190472, "learning_rate": 9.813907087704366e-07, "loss": 0.5603, "step": 26208 }, { "epoch": 0.8032671325242123, "grad_norm": 1.2721951504074624, "learning_rate": 9.810954168800157e-07, "loss": 0.5216, "step": 26209 }, { "epoch": 0.8032977810469535, "grad_norm": 1.2662699276132294, "learning_rate": 9.808001645888888e-07, "loss": 0.6805, "step": 26210 }, { "epoch": 0.8033284295696947, "grad_norm": 1.2476418821355786, "learning_rate": 9.80504951899966e-07, "loss": 0.5082, "step": 26211 }, { "epoch": 0.8033590780924359, "grad_norm": 1.3194838593335474, "learning_rate": 9.802097788161574e-07, "loss": 0.6743, "step": 26212 }, { "epoch": 0.8033897266151772, "grad_norm": 1.3166263537506933, "learning_rate": 9.799146453403696e-07, "loss": 0.4915, "step": 26213 }, { "epoch": 0.8034203751379183, "grad_norm": 1.1840180150302009, "learning_rate": 9.796195514755107e-07, "loss": 0.6, "step": 26214 }, { "epoch": 0.8034510236606596, "grad_norm": 2.2930144639668044, "learning_rate": 9.79324497224491e-07, "loss": 0.5586, "step": 26215 }, { "epoch": 0.8034816721834007, "grad_norm": 1.184457746686482, "learning_rate": 9.790294825902141e-07, "loss": 0.5845, "step": 26216 }, { "epoch": 0.803512320706142, "grad_norm": 1.40988938627372, "learning_rate": 9.78734507575589e-07, "loss": 0.6285, "step": 26217 }, { "epoch": 0.8035429692288831, "grad_norm": 1.3657688922105018, "learning_rate": 9.784395721835222e-07, "loss": 0.6404, "step": 26218 }, { "epoch": 0.8035736177516244, "grad_norm": 1.2237753933899056, "learning_rate": 9.781446764169184e-07, "loss": 0.6154, "step": 26219 }, { "epoch": 0.8036042662743655, "grad_norm": 1.3225390278821458, "learning_rate": 9.778498202786858e-07, "loss": 0.6006, "step": 26220 }, { "epoch": 0.8036349147971068, "grad_norm": 0.4600348587041672, "learning_rate": 9.775550037717263e-07, "loss": 0.3837, "step": 26221 }, { "epoch": 0.803665563319848, "grad_norm": 1.417056672282965, "learning_rate": 9.772602268989462e-07, "loss": 0.6126, "step": 26222 }, { "epoch": 0.8036962118425892, "grad_norm": 1.1018946112107186, "learning_rate": 9.769654896632524e-07, "loss": 0.4697, "step": 26223 }, { "epoch": 0.8037268603653304, "grad_norm": 1.260469382452452, "learning_rate": 9.76670792067545e-07, "loss": 0.5867, "step": 26224 }, { "epoch": 0.8037575088880716, "grad_norm": 1.4219239620714754, "learning_rate": 9.763761341147299e-07, "loss": 0.5993, "step": 26225 }, { "epoch": 0.8037881574108128, "grad_norm": 1.1623288902167748, "learning_rate": 9.76081515807712e-07, "loss": 0.6335, "step": 26226 }, { "epoch": 0.803818805933554, "grad_norm": 1.3836445609181656, "learning_rate": 9.757869371493906e-07, "loss": 0.6085, "step": 26227 }, { "epoch": 0.8038494544562952, "grad_norm": 1.3734450591030933, "learning_rate": 9.754923981426706e-07, "loss": 0.5631, "step": 26228 }, { "epoch": 0.8038801029790364, "grad_norm": 1.3446468350662597, "learning_rate": 9.751978987904547e-07, "loss": 0.5312, "step": 26229 }, { "epoch": 0.8039107515017776, "grad_norm": 1.2396874452906608, "learning_rate": 9.749034390956424e-07, "loss": 0.5676, "step": 26230 }, { "epoch": 0.8039414000245189, "grad_norm": 1.3529474582761818, "learning_rate": 9.74609019061138e-07, "loss": 0.6484, "step": 26231 }, { "epoch": 0.80397204854726, "grad_norm": 1.3825318495108017, "learning_rate": 9.74314638689839e-07, "loss": 0.6157, "step": 26232 }, { "epoch": 0.8040026970700013, "grad_norm": 1.42299454744733, "learning_rate": 9.74020297984648e-07, "loss": 0.6253, "step": 26233 }, { "epoch": 0.8040333455927424, "grad_norm": 1.3226009399308745, "learning_rate": 9.73725996948467e-07, "loss": 0.6795, "step": 26234 }, { "epoch": 0.8040639941154837, "grad_norm": 1.5275971623318896, "learning_rate": 9.734317355841922e-07, "loss": 0.6874, "step": 26235 }, { "epoch": 0.8040946426382248, "grad_norm": 1.4561030369025487, "learning_rate": 9.731375138947246e-07, "loss": 0.6635, "step": 26236 }, { "epoch": 0.8041252911609661, "grad_norm": 1.2405263289720976, "learning_rate": 9.72843331882965e-07, "loss": 0.5295, "step": 26237 }, { "epoch": 0.8041559396837072, "grad_norm": 1.3134534934275948, "learning_rate": 9.725491895518092e-07, "loss": 0.5924, "step": 26238 }, { "epoch": 0.8041865882064484, "grad_norm": 1.331834332172165, "learning_rate": 9.722550869041563e-07, "loss": 0.6017, "step": 26239 }, { "epoch": 0.8042172367291897, "grad_norm": 1.5306133696984308, "learning_rate": 9.719610239429062e-07, "loss": 0.7403, "step": 26240 }, { "epoch": 0.8042478852519308, "grad_norm": 1.466968365222212, "learning_rate": 9.716670006709533e-07, "loss": 0.5738, "step": 26241 }, { "epoch": 0.8042785337746721, "grad_norm": 1.2866914316057831, "learning_rate": 9.713730170911973e-07, "loss": 0.5224, "step": 26242 }, { "epoch": 0.8043091822974132, "grad_norm": 1.3247854982184646, "learning_rate": 9.71079073206531e-07, "loss": 0.5785, "step": 26243 }, { "epoch": 0.8043398308201545, "grad_norm": 1.2437627068968502, "learning_rate": 9.707851690198565e-07, "loss": 0.5741, "step": 26244 }, { "epoch": 0.8043704793428956, "grad_norm": 1.213998028061593, "learning_rate": 9.704913045340664e-07, "loss": 0.5104, "step": 26245 }, { "epoch": 0.8044011278656369, "grad_norm": 0.4551919462382254, "learning_rate": 9.701974797520553e-07, "loss": 0.3953, "step": 26246 }, { "epoch": 0.804431776388378, "grad_norm": 1.348135511927475, "learning_rate": 9.6990369467672e-07, "loss": 0.6081, "step": 26247 }, { "epoch": 0.8044624249111193, "grad_norm": 0.4694901107686813, "learning_rate": 9.69609949310955e-07, "loss": 0.406, "step": 26248 }, { "epoch": 0.8044930734338605, "grad_norm": 1.4861926206058302, "learning_rate": 9.693162436576537e-07, "loss": 0.6732, "step": 26249 }, { "epoch": 0.8045237219566017, "grad_norm": 1.3161598632061775, "learning_rate": 9.690225777197104e-07, "loss": 0.6577, "step": 26250 }, { "epoch": 0.8045543704793429, "grad_norm": 1.3467753957200062, "learning_rate": 9.687289515000192e-07, "loss": 0.6386, "step": 26251 }, { "epoch": 0.8045850190020841, "grad_norm": 1.3012073868208989, "learning_rate": 9.684353650014749e-07, "loss": 0.6027, "step": 26252 }, { "epoch": 0.8046156675248253, "grad_norm": 1.4213019880418125, "learning_rate": 9.681418182269682e-07, "loss": 0.5857, "step": 26253 }, { "epoch": 0.8046463160475665, "grad_norm": 1.2637840403506613, "learning_rate": 9.678483111793896e-07, "loss": 0.5412, "step": 26254 }, { "epoch": 0.8046769645703077, "grad_norm": 1.3827173469929332, "learning_rate": 9.67554843861634e-07, "loss": 0.5702, "step": 26255 }, { "epoch": 0.804707613093049, "grad_norm": 1.2524610779067757, "learning_rate": 9.672614162765936e-07, "loss": 0.5777, "step": 26256 }, { "epoch": 0.8047382616157901, "grad_norm": 1.3058288432081682, "learning_rate": 9.66968028427157e-07, "loss": 0.6032, "step": 26257 }, { "epoch": 0.8047689101385314, "grad_norm": 1.2844237294324663, "learning_rate": 9.666746803162163e-07, "loss": 0.578, "step": 26258 }, { "epoch": 0.8047995586612725, "grad_norm": 0.46373115425792194, "learning_rate": 9.663813719466631e-07, "loss": 0.4074, "step": 26259 }, { "epoch": 0.8048302071840138, "grad_norm": 0.4462342255056604, "learning_rate": 9.660881033213847e-07, "loss": 0.388, "step": 26260 }, { "epoch": 0.8048608557067549, "grad_norm": 1.1825290602355887, "learning_rate": 9.657948744432743e-07, "loss": 0.5239, "step": 26261 }, { "epoch": 0.8048915042294962, "grad_norm": 1.2265029788357458, "learning_rate": 9.65501685315216e-07, "loss": 0.5462, "step": 26262 }, { "epoch": 0.8049221527522373, "grad_norm": 1.2270400979405016, "learning_rate": 9.652085359401047e-07, "loss": 0.5235, "step": 26263 }, { "epoch": 0.8049528012749786, "grad_norm": 0.4442652198064148, "learning_rate": 9.64915426320826e-07, "loss": 0.3894, "step": 26264 }, { "epoch": 0.8049834497977197, "grad_norm": 1.3873566483491488, "learning_rate": 9.64622356460267e-07, "loss": 0.6035, "step": 26265 }, { "epoch": 0.805014098320461, "grad_norm": 0.46078890341084855, "learning_rate": 9.643293263613162e-07, "loss": 0.3864, "step": 26266 }, { "epoch": 0.8050447468432022, "grad_norm": 1.578079096883303, "learning_rate": 9.640363360268623e-07, "loss": 0.6511, "step": 26267 }, { "epoch": 0.8050753953659434, "grad_norm": 1.4407936200469407, "learning_rate": 9.6374338545979e-07, "loss": 0.5827, "step": 26268 }, { "epoch": 0.8051060438886846, "grad_norm": 1.2750063767649094, "learning_rate": 9.634504746629863e-07, "loss": 0.5948, "step": 26269 }, { "epoch": 0.8051366924114257, "grad_norm": 1.3811162834760367, "learning_rate": 9.631576036393386e-07, "loss": 0.4663, "step": 26270 }, { "epoch": 0.805167340934167, "grad_norm": 1.6548513415378237, "learning_rate": 9.628647723917329e-07, "loss": 0.5303, "step": 26271 }, { "epoch": 0.8051979894569081, "grad_norm": 1.1783902489811917, "learning_rate": 9.625719809230532e-07, "loss": 0.5611, "step": 26272 }, { "epoch": 0.8052286379796494, "grad_norm": 1.3131465361020769, "learning_rate": 9.622792292361827e-07, "loss": 0.5215, "step": 26273 }, { "epoch": 0.8052592865023905, "grad_norm": 1.314100696840957, "learning_rate": 9.619865173340105e-07, "loss": 0.6538, "step": 26274 }, { "epoch": 0.8052899350251318, "grad_norm": 1.1605698132315283, "learning_rate": 9.61693845219418e-07, "loss": 0.6518, "step": 26275 }, { "epoch": 0.805320583547873, "grad_norm": 1.1472714141059002, "learning_rate": 9.614012128952888e-07, "loss": 0.5061, "step": 26276 }, { "epoch": 0.8053512320706142, "grad_norm": 1.1881169524041701, "learning_rate": 9.61108620364506e-07, "loss": 0.6066, "step": 26277 }, { "epoch": 0.8053818805933554, "grad_norm": 1.2483774471442066, "learning_rate": 9.608160676299534e-07, "loss": 0.5476, "step": 26278 }, { "epoch": 0.8054125291160966, "grad_norm": 1.3183178096338672, "learning_rate": 9.605235546945152e-07, "loss": 0.5433, "step": 26279 }, { "epoch": 0.8054431776388378, "grad_norm": 1.243929508258993, "learning_rate": 9.602310815610705e-07, "loss": 0.4998, "step": 26280 }, { "epoch": 0.805473826161579, "grad_norm": 1.4912085905434902, "learning_rate": 9.599386482325024e-07, "loss": 0.5307, "step": 26281 }, { "epoch": 0.8055044746843202, "grad_norm": 1.2043890891553244, "learning_rate": 9.596462547116942e-07, "loss": 0.5661, "step": 26282 }, { "epoch": 0.8055351232070614, "grad_norm": 1.4484667774784257, "learning_rate": 9.593539010015245e-07, "loss": 0.6118, "step": 26283 }, { "epoch": 0.8055657717298026, "grad_norm": 1.3923479227409463, "learning_rate": 9.59061587104873e-07, "loss": 0.5429, "step": 26284 }, { "epoch": 0.8055964202525439, "grad_norm": 1.2873317120204437, "learning_rate": 9.587693130246235e-07, "loss": 0.6298, "step": 26285 }, { "epoch": 0.805627068775285, "grad_norm": 1.302540987015517, "learning_rate": 9.584770787636543e-07, "loss": 0.61, "step": 26286 }, { "epoch": 0.8056577172980263, "grad_norm": 1.2420007749169084, "learning_rate": 9.58184884324843e-07, "loss": 0.5339, "step": 26287 }, { "epoch": 0.8056883658207674, "grad_norm": 1.487500952245604, "learning_rate": 9.578927297110701e-07, "loss": 0.6888, "step": 26288 }, { "epoch": 0.8057190143435087, "grad_norm": 1.3255996109679529, "learning_rate": 9.576006149252148e-07, "loss": 0.5189, "step": 26289 }, { "epoch": 0.8057496628662498, "grad_norm": 0.4468151800887417, "learning_rate": 9.573085399701558e-07, "loss": 0.3914, "step": 26290 }, { "epoch": 0.8057803113889911, "grad_norm": 1.3124587776210588, "learning_rate": 9.57016504848769e-07, "loss": 0.4911, "step": 26291 }, { "epoch": 0.8058109599117322, "grad_norm": 1.396716674252809, "learning_rate": 9.567245095639333e-07, "loss": 0.5806, "step": 26292 }, { "epoch": 0.8058416084344735, "grad_norm": 1.3339947533151302, "learning_rate": 9.564325541185266e-07, "loss": 0.681, "step": 26293 }, { "epoch": 0.8058722569572146, "grad_norm": 1.3499230073583997, "learning_rate": 9.561406385154243e-07, "loss": 0.5987, "step": 26294 }, { "epoch": 0.8059029054799559, "grad_norm": 1.2563546130704455, "learning_rate": 9.558487627575002e-07, "loss": 0.6052, "step": 26295 }, { "epoch": 0.8059335540026971, "grad_norm": 1.3189580018116969, "learning_rate": 9.55556926847636e-07, "loss": 0.6049, "step": 26296 }, { "epoch": 0.8059642025254383, "grad_norm": 0.4330599733140347, "learning_rate": 9.552651307887028e-07, "loss": 0.3781, "step": 26297 }, { "epoch": 0.8059948510481795, "grad_norm": 1.4465660287407314, "learning_rate": 9.549733745835787e-07, "loss": 0.5774, "step": 26298 }, { "epoch": 0.8060254995709207, "grad_norm": 1.5019544336382793, "learning_rate": 9.546816582351354e-07, "loss": 0.5946, "step": 26299 }, { "epoch": 0.8060561480936619, "grad_norm": 1.2715599952425491, "learning_rate": 9.543899817462488e-07, "loss": 0.6014, "step": 26300 }, { "epoch": 0.806086796616403, "grad_norm": 1.558013435503534, "learning_rate": 9.540983451197939e-07, "loss": 0.6148, "step": 26301 }, { "epoch": 0.8061174451391443, "grad_norm": 1.532416409712163, "learning_rate": 9.538067483586422e-07, "loss": 0.6205, "step": 26302 }, { "epoch": 0.8061480936618854, "grad_norm": 0.4559239178329735, "learning_rate": 9.535151914656676e-07, "loss": 0.4019, "step": 26303 }, { "epoch": 0.8061787421846267, "grad_norm": 1.470641077925289, "learning_rate": 9.532236744437445e-07, "loss": 0.675, "step": 26304 }, { "epoch": 0.8062093907073679, "grad_norm": 1.5179810177677167, "learning_rate": 9.529321972957428e-07, "loss": 0.6202, "step": 26305 }, { "epoch": 0.8062400392301091, "grad_norm": 1.2884453391992738, "learning_rate": 9.526407600245369e-07, "loss": 0.6106, "step": 26306 }, { "epoch": 0.8062706877528503, "grad_norm": 1.4286447308762575, "learning_rate": 9.523493626329961e-07, "loss": 0.6092, "step": 26307 }, { "epoch": 0.8063013362755915, "grad_norm": 1.5747178573459963, "learning_rate": 9.520580051239925e-07, "loss": 0.6588, "step": 26308 }, { "epoch": 0.8063319847983327, "grad_norm": 1.1943118824818266, "learning_rate": 9.517666875003978e-07, "loss": 0.543, "step": 26309 }, { "epoch": 0.8063626333210739, "grad_norm": 1.3842185000834646, "learning_rate": 9.514754097650813e-07, "loss": 0.6465, "step": 26310 }, { "epoch": 0.8063932818438151, "grad_norm": 1.4490281077350378, "learning_rate": 9.51184171920913e-07, "loss": 0.5515, "step": 26311 }, { "epoch": 0.8064239303665564, "grad_norm": 0.4404479076040062, "learning_rate": 9.508929739707639e-07, "loss": 0.3896, "step": 26312 }, { "epoch": 0.8064545788892975, "grad_norm": 1.4193210184963638, "learning_rate": 9.506018159175029e-07, "loss": 0.5576, "step": 26313 }, { "epoch": 0.8064852274120388, "grad_norm": 1.299351237555776, "learning_rate": 9.503106977639959e-07, "loss": 0.5699, "step": 26314 }, { "epoch": 0.8065158759347799, "grad_norm": 1.2641955945907968, "learning_rate": 9.500196195131156e-07, "loss": 0.5718, "step": 26315 }, { "epoch": 0.8065465244575212, "grad_norm": 1.4126689536708197, "learning_rate": 9.497285811677276e-07, "loss": 0.6013, "step": 26316 }, { "epoch": 0.8065771729802623, "grad_norm": 1.4292303075719401, "learning_rate": 9.494375827307012e-07, "loss": 0.6401, "step": 26317 }, { "epoch": 0.8066078215030036, "grad_norm": 1.359306396547591, "learning_rate": 9.491466242049014e-07, "loss": 0.5549, "step": 26318 }, { "epoch": 0.8066384700257447, "grad_norm": 1.3340997758012523, "learning_rate": 9.488557055931963e-07, "loss": 0.6508, "step": 26319 }, { "epoch": 0.806669118548486, "grad_norm": 1.3974480896270118, "learning_rate": 9.485648268984538e-07, "loss": 0.5992, "step": 26320 }, { "epoch": 0.8066997670712271, "grad_norm": 1.2063561693831817, "learning_rate": 9.482739881235375e-07, "loss": 0.5668, "step": 26321 }, { "epoch": 0.8067304155939684, "grad_norm": 1.4573504837607274, "learning_rate": 9.479831892713143e-07, "loss": 0.53, "step": 26322 }, { "epoch": 0.8067610641167096, "grad_norm": 1.2363678207082938, "learning_rate": 9.476924303446505e-07, "loss": 0.5708, "step": 26323 }, { "epoch": 0.8067917126394508, "grad_norm": 1.2789270827524506, "learning_rate": 9.47401711346409e-07, "loss": 0.5525, "step": 26324 }, { "epoch": 0.806822361162192, "grad_norm": 1.4697192412681555, "learning_rate": 9.471110322794552e-07, "loss": 0.5398, "step": 26325 }, { "epoch": 0.8068530096849332, "grad_norm": 1.4964646440758005, "learning_rate": 9.468203931466546e-07, "loss": 0.6709, "step": 26326 }, { "epoch": 0.8068836582076744, "grad_norm": 1.3050105667210348, "learning_rate": 9.465297939508688e-07, "loss": 0.5974, "step": 26327 }, { "epoch": 0.8069143067304156, "grad_norm": 1.3216966802560872, "learning_rate": 9.462392346949629e-07, "loss": 0.6257, "step": 26328 }, { "epoch": 0.8069449552531568, "grad_norm": 1.4859393123521931, "learning_rate": 9.459487153817981e-07, "loss": 0.5384, "step": 26329 }, { "epoch": 0.806975603775898, "grad_norm": 1.4044826450448462, "learning_rate": 9.456582360142375e-07, "loss": 0.5932, "step": 26330 }, { "epoch": 0.8070062522986392, "grad_norm": 1.461171950477337, "learning_rate": 9.453677965951452e-07, "loss": 0.4993, "step": 26331 }, { "epoch": 0.8070369008213804, "grad_norm": 1.3736464584436814, "learning_rate": 9.450773971273797e-07, "loss": 0.5688, "step": 26332 }, { "epoch": 0.8070675493441216, "grad_norm": 0.44536106433543604, "learning_rate": 9.447870376138047e-07, "loss": 0.3847, "step": 26333 }, { "epoch": 0.8070981978668628, "grad_norm": 1.3525702613802841, "learning_rate": 9.444967180572817e-07, "loss": 0.6172, "step": 26334 }, { "epoch": 0.807128846389604, "grad_norm": 1.3441224319580451, "learning_rate": 9.442064384606687e-07, "loss": 0.5508, "step": 26335 }, { "epoch": 0.8071594949123452, "grad_norm": 1.2399379211297605, "learning_rate": 9.439161988268275e-07, "loss": 0.5386, "step": 26336 }, { "epoch": 0.8071901434350864, "grad_norm": 0.45229575870921807, "learning_rate": 9.436259991586188e-07, "loss": 0.3803, "step": 26337 }, { "epoch": 0.8072207919578276, "grad_norm": 1.3850586875131452, "learning_rate": 9.433358394589003e-07, "loss": 0.6801, "step": 26338 }, { "epoch": 0.8072514404805688, "grad_norm": 1.2464135159468241, "learning_rate": 9.430457197305326e-07, "loss": 0.6553, "step": 26339 }, { "epoch": 0.80728208900331, "grad_norm": 1.3494408162734632, "learning_rate": 9.427556399763721e-07, "loss": 0.6022, "step": 26340 }, { "epoch": 0.8073127375260513, "grad_norm": 1.2197110551757055, "learning_rate": 9.424656001992788e-07, "loss": 0.5797, "step": 26341 }, { "epoch": 0.8073433860487924, "grad_norm": 1.2814569666758404, "learning_rate": 9.42175600402111e-07, "loss": 0.647, "step": 26342 }, { "epoch": 0.8073740345715337, "grad_norm": 1.5195982298436927, "learning_rate": 9.418856405877241e-07, "loss": 0.6098, "step": 26343 }, { "epoch": 0.8074046830942748, "grad_norm": 1.1789698036202052, "learning_rate": 9.415957207589765e-07, "loss": 0.5211, "step": 26344 }, { "epoch": 0.8074353316170161, "grad_norm": 1.3953527123734868, "learning_rate": 9.413058409187254e-07, "loss": 0.5106, "step": 26345 }, { "epoch": 0.8074659801397572, "grad_norm": 0.45513773779419764, "learning_rate": 9.410160010698255e-07, "loss": 0.3934, "step": 26346 }, { "epoch": 0.8074966286624985, "grad_norm": 1.33167617900191, "learning_rate": 9.40726201215133e-07, "loss": 0.6667, "step": 26347 }, { "epoch": 0.8075272771852396, "grad_norm": 0.43983087952907995, "learning_rate": 9.404364413575057e-07, "loss": 0.3936, "step": 26348 }, { "epoch": 0.8075579257079809, "grad_norm": 1.3131666710472927, "learning_rate": 9.401467214997956e-07, "loss": 0.5935, "step": 26349 }, { "epoch": 0.8075885742307221, "grad_norm": 1.4359126241918136, "learning_rate": 9.398570416448593e-07, "loss": 0.5411, "step": 26350 }, { "epoch": 0.8076192227534633, "grad_norm": 1.2630027834441335, "learning_rate": 9.395674017955492e-07, "loss": 0.5068, "step": 26351 }, { "epoch": 0.8076498712762045, "grad_norm": 1.4626790933940463, "learning_rate": 9.392778019547205e-07, "loss": 0.6259, "step": 26352 }, { "epoch": 0.8076805197989457, "grad_norm": 1.5518490027443503, "learning_rate": 9.389882421252284e-07, "loss": 0.6706, "step": 26353 }, { "epoch": 0.8077111683216869, "grad_norm": 1.2239243873525694, "learning_rate": 9.386987223099225e-07, "loss": 0.58, "step": 26354 }, { "epoch": 0.8077418168444281, "grad_norm": 0.43238297279958254, "learning_rate": 9.38409242511657e-07, "loss": 0.3704, "step": 26355 }, { "epoch": 0.8077724653671693, "grad_norm": 1.3559909629146079, "learning_rate": 9.381198027332861e-07, "loss": 0.6027, "step": 26356 }, { "epoch": 0.8078031138899106, "grad_norm": 1.3801826497836154, "learning_rate": 9.378304029776586e-07, "loss": 0.7095, "step": 26357 }, { "epoch": 0.8078337624126517, "grad_norm": 1.5003003388878642, "learning_rate": 9.37541043247629e-07, "loss": 0.6025, "step": 26358 }, { "epoch": 0.807864410935393, "grad_norm": 1.492203000981377, "learning_rate": 9.372517235460437e-07, "loss": 0.7004, "step": 26359 }, { "epoch": 0.8078950594581341, "grad_norm": 0.4319179313532811, "learning_rate": 9.369624438757597e-07, "loss": 0.4031, "step": 26360 }, { "epoch": 0.8079257079808754, "grad_norm": 1.2978729866434477, "learning_rate": 9.366732042396243e-07, "loss": 0.5258, "step": 26361 }, { "epoch": 0.8079563565036165, "grad_norm": 1.3766980028179463, "learning_rate": 9.363840046404865e-07, "loss": 0.6322, "step": 26362 }, { "epoch": 0.8079870050263577, "grad_norm": 1.5104412526608137, "learning_rate": 9.360948450811963e-07, "loss": 0.5825, "step": 26363 }, { "epoch": 0.8080176535490989, "grad_norm": 1.3858167135937454, "learning_rate": 9.358057255646047e-07, "loss": 0.5643, "step": 26364 }, { "epoch": 0.8080483020718401, "grad_norm": 1.4129724885439958, "learning_rate": 9.355166460935583e-07, "loss": 0.6617, "step": 26365 }, { "epoch": 0.8080789505945813, "grad_norm": 1.2661686384227124, "learning_rate": 9.352276066709059e-07, "loss": 0.5377, "step": 26366 }, { "epoch": 0.8081095991173225, "grad_norm": 1.3855551939090853, "learning_rate": 9.349386072994976e-07, "loss": 0.6885, "step": 26367 }, { "epoch": 0.8081402476400638, "grad_norm": 1.3215885522046316, "learning_rate": 9.346496479821776e-07, "loss": 0.5309, "step": 26368 }, { "epoch": 0.8081708961628049, "grad_norm": 1.2710605817447729, "learning_rate": 9.343607287217959e-07, "loss": 0.542, "step": 26369 }, { "epoch": 0.8082015446855462, "grad_norm": 1.203547119348245, "learning_rate": 9.340718495211965e-07, "loss": 0.5182, "step": 26370 }, { "epoch": 0.8082321932082873, "grad_norm": 1.294073602494622, "learning_rate": 9.337830103832291e-07, "loss": 0.6192, "step": 26371 }, { "epoch": 0.8082628417310286, "grad_norm": 1.3662409114161485, "learning_rate": 9.334942113107387e-07, "loss": 0.5577, "step": 26372 }, { "epoch": 0.8082934902537697, "grad_norm": 1.6683515262473136, "learning_rate": 9.332054523065686e-07, "loss": 0.6234, "step": 26373 }, { "epoch": 0.808324138776511, "grad_norm": 1.3407641715489924, "learning_rate": 9.329167333735661e-07, "loss": 0.6239, "step": 26374 }, { "epoch": 0.8083547872992521, "grad_norm": 1.3341183548746427, "learning_rate": 9.326280545145766e-07, "loss": 0.5565, "step": 26375 }, { "epoch": 0.8083854358219934, "grad_norm": 1.4639323651918705, "learning_rate": 9.323394157324422e-07, "loss": 0.6245, "step": 26376 }, { "epoch": 0.8084160843447346, "grad_norm": 1.2940962915066014, "learning_rate": 9.320508170300085e-07, "loss": 0.5733, "step": 26377 }, { "epoch": 0.8084467328674758, "grad_norm": 1.4368940647589497, "learning_rate": 9.317622584101194e-07, "loss": 0.6124, "step": 26378 }, { "epoch": 0.808477381390217, "grad_norm": 1.2248269489511536, "learning_rate": 9.31473739875618e-07, "loss": 0.6043, "step": 26379 }, { "epoch": 0.8085080299129582, "grad_norm": 1.4501615430716088, "learning_rate": 9.311852614293476e-07, "loss": 0.6694, "step": 26380 }, { "epoch": 0.8085386784356994, "grad_norm": 1.2832317329490803, "learning_rate": 9.308968230741467e-07, "loss": 0.5276, "step": 26381 }, { "epoch": 0.8085693269584406, "grad_norm": 1.274857514270977, "learning_rate": 9.306084248128638e-07, "loss": 0.5833, "step": 26382 }, { "epoch": 0.8085999754811818, "grad_norm": 1.3027446729922267, "learning_rate": 9.303200666483364e-07, "loss": 0.6759, "step": 26383 }, { "epoch": 0.808630624003923, "grad_norm": 0.4522714422565352, "learning_rate": 9.30031748583406e-07, "loss": 0.3928, "step": 26384 }, { "epoch": 0.8086612725266642, "grad_norm": 1.5270296085066375, "learning_rate": 9.297434706209141e-07, "loss": 0.6307, "step": 26385 }, { "epoch": 0.8086919210494055, "grad_norm": 1.4725440628542035, "learning_rate": 9.294552327637025e-07, "loss": 0.5897, "step": 26386 }, { "epoch": 0.8087225695721466, "grad_norm": 0.4519793115908394, "learning_rate": 9.291670350146087e-07, "loss": 0.424, "step": 26387 }, { "epoch": 0.8087532180948879, "grad_norm": 1.2548173931251818, "learning_rate": 9.288788773764734e-07, "loss": 0.5208, "step": 26388 }, { "epoch": 0.808783866617629, "grad_norm": 1.2472193858102076, "learning_rate": 9.285907598521359e-07, "loss": 0.562, "step": 26389 }, { "epoch": 0.8088145151403703, "grad_norm": 1.3789807279381554, "learning_rate": 9.283026824444374e-07, "loss": 0.6283, "step": 26390 }, { "epoch": 0.8088451636631114, "grad_norm": 1.3716261434303698, "learning_rate": 9.280146451562139e-07, "loss": 0.5362, "step": 26391 }, { "epoch": 0.8088758121858527, "grad_norm": 1.4351124353847355, "learning_rate": 9.27726647990303e-07, "loss": 0.6511, "step": 26392 }, { "epoch": 0.8089064607085938, "grad_norm": 0.44942467792409463, "learning_rate": 9.274386909495431e-07, "loss": 0.3956, "step": 26393 }, { "epoch": 0.808937109231335, "grad_norm": 1.403550455402864, "learning_rate": 9.27150774036773e-07, "loss": 0.4986, "step": 26394 }, { "epoch": 0.8089677577540763, "grad_norm": 0.46358993479354893, "learning_rate": 9.268628972548272e-07, "loss": 0.4064, "step": 26395 }, { "epoch": 0.8089984062768174, "grad_norm": 1.391840400830387, "learning_rate": 9.265750606065438e-07, "loss": 0.6981, "step": 26396 }, { "epoch": 0.8090290547995587, "grad_norm": 1.3806214551563345, "learning_rate": 9.262872640947579e-07, "loss": 0.6216, "step": 26397 }, { "epoch": 0.8090597033222998, "grad_norm": 1.3248735193192804, "learning_rate": 9.259995077223077e-07, "loss": 0.6631, "step": 26398 }, { "epoch": 0.8090903518450411, "grad_norm": 1.5251337608130315, "learning_rate": 9.257117914920249e-07, "loss": 0.6494, "step": 26399 }, { "epoch": 0.8091210003677822, "grad_norm": 1.2060564452485734, "learning_rate": 9.254241154067467e-07, "loss": 0.5421, "step": 26400 }, { "epoch": 0.8091516488905235, "grad_norm": 1.3467228947317873, "learning_rate": 9.251364794693085e-07, "loss": 0.5277, "step": 26401 }, { "epoch": 0.8091822974132646, "grad_norm": 1.2923682632117626, "learning_rate": 9.248488836825431e-07, "loss": 0.5784, "step": 26402 }, { "epoch": 0.8092129459360059, "grad_norm": 1.4200836681401439, "learning_rate": 9.245613280492833e-07, "loss": 0.6575, "step": 26403 }, { "epoch": 0.809243594458747, "grad_norm": 1.206209063140893, "learning_rate": 9.242738125723633e-07, "loss": 0.5658, "step": 26404 }, { "epoch": 0.8092742429814883, "grad_norm": 1.2993542481564928, "learning_rate": 9.239863372546159e-07, "loss": 0.5969, "step": 26405 }, { "epoch": 0.8093048915042295, "grad_norm": 1.3113105701471166, "learning_rate": 9.236989020988757e-07, "loss": 0.5568, "step": 26406 }, { "epoch": 0.8093355400269707, "grad_norm": 1.5467444602062146, "learning_rate": 9.234115071079713e-07, "loss": 0.6956, "step": 26407 }, { "epoch": 0.8093661885497119, "grad_norm": 1.3633829409199627, "learning_rate": 9.231241522847373e-07, "loss": 0.6101, "step": 26408 }, { "epoch": 0.8093968370724531, "grad_norm": 1.5121150089868332, "learning_rate": 9.228368376320046e-07, "loss": 0.6172, "step": 26409 }, { "epoch": 0.8094274855951943, "grad_norm": 1.4395711024108693, "learning_rate": 9.225495631526044e-07, "loss": 0.5924, "step": 26410 }, { "epoch": 0.8094581341179355, "grad_norm": 1.1009090319800088, "learning_rate": 9.222623288493637e-07, "loss": 0.5372, "step": 26411 }, { "epoch": 0.8094887826406767, "grad_norm": 1.403896517516118, "learning_rate": 9.219751347251183e-07, "loss": 0.6591, "step": 26412 }, { "epoch": 0.809519431163418, "grad_norm": 1.344210882355894, "learning_rate": 9.216879807826951e-07, "loss": 0.6077, "step": 26413 }, { "epoch": 0.8095500796861591, "grad_norm": 1.4451932223775072, "learning_rate": 9.214008670249225e-07, "loss": 0.6562, "step": 26414 }, { "epoch": 0.8095807282089004, "grad_norm": 1.3352584054468515, "learning_rate": 9.211137934546304e-07, "loss": 0.613, "step": 26415 }, { "epoch": 0.8096113767316415, "grad_norm": 1.2919540790233768, "learning_rate": 9.208267600746479e-07, "loss": 0.6506, "step": 26416 }, { "epoch": 0.8096420252543828, "grad_norm": 1.2294394864053728, "learning_rate": 9.205397668878046e-07, "loss": 0.6102, "step": 26417 }, { "epoch": 0.8096726737771239, "grad_norm": 0.44938822355068003, "learning_rate": 9.202528138969252e-07, "loss": 0.4055, "step": 26418 }, { "epoch": 0.8097033222998652, "grad_norm": 1.1348546946085372, "learning_rate": 9.199659011048389e-07, "loss": 0.4782, "step": 26419 }, { "epoch": 0.8097339708226063, "grad_norm": 0.4557295201797119, "learning_rate": 9.196790285143736e-07, "loss": 0.3963, "step": 26420 }, { "epoch": 0.8097646193453476, "grad_norm": 1.3979488289752027, "learning_rate": 9.193921961283552e-07, "loss": 0.5579, "step": 26421 }, { "epoch": 0.8097952678680888, "grad_norm": 1.4486471257807705, "learning_rate": 9.191054039496067e-07, "loss": 0.6497, "step": 26422 }, { "epoch": 0.80982591639083, "grad_norm": 1.3111504485202352, "learning_rate": 9.188186519809594e-07, "loss": 0.6317, "step": 26423 }, { "epoch": 0.8098565649135712, "grad_norm": 1.319167062971204, "learning_rate": 9.185319402252346e-07, "loss": 0.5195, "step": 26424 }, { "epoch": 0.8098872134363123, "grad_norm": 1.4859968019720535, "learning_rate": 9.182452686852605e-07, "loss": 0.6469, "step": 26425 }, { "epoch": 0.8099178619590536, "grad_norm": 1.5355472593716095, "learning_rate": 9.179586373638588e-07, "loss": 0.6448, "step": 26426 }, { "epoch": 0.8099485104817947, "grad_norm": 1.2884404466569177, "learning_rate": 9.176720462638549e-07, "loss": 0.5948, "step": 26427 }, { "epoch": 0.809979159004536, "grad_norm": 1.1684244893335585, "learning_rate": 9.173854953880745e-07, "loss": 0.5644, "step": 26428 }, { "epoch": 0.8100098075272771, "grad_norm": 1.4308105549163332, "learning_rate": 9.170989847393375e-07, "loss": 0.6267, "step": 26429 }, { "epoch": 0.8100404560500184, "grad_norm": 1.3276420373797904, "learning_rate": 9.168125143204692e-07, "loss": 0.5881, "step": 26430 }, { "epoch": 0.8100711045727595, "grad_norm": 1.5086414648616524, "learning_rate": 9.165260841342933e-07, "loss": 0.6221, "step": 26431 }, { "epoch": 0.8101017530955008, "grad_norm": 0.4659023846110651, "learning_rate": 9.162396941836293e-07, "loss": 0.4123, "step": 26432 }, { "epoch": 0.810132401618242, "grad_norm": 1.4480666509638302, "learning_rate": 9.159533444713003e-07, "loss": 0.6493, "step": 26433 }, { "epoch": 0.8101630501409832, "grad_norm": 1.570697081985781, "learning_rate": 9.156670350001295e-07, "loss": 0.6503, "step": 26434 }, { "epoch": 0.8101936986637244, "grad_norm": 1.38098979118614, "learning_rate": 9.153807657729352e-07, "loss": 0.6566, "step": 26435 }, { "epoch": 0.8102243471864656, "grad_norm": 1.3212456336259872, "learning_rate": 9.150945367925407e-07, "loss": 0.6149, "step": 26436 }, { "epoch": 0.8102549957092068, "grad_norm": 1.188558031024844, "learning_rate": 9.148083480617631e-07, "loss": 0.5789, "step": 26437 }, { "epoch": 0.810285644231948, "grad_norm": 0.448363351772942, "learning_rate": 9.145221995834247e-07, "loss": 0.3841, "step": 26438 }, { "epoch": 0.8103162927546892, "grad_norm": 1.2452065143380509, "learning_rate": 9.142360913603449e-07, "loss": 0.551, "step": 26439 }, { "epoch": 0.8103469412774305, "grad_norm": 1.4468858593447123, "learning_rate": 9.139500233953419e-07, "loss": 0.6509, "step": 26440 }, { "epoch": 0.8103775898001716, "grad_norm": 1.288480743573403, "learning_rate": 9.136639956912341e-07, "loss": 0.5836, "step": 26441 }, { "epoch": 0.8104082383229129, "grad_norm": 1.5163229127371303, "learning_rate": 9.13378008250842e-07, "loss": 0.6222, "step": 26442 }, { "epoch": 0.810438886845654, "grad_norm": 1.32322054974966, "learning_rate": 9.130920610769806e-07, "loss": 0.6339, "step": 26443 }, { "epoch": 0.8104695353683953, "grad_norm": 1.3691657405078572, "learning_rate": 9.128061541724704e-07, "loss": 0.5887, "step": 26444 }, { "epoch": 0.8105001838911364, "grad_norm": 0.44887566177653265, "learning_rate": 9.125202875401251e-07, "loss": 0.3977, "step": 26445 }, { "epoch": 0.8105308324138777, "grad_norm": 1.4706384754265087, "learning_rate": 9.122344611827639e-07, "loss": 0.6527, "step": 26446 }, { "epoch": 0.8105614809366188, "grad_norm": 0.43943249759406483, "learning_rate": 9.119486751032031e-07, "loss": 0.3888, "step": 26447 }, { "epoch": 0.8105921294593601, "grad_norm": 0.42849585036956056, "learning_rate": 9.116629293042567e-07, "loss": 0.3969, "step": 26448 }, { "epoch": 0.8106227779821013, "grad_norm": 0.425994283579122, "learning_rate": 9.11377223788742e-07, "loss": 0.4002, "step": 26449 }, { "epoch": 0.8106534265048425, "grad_norm": 1.351119202193079, "learning_rate": 9.110915585594748e-07, "loss": 0.6373, "step": 26450 }, { "epoch": 0.8106840750275837, "grad_norm": 1.4851634455511045, "learning_rate": 9.10805933619267e-07, "loss": 0.5831, "step": 26451 }, { "epoch": 0.8107147235503249, "grad_norm": 0.44263983650641775, "learning_rate": 9.105203489709353e-07, "loss": 0.3767, "step": 26452 }, { "epoch": 0.8107453720730661, "grad_norm": 1.4524403915026904, "learning_rate": 9.102348046172937e-07, "loss": 0.5545, "step": 26453 }, { "epoch": 0.8107760205958073, "grad_norm": 1.3003069578954698, "learning_rate": 9.099493005611537e-07, "loss": 0.6006, "step": 26454 }, { "epoch": 0.8108066691185485, "grad_norm": 1.2582850067114688, "learning_rate": 9.096638368053312e-07, "loss": 0.6323, "step": 26455 }, { "epoch": 0.8108373176412896, "grad_norm": 1.2879570387082073, "learning_rate": 9.093784133526357e-07, "loss": 0.5217, "step": 26456 }, { "epoch": 0.8108679661640309, "grad_norm": 1.3148308401413484, "learning_rate": 9.09093030205882e-07, "loss": 0.6125, "step": 26457 }, { "epoch": 0.810898614686772, "grad_norm": 1.2872616641541479, "learning_rate": 9.088076873678825e-07, "loss": 0.6128, "step": 26458 }, { "epoch": 0.8109292632095133, "grad_norm": 1.3569641957472647, "learning_rate": 9.08522384841446e-07, "loss": 0.566, "step": 26459 }, { "epoch": 0.8109599117322545, "grad_norm": 1.401583308968099, "learning_rate": 9.082371226293856e-07, "loss": 0.641, "step": 26460 }, { "epoch": 0.8109905602549957, "grad_norm": 1.3899023915821793, "learning_rate": 9.079519007345128e-07, "loss": 0.5942, "step": 26461 }, { "epoch": 0.8110212087777369, "grad_norm": 1.1198243974387307, "learning_rate": 9.076667191596355e-07, "loss": 0.5987, "step": 26462 }, { "epoch": 0.8110518573004781, "grad_norm": 1.4380469630621093, "learning_rate": 9.073815779075657e-07, "loss": 0.6484, "step": 26463 }, { "epoch": 0.8110825058232193, "grad_norm": 1.1215447776791159, "learning_rate": 9.070964769811131e-07, "loss": 0.5563, "step": 26464 }, { "epoch": 0.8111131543459605, "grad_norm": 1.3485475072127788, "learning_rate": 9.068114163830854e-07, "loss": 0.6709, "step": 26465 }, { "epoch": 0.8111438028687017, "grad_norm": 1.322170377058947, "learning_rate": 9.065263961162929e-07, "loss": 0.5295, "step": 26466 }, { "epoch": 0.811174451391443, "grad_norm": 1.3510176521247352, "learning_rate": 9.06241416183542e-07, "loss": 0.6353, "step": 26467 }, { "epoch": 0.8112050999141841, "grad_norm": 1.4016337502060268, "learning_rate": 9.059564765876417e-07, "loss": 0.6008, "step": 26468 }, { "epoch": 0.8112357484369254, "grad_norm": 1.350088502683774, "learning_rate": 9.056715773314012e-07, "loss": 0.5995, "step": 26469 }, { "epoch": 0.8112663969596665, "grad_norm": 1.4321390027522365, "learning_rate": 9.053867184176252e-07, "loss": 0.5839, "step": 26470 }, { "epoch": 0.8112970454824078, "grad_norm": 0.44520534507016074, "learning_rate": 9.051018998491212e-07, "loss": 0.4013, "step": 26471 }, { "epoch": 0.8113276940051489, "grad_norm": 1.2360124297969193, "learning_rate": 9.048171216286971e-07, "loss": 0.593, "step": 26472 }, { "epoch": 0.8113583425278902, "grad_norm": 1.348355516653101, "learning_rate": 9.045323837591569e-07, "loss": 0.6411, "step": 26473 }, { "epoch": 0.8113889910506313, "grad_norm": 1.3334664848314166, "learning_rate": 9.042476862433064e-07, "loss": 0.6502, "step": 26474 }, { "epoch": 0.8114196395733726, "grad_norm": 1.5084154414651294, "learning_rate": 9.039630290839529e-07, "loss": 0.6642, "step": 26475 }, { "epoch": 0.8114502880961137, "grad_norm": 1.2744498153291757, "learning_rate": 9.036784122838987e-07, "loss": 0.6012, "step": 26476 }, { "epoch": 0.811480936618855, "grad_norm": 1.458649742682144, "learning_rate": 9.033938358459504e-07, "loss": 0.6333, "step": 26477 }, { "epoch": 0.8115115851415962, "grad_norm": 1.2155482970856104, "learning_rate": 9.03109299772908e-07, "loss": 0.5387, "step": 26478 }, { "epoch": 0.8115422336643374, "grad_norm": 1.4625595489939691, "learning_rate": 9.028248040675802e-07, "loss": 0.5642, "step": 26479 }, { "epoch": 0.8115728821870786, "grad_norm": 1.3555811807151503, "learning_rate": 9.025403487327683e-07, "loss": 0.5657, "step": 26480 }, { "epoch": 0.8116035307098198, "grad_norm": 1.4323129203330722, "learning_rate": 9.022559337712733e-07, "loss": 0.6609, "step": 26481 }, { "epoch": 0.811634179232561, "grad_norm": 1.5425671539854553, "learning_rate": 9.01971559185899e-07, "loss": 0.6294, "step": 26482 }, { "epoch": 0.8116648277553022, "grad_norm": 1.3771500381094115, "learning_rate": 9.01687224979449e-07, "loss": 0.6129, "step": 26483 }, { "epoch": 0.8116954762780434, "grad_norm": 1.3199271536944333, "learning_rate": 9.014029311547223e-07, "loss": 0.5746, "step": 26484 }, { "epoch": 0.8117261248007847, "grad_norm": 1.5559315325721887, "learning_rate": 9.011186777145209e-07, "loss": 0.5951, "step": 26485 }, { "epoch": 0.8117567733235258, "grad_norm": 1.445076609087266, "learning_rate": 9.008344646616457e-07, "loss": 0.7066, "step": 26486 }, { "epoch": 0.811787421846267, "grad_norm": 1.37295536559082, "learning_rate": 9.005502919988984e-07, "loss": 0.6891, "step": 26487 }, { "epoch": 0.8118180703690082, "grad_norm": 1.3292914039172967, "learning_rate": 9.002661597290785e-07, "loss": 0.5244, "step": 26488 }, { "epoch": 0.8118487188917494, "grad_norm": 0.4577259325741392, "learning_rate": 8.999820678549836e-07, "loss": 0.406, "step": 26489 }, { "epoch": 0.8118793674144906, "grad_norm": 1.1052800782813346, "learning_rate": 8.996980163794145e-07, "loss": 0.4838, "step": 26490 }, { "epoch": 0.8119100159372318, "grad_norm": 1.329517409310572, "learning_rate": 8.994140053051715e-07, "loss": 0.5615, "step": 26491 }, { "epoch": 0.811940664459973, "grad_norm": 0.4459826000701454, "learning_rate": 8.991300346350495e-07, "loss": 0.4019, "step": 26492 }, { "epoch": 0.8119713129827142, "grad_norm": 1.422016760263828, "learning_rate": 8.988461043718489e-07, "loss": 0.6166, "step": 26493 }, { "epoch": 0.8120019615054554, "grad_norm": 1.34352901150663, "learning_rate": 8.985622145183687e-07, "loss": 0.5583, "step": 26494 }, { "epoch": 0.8120326100281966, "grad_norm": 1.6815774490964581, "learning_rate": 8.982783650774024e-07, "loss": 0.6138, "step": 26495 }, { "epoch": 0.8120632585509379, "grad_norm": 1.2557164171341346, "learning_rate": 8.979945560517506e-07, "loss": 0.612, "step": 26496 }, { "epoch": 0.812093907073679, "grad_norm": 1.4131799157628397, "learning_rate": 8.977107874442048e-07, "loss": 0.5438, "step": 26497 }, { "epoch": 0.8121245555964203, "grad_norm": 0.44504343347870673, "learning_rate": 8.974270592575673e-07, "loss": 0.3843, "step": 26498 }, { "epoch": 0.8121552041191614, "grad_norm": 1.1873595151237344, "learning_rate": 8.97143371494631e-07, "loss": 0.5189, "step": 26499 }, { "epoch": 0.8121858526419027, "grad_norm": 1.23358803384987, "learning_rate": 8.968597241581889e-07, "loss": 0.5741, "step": 26500 }, { "epoch": 0.8122165011646438, "grad_norm": 1.3758569430057208, "learning_rate": 8.965761172510379e-07, "loss": 0.6326, "step": 26501 }, { "epoch": 0.8122471496873851, "grad_norm": 1.3703998601656824, "learning_rate": 8.96292550775974e-07, "loss": 0.5418, "step": 26502 }, { "epoch": 0.8122777982101262, "grad_norm": 0.4903679440304055, "learning_rate": 8.960090247357878e-07, "loss": 0.3867, "step": 26503 }, { "epoch": 0.8123084467328675, "grad_norm": 1.4015914281197541, "learning_rate": 8.957255391332748e-07, "loss": 0.6882, "step": 26504 }, { "epoch": 0.8123390952556087, "grad_norm": 1.4475398162383553, "learning_rate": 8.954420939712283e-07, "loss": 0.6434, "step": 26505 }, { "epoch": 0.8123697437783499, "grad_norm": 1.1810911149093668, "learning_rate": 8.951586892524422e-07, "loss": 0.5224, "step": 26506 }, { "epoch": 0.8124003923010911, "grad_norm": 1.4079494475745986, "learning_rate": 8.948753249797082e-07, "loss": 0.6959, "step": 26507 }, { "epoch": 0.8124310408238323, "grad_norm": 1.208867386770389, "learning_rate": 8.945920011558152e-07, "loss": 0.5835, "step": 26508 }, { "epoch": 0.8124616893465735, "grad_norm": 1.3313607860141907, "learning_rate": 8.943087177835602e-07, "loss": 0.5318, "step": 26509 }, { "epoch": 0.8124923378693147, "grad_norm": 1.6144402573894645, "learning_rate": 8.940254748657317e-07, "loss": 0.6174, "step": 26510 }, { "epoch": 0.8125229863920559, "grad_norm": 0.44036535728335646, "learning_rate": 8.937422724051193e-07, "loss": 0.3739, "step": 26511 }, { "epoch": 0.8125536349147972, "grad_norm": 1.3427444990401758, "learning_rate": 8.934591104045154e-07, "loss": 0.6803, "step": 26512 }, { "epoch": 0.8125842834375383, "grad_norm": 1.5051529886916932, "learning_rate": 8.931759888667096e-07, "loss": 0.6117, "step": 26513 }, { "epoch": 0.8126149319602796, "grad_norm": 0.4490642850602708, "learning_rate": 8.928929077944925e-07, "loss": 0.3963, "step": 26514 }, { "epoch": 0.8126455804830207, "grad_norm": 1.2931833331949834, "learning_rate": 8.926098671906514e-07, "loss": 0.5232, "step": 26515 }, { "epoch": 0.812676229005762, "grad_norm": 1.4158506409837694, "learning_rate": 8.92326867057976e-07, "loss": 0.6773, "step": 26516 }, { "epoch": 0.8127068775285031, "grad_norm": 1.300313437076442, "learning_rate": 8.920439073992565e-07, "loss": 0.6033, "step": 26517 }, { "epoch": 0.8127375260512444, "grad_norm": 0.4460002913203774, "learning_rate": 8.91760988217279e-07, "loss": 0.402, "step": 26518 }, { "epoch": 0.8127681745739855, "grad_norm": 1.241734970409848, "learning_rate": 8.914781095148294e-07, "loss": 0.6032, "step": 26519 }, { "epoch": 0.8127988230967267, "grad_norm": 1.2415579362033404, "learning_rate": 8.911952712946997e-07, "loss": 0.5891, "step": 26520 }, { "epoch": 0.812829471619468, "grad_norm": 1.2935345862134946, "learning_rate": 8.909124735596741e-07, "loss": 0.5641, "step": 26521 }, { "epoch": 0.8128601201422091, "grad_norm": 1.2232412432096575, "learning_rate": 8.906297163125382e-07, "loss": 0.5752, "step": 26522 }, { "epoch": 0.8128907686649504, "grad_norm": 1.2504469417119544, "learning_rate": 8.903469995560792e-07, "loss": 0.5743, "step": 26523 }, { "epoch": 0.8129214171876915, "grad_norm": 1.4150041356398493, "learning_rate": 8.900643232930827e-07, "loss": 0.6519, "step": 26524 }, { "epoch": 0.8129520657104328, "grad_norm": 1.380362318815663, "learning_rate": 8.897816875263348e-07, "loss": 0.5775, "step": 26525 }, { "epoch": 0.8129827142331739, "grad_norm": 1.3144552551347564, "learning_rate": 8.894990922586189e-07, "loss": 0.6429, "step": 26526 }, { "epoch": 0.8130133627559152, "grad_norm": 1.1394443934821643, "learning_rate": 8.892165374927198e-07, "loss": 0.5578, "step": 26527 }, { "epoch": 0.8130440112786563, "grad_norm": 1.3944360638506765, "learning_rate": 8.889340232314236e-07, "loss": 0.5502, "step": 26528 }, { "epoch": 0.8130746598013976, "grad_norm": 0.453285957130865, "learning_rate": 8.886515494775122e-07, "loss": 0.391, "step": 26529 }, { "epoch": 0.8131053083241387, "grad_norm": 1.496153950566298, "learning_rate": 8.88369116233766e-07, "loss": 0.5864, "step": 26530 }, { "epoch": 0.81313595684688, "grad_norm": 1.4282763872745965, "learning_rate": 8.880867235029739e-07, "loss": 0.5897, "step": 26531 }, { "epoch": 0.8131666053696212, "grad_norm": 0.4630691906096147, "learning_rate": 8.878043712879142e-07, "loss": 0.4051, "step": 26532 }, { "epoch": 0.8131972538923624, "grad_norm": 1.2140439520643787, "learning_rate": 8.875220595913714e-07, "loss": 0.6204, "step": 26533 }, { "epoch": 0.8132279024151036, "grad_norm": 1.4670921381521191, "learning_rate": 8.872397884161244e-07, "loss": 0.5753, "step": 26534 }, { "epoch": 0.8132585509378448, "grad_norm": 1.351741143202461, "learning_rate": 8.869575577649564e-07, "loss": 0.6076, "step": 26535 }, { "epoch": 0.813289199460586, "grad_norm": 1.247476134125751, "learning_rate": 8.866753676406486e-07, "loss": 0.4916, "step": 26536 }, { "epoch": 0.8133198479833272, "grad_norm": 1.313390268744249, "learning_rate": 8.8639321804598e-07, "loss": 0.6471, "step": 26537 }, { "epoch": 0.8133504965060684, "grad_norm": 1.2447943633769045, "learning_rate": 8.861111089837315e-07, "loss": 0.501, "step": 26538 }, { "epoch": 0.8133811450288096, "grad_norm": 1.2862509282850663, "learning_rate": 8.858290404566844e-07, "loss": 0.5396, "step": 26539 }, { "epoch": 0.8134117935515508, "grad_norm": 1.3204062912310524, "learning_rate": 8.855470124676152e-07, "loss": 0.5214, "step": 26540 }, { "epoch": 0.8134424420742921, "grad_norm": 1.3007403745148283, "learning_rate": 8.852650250193045e-07, "loss": 0.5278, "step": 26541 }, { "epoch": 0.8134730905970332, "grad_norm": 1.446944168300684, "learning_rate": 8.849830781145297e-07, "loss": 0.5718, "step": 26542 }, { "epoch": 0.8135037391197745, "grad_norm": 1.402929584609569, "learning_rate": 8.847011717560694e-07, "loss": 0.6389, "step": 26543 }, { "epoch": 0.8135343876425156, "grad_norm": 1.4407800744543025, "learning_rate": 8.844193059467027e-07, "loss": 0.515, "step": 26544 }, { "epoch": 0.8135650361652569, "grad_norm": 1.4682105571670068, "learning_rate": 8.841374806892039e-07, "loss": 0.6372, "step": 26545 }, { "epoch": 0.813595684687998, "grad_norm": 0.45558064708481794, "learning_rate": 8.83855695986352e-07, "loss": 0.4139, "step": 26546 }, { "epoch": 0.8136263332107393, "grad_norm": 1.3564240233674045, "learning_rate": 8.835739518409242e-07, "loss": 0.6273, "step": 26547 }, { "epoch": 0.8136569817334804, "grad_norm": 0.5831451668875077, "learning_rate": 8.832922482556961e-07, "loss": 0.3793, "step": 26548 }, { "epoch": 0.8136876302562217, "grad_norm": 0.45748326190530947, "learning_rate": 8.830105852334392e-07, "loss": 0.4099, "step": 26549 }, { "epoch": 0.8137182787789629, "grad_norm": 1.3867673127352729, "learning_rate": 8.827289627769358e-07, "loss": 0.6187, "step": 26550 }, { "epoch": 0.813748927301704, "grad_norm": 1.3552912224485587, "learning_rate": 8.824473808889555e-07, "loss": 0.6306, "step": 26551 }, { "epoch": 0.8137795758244453, "grad_norm": 1.377071896404938, "learning_rate": 8.82165839572276e-07, "loss": 0.6315, "step": 26552 }, { "epoch": 0.8138102243471864, "grad_norm": 1.3602814479961167, "learning_rate": 8.818843388296694e-07, "loss": 0.6162, "step": 26553 }, { "epoch": 0.8138408728699277, "grad_norm": 1.292425482479968, "learning_rate": 8.816028786639097e-07, "loss": 0.6412, "step": 26554 }, { "epoch": 0.8138715213926688, "grad_norm": 1.2574642696213845, "learning_rate": 8.813214590777713e-07, "loss": 0.6815, "step": 26555 }, { "epoch": 0.8139021699154101, "grad_norm": 1.5318388208428653, "learning_rate": 8.81040080074026e-07, "loss": 0.6614, "step": 26556 }, { "epoch": 0.8139328184381512, "grad_norm": 1.5278987931313475, "learning_rate": 8.807587416554464e-07, "loss": 0.5949, "step": 26557 }, { "epoch": 0.8139634669608925, "grad_norm": 1.5754542864636245, "learning_rate": 8.80477443824806e-07, "loss": 0.6743, "step": 26558 }, { "epoch": 0.8139941154836337, "grad_norm": 1.5467278413403933, "learning_rate": 8.80196186584874e-07, "loss": 0.6328, "step": 26559 }, { "epoch": 0.8140247640063749, "grad_norm": 1.3224654334067314, "learning_rate": 8.79914969938423e-07, "loss": 0.6424, "step": 26560 }, { "epoch": 0.8140554125291161, "grad_norm": 1.3934711201245107, "learning_rate": 8.796337938882254e-07, "loss": 0.5895, "step": 26561 }, { "epoch": 0.8140860610518573, "grad_norm": 1.2748448458859174, "learning_rate": 8.793526584370493e-07, "loss": 0.5751, "step": 26562 }, { "epoch": 0.8141167095745985, "grad_norm": 1.3542649997487, "learning_rate": 8.790715635876667e-07, "loss": 0.6441, "step": 26563 }, { "epoch": 0.8141473580973397, "grad_norm": 0.4427748293929892, "learning_rate": 8.78790509342845e-07, "loss": 0.3817, "step": 26564 }, { "epoch": 0.8141780066200809, "grad_norm": 1.3381111133936334, "learning_rate": 8.785094957053552e-07, "loss": 0.5582, "step": 26565 }, { "epoch": 0.8142086551428221, "grad_norm": 1.2603021055394554, "learning_rate": 8.782285226779669e-07, "loss": 0.6508, "step": 26566 }, { "epoch": 0.8142393036655633, "grad_norm": 1.3054932593125899, "learning_rate": 8.779475902634466e-07, "loss": 0.6561, "step": 26567 }, { "epoch": 0.8142699521883046, "grad_norm": 0.45642466583349806, "learning_rate": 8.776666984645632e-07, "loss": 0.3919, "step": 26568 }, { "epoch": 0.8143006007110457, "grad_norm": 1.3999802410548736, "learning_rate": 8.773858472840857e-07, "loss": 0.6025, "step": 26569 }, { "epoch": 0.814331249233787, "grad_norm": 1.3225291534820978, "learning_rate": 8.771050367247791e-07, "loss": 0.6136, "step": 26570 }, { "epoch": 0.8143618977565281, "grad_norm": 1.4413276259160657, "learning_rate": 8.768242667894112e-07, "loss": 0.5596, "step": 26571 }, { "epoch": 0.8143925462792694, "grad_norm": 1.4449414343778122, "learning_rate": 8.765435374807501e-07, "loss": 0.6584, "step": 26572 }, { "epoch": 0.8144231948020105, "grad_norm": 0.4521908673755987, "learning_rate": 8.762628488015596e-07, "loss": 0.4407, "step": 26573 }, { "epoch": 0.8144538433247518, "grad_norm": 1.3547726384907495, "learning_rate": 8.759822007546076e-07, "loss": 0.5511, "step": 26574 }, { "epoch": 0.8144844918474929, "grad_norm": 1.3575741381220423, "learning_rate": 8.757015933426566e-07, "loss": 0.5946, "step": 26575 }, { "epoch": 0.8145151403702342, "grad_norm": 0.4476729315343506, "learning_rate": 8.754210265684732e-07, "loss": 0.3759, "step": 26576 }, { "epoch": 0.8145457888929754, "grad_norm": 1.4662659223474872, "learning_rate": 8.751405004348229e-07, "loss": 0.6306, "step": 26577 }, { "epoch": 0.8145764374157166, "grad_norm": 1.3102745563174625, "learning_rate": 8.748600149444674e-07, "loss": 0.509, "step": 26578 }, { "epoch": 0.8146070859384578, "grad_norm": 1.2789752550267899, "learning_rate": 8.745795701001719e-07, "loss": 0.5997, "step": 26579 }, { "epoch": 0.814637734461199, "grad_norm": 1.5076278694613134, "learning_rate": 8.742991659047006e-07, "loss": 0.5458, "step": 26580 }, { "epoch": 0.8146683829839402, "grad_norm": 1.301505610436266, "learning_rate": 8.740188023608137e-07, "loss": 0.5998, "step": 26581 }, { "epoch": 0.8146990315066813, "grad_norm": 0.4560126767749589, "learning_rate": 8.737384794712755e-07, "loss": 0.3841, "step": 26582 }, { "epoch": 0.8147296800294226, "grad_norm": 0.46080540475495924, "learning_rate": 8.73458197238849e-07, "loss": 0.3806, "step": 26583 }, { "epoch": 0.8147603285521637, "grad_norm": 1.2464939625837717, "learning_rate": 8.731779556662934e-07, "loss": 0.6625, "step": 26584 }, { "epoch": 0.814790977074905, "grad_norm": 1.2467093357798398, "learning_rate": 8.728977547563727e-07, "loss": 0.5396, "step": 26585 }, { "epoch": 0.8148216255976461, "grad_norm": 0.4501352485114252, "learning_rate": 8.726175945118449e-07, "loss": 0.3931, "step": 26586 }, { "epoch": 0.8148522741203874, "grad_norm": 1.3296025723721, "learning_rate": 8.723374749354719e-07, "loss": 0.5621, "step": 26587 }, { "epoch": 0.8148829226431286, "grad_norm": 1.1651715396078801, "learning_rate": 8.720573960300155e-07, "loss": 0.5096, "step": 26588 }, { "epoch": 0.8149135711658698, "grad_norm": 1.379428626296076, "learning_rate": 8.717773577982325e-07, "loss": 0.6412, "step": 26589 }, { "epoch": 0.814944219688611, "grad_norm": 1.310776866497055, "learning_rate": 8.714973602428828e-07, "loss": 0.6149, "step": 26590 }, { "epoch": 0.8149748682113522, "grad_norm": 1.330479679992573, "learning_rate": 8.712174033667281e-07, "loss": 0.6195, "step": 26591 }, { "epoch": 0.8150055167340934, "grad_norm": 0.45445468080315776, "learning_rate": 8.70937487172523e-07, "loss": 0.3724, "step": 26592 }, { "epoch": 0.8150361652568346, "grad_norm": 1.0769962126091057, "learning_rate": 8.706576116630283e-07, "loss": 0.529, "step": 26593 }, { "epoch": 0.8150668137795758, "grad_norm": 1.2906237382700403, "learning_rate": 8.703777768409999e-07, "loss": 0.5419, "step": 26594 }, { "epoch": 0.815097462302317, "grad_norm": 1.2529624994147246, "learning_rate": 8.700979827091954e-07, "loss": 0.6468, "step": 26595 }, { "epoch": 0.8151281108250582, "grad_norm": 1.2407945953191277, "learning_rate": 8.698182292703738e-07, "loss": 0.5367, "step": 26596 }, { "epoch": 0.8151587593477995, "grad_norm": 1.239138165220387, "learning_rate": 8.695385165272884e-07, "loss": 0.6065, "step": 26597 }, { "epoch": 0.8151894078705406, "grad_norm": 1.4158893637366798, "learning_rate": 8.692588444826972e-07, "loss": 0.6889, "step": 26598 }, { "epoch": 0.8152200563932819, "grad_norm": 0.4542267275557365, "learning_rate": 8.689792131393566e-07, "loss": 0.3907, "step": 26599 }, { "epoch": 0.815250704916023, "grad_norm": 1.3903576572158578, "learning_rate": 8.686996225000194e-07, "loss": 0.5885, "step": 26600 }, { "epoch": 0.8152813534387643, "grad_norm": 1.28863269569434, "learning_rate": 8.684200725674419e-07, "loss": 0.6472, "step": 26601 }, { "epoch": 0.8153120019615054, "grad_norm": 1.2626712532642126, "learning_rate": 8.681405633443795e-07, "loss": 0.5176, "step": 26602 }, { "epoch": 0.8153426504842467, "grad_norm": 1.3200644088124986, "learning_rate": 8.678610948335847e-07, "loss": 0.6614, "step": 26603 }, { "epoch": 0.8153732990069879, "grad_norm": 1.361020574772888, "learning_rate": 8.675816670378123e-07, "loss": 0.6377, "step": 26604 }, { "epoch": 0.8154039475297291, "grad_norm": 1.411607433794563, "learning_rate": 8.67302279959813e-07, "loss": 0.6084, "step": 26605 }, { "epoch": 0.8154345960524703, "grad_norm": 1.3756418607238783, "learning_rate": 8.670229336023445e-07, "loss": 0.573, "step": 26606 }, { "epoch": 0.8154652445752115, "grad_norm": 1.2164954207819418, "learning_rate": 8.667436279681563e-07, "loss": 0.6173, "step": 26607 }, { "epoch": 0.8154958930979527, "grad_norm": 1.6758872852454192, "learning_rate": 8.664643630599989e-07, "loss": 0.7312, "step": 26608 }, { "epoch": 0.8155265416206939, "grad_norm": 0.4191934537201839, "learning_rate": 8.661851388806264e-07, "loss": 0.3684, "step": 26609 }, { "epoch": 0.8155571901434351, "grad_norm": 1.2499922863175577, "learning_rate": 8.659059554327904e-07, "loss": 0.616, "step": 26610 }, { "epoch": 0.8155878386661763, "grad_norm": 1.4532012044989224, "learning_rate": 8.656268127192397e-07, "loss": 0.6709, "step": 26611 }, { "epoch": 0.8156184871889175, "grad_norm": 1.3823107660763805, "learning_rate": 8.653477107427255e-07, "loss": 0.591, "step": 26612 }, { "epoch": 0.8156491357116586, "grad_norm": 1.4873286520489795, "learning_rate": 8.650686495059984e-07, "loss": 0.7245, "step": 26613 }, { "epoch": 0.8156797842343999, "grad_norm": 1.4129011826793627, "learning_rate": 8.64789629011809e-07, "loss": 0.5666, "step": 26614 }, { "epoch": 0.8157104327571411, "grad_norm": 1.2651260669592126, "learning_rate": 8.645106492629057e-07, "loss": 0.5312, "step": 26615 }, { "epoch": 0.8157410812798823, "grad_norm": 1.3639823462702274, "learning_rate": 8.642317102620346e-07, "loss": 0.5816, "step": 26616 }, { "epoch": 0.8157717298026235, "grad_norm": 1.2379839950986795, "learning_rate": 8.639528120119489e-07, "loss": 0.5378, "step": 26617 }, { "epoch": 0.8158023783253647, "grad_norm": 1.3528385377463379, "learning_rate": 8.636739545153944e-07, "loss": 0.5714, "step": 26618 }, { "epoch": 0.8158330268481059, "grad_norm": 0.4571249361405312, "learning_rate": 8.633951377751176e-07, "loss": 0.4053, "step": 26619 }, { "epoch": 0.8158636753708471, "grad_norm": 1.278109181428976, "learning_rate": 8.631163617938665e-07, "loss": 0.5422, "step": 26620 }, { "epoch": 0.8158943238935883, "grad_norm": 1.2354376339091875, "learning_rate": 8.628376265743898e-07, "loss": 0.5497, "step": 26621 }, { "epoch": 0.8159249724163296, "grad_norm": 1.4554564255162612, "learning_rate": 8.625589321194317e-07, "loss": 0.5937, "step": 26622 }, { "epoch": 0.8159556209390707, "grad_norm": 1.3138002199453336, "learning_rate": 8.622802784317385e-07, "loss": 0.6017, "step": 26623 }, { "epoch": 0.815986269461812, "grad_norm": 1.449569577573513, "learning_rate": 8.620016655140567e-07, "loss": 0.6405, "step": 26624 }, { "epoch": 0.8160169179845531, "grad_norm": 0.46494509084104396, "learning_rate": 8.617230933691329e-07, "loss": 0.3819, "step": 26625 }, { "epoch": 0.8160475665072944, "grad_norm": 1.3218097774505768, "learning_rate": 8.614445619997097e-07, "loss": 0.6727, "step": 26626 }, { "epoch": 0.8160782150300355, "grad_norm": 1.3501409088312308, "learning_rate": 8.611660714085296e-07, "loss": 0.5827, "step": 26627 }, { "epoch": 0.8161088635527768, "grad_norm": 1.3288816367931982, "learning_rate": 8.608876215983419e-07, "loss": 0.6948, "step": 26628 }, { "epoch": 0.8161395120755179, "grad_norm": 1.4659461887124126, "learning_rate": 8.606092125718873e-07, "loss": 0.5846, "step": 26629 }, { "epoch": 0.8161701605982592, "grad_norm": 1.2985044471691192, "learning_rate": 8.603308443319081e-07, "loss": 0.6845, "step": 26630 }, { "epoch": 0.8162008091210003, "grad_norm": 1.3522995855345403, "learning_rate": 8.600525168811485e-07, "loss": 0.5598, "step": 26631 }, { "epoch": 0.8162314576437416, "grad_norm": 0.4377799999137626, "learning_rate": 8.597742302223505e-07, "loss": 0.3702, "step": 26632 }, { "epoch": 0.8162621061664828, "grad_norm": 1.3445989484963672, "learning_rate": 8.594959843582573e-07, "loss": 0.5666, "step": 26633 }, { "epoch": 0.816292754689224, "grad_norm": 1.3976470857636905, "learning_rate": 8.592177792916084e-07, "loss": 0.608, "step": 26634 }, { "epoch": 0.8163234032119652, "grad_norm": 1.2226398552213698, "learning_rate": 8.589396150251467e-07, "loss": 0.5517, "step": 26635 }, { "epoch": 0.8163540517347064, "grad_norm": 1.3912673308720245, "learning_rate": 8.586614915616131e-07, "loss": 0.6794, "step": 26636 }, { "epoch": 0.8163847002574476, "grad_norm": 1.2608110052049335, "learning_rate": 8.583834089037479e-07, "loss": 0.6038, "step": 26637 }, { "epoch": 0.8164153487801888, "grad_norm": 1.2140579362973185, "learning_rate": 8.581053670542894e-07, "loss": 0.4905, "step": 26638 }, { "epoch": 0.81644599730293, "grad_norm": 1.3782291715794721, "learning_rate": 8.57827366015978e-07, "loss": 0.6354, "step": 26639 }, { "epoch": 0.8164766458256713, "grad_norm": 1.2706648466778572, "learning_rate": 8.57549405791554e-07, "loss": 0.5635, "step": 26640 }, { "epoch": 0.8165072943484124, "grad_norm": 1.5078650615635099, "learning_rate": 8.572714863837567e-07, "loss": 0.5439, "step": 26641 }, { "epoch": 0.8165379428711537, "grad_norm": 1.4450127596873719, "learning_rate": 8.569936077953217e-07, "loss": 0.5823, "step": 26642 }, { "epoch": 0.8165685913938948, "grad_norm": 1.4480023095131485, "learning_rate": 8.567157700289891e-07, "loss": 0.6776, "step": 26643 }, { "epoch": 0.816599239916636, "grad_norm": 1.3093352560798959, "learning_rate": 8.564379730874972e-07, "loss": 0.6599, "step": 26644 }, { "epoch": 0.8166298884393772, "grad_norm": 1.3224618950583982, "learning_rate": 8.561602169735822e-07, "loss": 0.634, "step": 26645 }, { "epoch": 0.8166605369621184, "grad_norm": 0.43864404892694187, "learning_rate": 8.558825016899785e-07, "loss": 0.3981, "step": 26646 }, { "epoch": 0.8166911854848596, "grad_norm": 1.287784493682532, "learning_rate": 8.556048272394274e-07, "loss": 0.6088, "step": 26647 }, { "epoch": 0.8167218340076008, "grad_norm": 1.2534613322456174, "learning_rate": 8.553271936246621e-07, "loss": 0.6086, "step": 26648 }, { "epoch": 0.816752482530342, "grad_norm": 1.2911359105349605, "learning_rate": 8.550496008484171e-07, "loss": 0.544, "step": 26649 }, { "epoch": 0.8167831310530832, "grad_norm": 1.2115263226590771, "learning_rate": 8.547720489134287e-07, "loss": 0.4535, "step": 26650 }, { "epoch": 0.8168137795758245, "grad_norm": 1.4220926405038143, "learning_rate": 8.544945378224323e-07, "loss": 0.6613, "step": 26651 }, { "epoch": 0.8168444280985656, "grad_norm": 1.4077113594861768, "learning_rate": 8.542170675781631e-07, "loss": 0.5448, "step": 26652 }, { "epoch": 0.8168750766213069, "grad_norm": 0.44401904848685925, "learning_rate": 8.539396381833526e-07, "loss": 0.3946, "step": 26653 }, { "epoch": 0.816905725144048, "grad_norm": 0.45109203948719234, "learning_rate": 8.536622496407354e-07, "loss": 0.408, "step": 26654 }, { "epoch": 0.8169363736667893, "grad_norm": 0.452715961640731, "learning_rate": 8.533849019530466e-07, "loss": 0.407, "step": 26655 }, { "epoch": 0.8169670221895304, "grad_norm": 1.3165497531841661, "learning_rate": 8.531075951230172e-07, "loss": 0.6673, "step": 26656 }, { "epoch": 0.8169976707122717, "grad_norm": 1.3137733201785045, "learning_rate": 8.528303291533774e-07, "loss": 0.6118, "step": 26657 }, { "epoch": 0.8170283192350128, "grad_norm": 0.4460581502410535, "learning_rate": 8.525531040468632e-07, "loss": 0.3675, "step": 26658 }, { "epoch": 0.8170589677577541, "grad_norm": 1.3627775607992525, "learning_rate": 8.522759198062036e-07, "loss": 0.6745, "step": 26659 }, { "epoch": 0.8170896162804953, "grad_norm": 1.2964403827348963, "learning_rate": 8.51998776434132e-07, "loss": 0.6124, "step": 26660 }, { "epoch": 0.8171202648032365, "grad_norm": 1.2812135587364764, "learning_rate": 8.517216739333767e-07, "loss": 0.5694, "step": 26661 }, { "epoch": 0.8171509133259777, "grad_norm": 0.44588212340722044, "learning_rate": 8.514446123066689e-07, "loss": 0.3968, "step": 26662 }, { "epoch": 0.8171815618487189, "grad_norm": 1.3348503886685334, "learning_rate": 8.511675915567402e-07, "loss": 0.6036, "step": 26663 }, { "epoch": 0.8172122103714601, "grad_norm": 1.1899944772292552, "learning_rate": 8.508906116863169e-07, "loss": 0.534, "step": 26664 }, { "epoch": 0.8172428588942013, "grad_norm": 1.4263355838542229, "learning_rate": 8.506136726981307e-07, "loss": 0.5982, "step": 26665 }, { "epoch": 0.8172735074169425, "grad_norm": 1.5246346800579569, "learning_rate": 8.503367745949103e-07, "loss": 0.5349, "step": 26666 }, { "epoch": 0.8173041559396838, "grad_norm": 1.1792324698692762, "learning_rate": 8.500599173793828e-07, "loss": 0.5449, "step": 26667 }, { "epoch": 0.8173348044624249, "grad_norm": 1.319204397816293, "learning_rate": 8.497831010542762e-07, "loss": 0.6373, "step": 26668 }, { "epoch": 0.8173654529851662, "grad_norm": 1.3258428034432568, "learning_rate": 8.495063256223201e-07, "loss": 0.5981, "step": 26669 }, { "epoch": 0.8173961015079073, "grad_norm": 1.2899964871516312, "learning_rate": 8.492295910862386e-07, "loss": 0.6087, "step": 26670 }, { "epoch": 0.8174267500306486, "grad_norm": 1.2877626680190224, "learning_rate": 8.489528974487615e-07, "loss": 0.5551, "step": 26671 }, { "epoch": 0.8174573985533897, "grad_norm": 1.4008383366383164, "learning_rate": 8.486762447126123e-07, "loss": 0.6589, "step": 26672 }, { "epoch": 0.817488047076131, "grad_norm": 1.2347180508105822, "learning_rate": 8.483996328805183e-07, "loss": 0.5831, "step": 26673 }, { "epoch": 0.8175186955988721, "grad_norm": 0.44172647621806105, "learning_rate": 8.481230619552061e-07, "loss": 0.4091, "step": 26674 }, { "epoch": 0.8175493441216133, "grad_norm": 1.4468572736483993, "learning_rate": 8.478465319393986e-07, "loss": 0.5363, "step": 26675 }, { "epoch": 0.8175799926443545, "grad_norm": 1.231582943150231, "learning_rate": 8.475700428358213e-07, "loss": 0.5566, "step": 26676 }, { "epoch": 0.8176106411670957, "grad_norm": 1.3753940706911336, "learning_rate": 8.472935946472e-07, "loss": 0.6482, "step": 26677 }, { "epoch": 0.817641289689837, "grad_norm": 1.4451362673775008, "learning_rate": 8.470171873762561e-07, "loss": 0.6562, "step": 26678 }, { "epoch": 0.8176719382125781, "grad_norm": 1.3646277371020201, "learning_rate": 8.46740821025715e-07, "loss": 0.5495, "step": 26679 }, { "epoch": 0.8177025867353194, "grad_norm": 1.69753629255282, "learning_rate": 8.464644955983004e-07, "loss": 0.6451, "step": 26680 }, { "epoch": 0.8177332352580605, "grad_norm": 1.4427476248552609, "learning_rate": 8.461882110967323e-07, "loss": 0.5296, "step": 26681 }, { "epoch": 0.8177638837808018, "grad_norm": 1.3393863811111468, "learning_rate": 8.459119675237354e-07, "loss": 0.5418, "step": 26682 }, { "epoch": 0.8177945323035429, "grad_norm": 1.2793054518511582, "learning_rate": 8.456357648820302e-07, "loss": 0.5766, "step": 26683 }, { "epoch": 0.8178251808262842, "grad_norm": 1.5945773001788834, "learning_rate": 8.453596031743388e-07, "loss": 0.5744, "step": 26684 }, { "epoch": 0.8178558293490253, "grad_norm": 1.4034433117500786, "learning_rate": 8.450834824033832e-07, "loss": 0.6719, "step": 26685 }, { "epoch": 0.8178864778717666, "grad_norm": 1.2640598071782725, "learning_rate": 8.448074025718816e-07, "loss": 0.6405, "step": 26686 }, { "epoch": 0.8179171263945078, "grad_norm": 0.44832353514905315, "learning_rate": 8.445313636825564e-07, "loss": 0.3755, "step": 26687 }, { "epoch": 0.817947774917249, "grad_norm": 0.4747340060527945, "learning_rate": 8.442553657381275e-07, "loss": 0.399, "step": 26688 }, { "epoch": 0.8179784234399902, "grad_norm": 1.3469209784574194, "learning_rate": 8.439794087413133e-07, "loss": 0.5536, "step": 26689 }, { "epoch": 0.8180090719627314, "grad_norm": 1.3647305048990137, "learning_rate": 8.43703492694834e-07, "loss": 0.5317, "step": 26690 }, { "epoch": 0.8180397204854726, "grad_norm": 1.4879581482482052, "learning_rate": 8.434276176014067e-07, "loss": 0.7011, "step": 26691 }, { "epoch": 0.8180703690082138, "grad_norm": 1.3300347689676604, "learning_rate": 8.431517834637504e-07, "loss": 0.5981, "step": 26692 }, { "epoch": 0.818101017530955, "grad_norm": 1.3983412639758241, "learning_rate": 8.428759902845846e-07, "loss": 0.6215, "step": 26693 }, { "epoch": 0.8181316660536962, "grad_norm": 1.4008148361436281, "learning_rate": 8.426002380666237e-07, "loss": 0.5706, "step": 26694 }, { "epoch": 0.8181623145764374, "grad_norm": 1.504450066526986, "learning_rate": 8.423245268125862e-07, "loss": 0.6734, "step": 26695 }, { "epoch": 0.8181929630991787, "grad_norm": 1.2589293551413339, "learning_rate": 8.420488565251911e-07, "loss": 0.6256, "step": 26696 }, { "epoch": 0.8182236116219198, "grad_norm": 1.1548540887771956, "learning_rate": 8.417732272071505e-07, "loss": 0.5452, "step": 26697 }, { "epoch": 0.8182542601446611, "grad_norm": 1.390339504036706, "learning_rate": 8.414976388611823e-07, "loss": 0.6788, "step": 26698 }, { "epoch": 0.8182849086674022, "grad_norm": 1.1678199341730302, "learning_rate": 8.412220914900032e-07, "loss": 0.6032, "step": 26699 }, { "epoch": 0.8183155571901435, "grad_norm": 1.299040430515019, "learning_rate": 8.409465850963255e-07, "loss": 0.6021, "step": 26700 }, { "epoch": 0.8183462057128846, "grad_norm": 1.3227888908867544, "learning_rate": 8.406711196828671e-07, "loss": 0.5938, "step": 26701 }, { "epoch": 0.8183768542356259, "grad_norm": 1.2595954804497231, "learning_rate": 8.403956952523384e-07, "loss": 0.5531, "step": 26702 }, { "epoch": 0.818407502758367, "grad_norm": 1.3186539582630399, "learning_rate": 8.401203118074558e-07, "loss": 0.6052, "step": 26703 }, { "epoch": 0.8184381512811083, "grad_norm": 1.3989750624456492, "learning_rate": 8.398449693509325e-07, "loss": 0.6235, "step": 26704 }, { "epoch": 0.8184687998038495, "grad_norm": 1.2536582950319932, "learning_rate": 8.395696678854809e-07, "loss": 0.4706, "step": 26705 }, { "epoch": 0.8184994483265906, "grad_norm": 1.1728077456497341, "learning_rate": 8.392944074138132e-07, "loss": 0.5626, "step": 26706 }, { "epoch": 0.8185300968493319, "grad_norm": 1.4641750307718897, "learning_rate": 8.390191879386439e-07, "loss": 0.5605, "step": 26707 }, { "epoch": 0.818560745372073, "grad_norm": 1.5179865232963623, "learning_rate": 8.387440094626815e-07, "loss": 0.6391, "step": 26708 }, { "epoch": 0.8185913938948143, "grad_norm": 1.2049233902822702, "learning_rate": 8.384688719886391e-07, "loss": 0.5872, "step": 26709 }, { "epoch": 0.8186220424175554, "grad_norm": 1.304492332854208, "learning_rate": 8.381937755192293e-07, "loss": 0.551, "step": 26710 }, { "epoch": 0.8186526909402967, "grad_norm": 1.3507897570872058, "learning_rate": 8.379187200571598e-07, "loss": 0.5703, "step": 26711 }, { "epoch": 0.8186833394630378, "grad_norm": 1.2461366044079705, "learning_rate": 8.376437056051429e-07, "loss": 0.5135, "step": 26712 }, { "epoch": 0.8187139879857791, "grad_norm": 0.4359079606275908, "learning_rate": 8.373687321658853e-07, "loss": 0.371, "step": 26713 }, { "epoch": 0.8187446365085203, "grad_norm": 1.479231832147593, "learning_rate": 8.370937997421014e-07, "loss": 0.6493, "step": 26714 }, { "epoch": 0.8187752850312615, "grad_norm": 0.4301542153689975, "learning_rate": 8.368189083364969e-07, "loss": 0.3718, "step": 26715 }, { "epoch": 0.8188059335540027, "grad_norm": 2.86411110853017, "learning_rate": 8.365440579517803e-07, "loss": 0.6859, "step": 26716 }, { "epoch": 0.8188365820767439, "grad_norm": 0.4383043062162877, "learning_rate": 8.362692485906599e-07, "loss": 0.4051, "step": 26717 }, { "epoch": 0.8188672305994851, "grad_norm": 1.2590256373983466, "learning_rate": 8.35994480255845e-07, "loss": 0.57, "step": 26718 }, { "epoch": 0.8188978791222263, "grad_norm": 0.4728727650769947, "learning_rate": 8.35719752950041e-07, "loss": 0.3923, "step": 26719 }, { "epoch": 0.8189285276449675, "grad_norm": 1.35234569533567, "learning_rate": 8.354450666759556e-07, "loss": 0.6194, "step": 26720 }, { "epoch": 0.8189591761677087, "grad_norm": 1.266340643200239, "learning_rate": 8.351704214362955e-07, "loss": 0.6241, "step": 26721 }, { "epoch": 0.8189898246904499, "grad_norm": 1.4256411442648833, "learning_rate": 8.348958172337684e-07, "loss": 0.649, "step": 26722 }, { "epoch": 0.8190204732131912, "grad_norm": 0.41901119935804676, "learning_rate": 8.34621254071078e-07, "loss": 0.3934, "step": 26723 }, { "epoch": 0.8190511217359323, "grad_norm": 1.3295378335169104, "learning_rate": 8.34346731950928e-07, "loss": 0.5366, "step": 26724 }, { "epoch": 0.8190817702586736, "grad_norm": 0.4557873948046033, "learning_rate": 8.34072250876028e-07, "loss": 0.3911, "step": 26725 }, { "epoch": 0.8191124187814147, "grad_norm": 1.3120335154229448, "learning_rate": 8.337978108490797e-07, "loss": 0.5861, "step": 26726 }, { "epoch": 0.819143067304156, "grad_norm": 1.3950364999085547, "learning_rate": 8.335234118727864e-07, "loss": 0.5442, "step": 26727 }, { "epoch": 0.8191737158268971, "grad_norm": 0.4237057526690912, "learning_rate": 8.332490539498533e-07, "loss": 0.3914, "step": 26728 }, { "epoch": 0.8192043643496384, "grad_norm": 1.240553211549207, "learning_rate": 8.329747370829844e-07, "loss": 0.5807, "step": 26729 }, { "epoch": 0.8192350128723795, "grad_norm": 1.2161088052727669, "learning_rate": 8.327004612748802e-07, "loss": 0.5643, "step": 26730 }, { "epoch": 0.8192656613951208, "grad_norm": 1.2859223853847823, "learning_rate": 8.324262265282446e-07, "loss": 0.5722, "step": 26731 }, { "epoch": 0.819296309917862, "grad_norm": 1.2455547053075127, "learning_rate": 8.3215203284578e-07, "loss": 0.5874, "step": 26732 }, { "epoch": 0.8193269584406032, "grad_norm": 1.3705974892440027, "learning_rate": 8.318778802301891e-07, "loss": 0.6077, "step": 26733 }, { "epoch": 0.8193576069633444, "grad_norm": 1.5368150612941824, "learning_rate": 8.316037686841716e-07, "loss": 0.5322, "step": 26734 }, { "epoch": 0.8193882554860856, "grad_norm": 1.414311432575938, "learning_rate": 8.313296982104274e-07, "loss": 0.6419, "step": 26735 }, { "epoch": 0.8194189040088268, "grad_norm": 0.44877171322941567, "learning_rate": 8.310556688116583e-07, "loss": 0.384, "step": 26736 }, { "epoch": 0.8194495525315679, "grad_norm": 0.4646807807381523, "learning_rate": 8.307816804905661e-07, "loss": 0.3951, "step": 26737 }, { "epoch": 0.8194802010543092, "grad_norm": 1.3550482962073978, "learning_rate": 8.305077332498468e-07, "loss": 0.554, "step": 26738 }, { "epoch": 0.8195108495770503, "grad_norm": 1.2313988933101054, "learning_rate": 8.302338270922022e-07, "loss": 0.6117, "step": 26739 }, { "epoch": 0.8195414980997916, "grad_norm": 1.3243553911853838, "learning_rate": 8.299599620203303e-07, "loss": 0.5807, "step": 26740 }, { "epoch": 0.8195721466225327, "grad_norm": 1.2056253795511498, "learning_rate": 8.296861380369308e-07, "loss": 0.6714, "step": 26741 }, { "epoch": 0.819602795145274, "grad_norm": 1.2373349873599433, "learning_rate": 8.294123551447014e-07, "loss": 0.5803, "step": 26742 }, { "epoch": 0.8196334436680152, "grad_norm": 1.3271121956618879, "learning_rate": 8.291386133463363e-07, "loss": 0.578, "step": 26743 }, { "epoch": 0.8196640921907564, "grad_norm": 1.3604811128067558, "learning_rate": 8.288649126445381e-07, "loss": 0.5568, "step": 26744 }, { "epoch": 0.8196947407134976, "grad_norm": 1.3592852229438066, "learning_rate": 8.285912530420015e-07, "loss": 0.6267, "step": 26745 }, { "epoch": 0.8197253892362388, "grad_norm": 1.244471374165741, "learning_rate": 8.283176345414212e-07, "loss": 0.6205, "step": 26746 }, { "epoch": 0.81975603775898, "grad_norm": 0.4476752703482219, "learning_rate": 8.280440571454945e-07, "loss": 0.3904, "step": 26747 }, { "epoch": 0.8197866862817212, "grad_norm": 1.344221041590853, "learning_rate": 8.277705208569181e-07, "loss": 0.5172, "step": 26748 }, { "epoch": 0.8198173348044624, "grad_norm": 0.4419942242154907, "learning_rate": 8.274970256783854e-07, "loss": 0.3707, "step": 26749 }, { "epoch": 0.8198479833272037, "grad_norm": 1.2996546239144053, "learning_rate": 8.272235716125921e-07, "loss": 0.5869, "step": 26750 }, { "epoch": 0.8198786318499448, "grad_norm": 1.3636589915675166, "learning_rate": 8.269501586622325e-07, "loss": 0.6153, "step": 26751 }, { "epoch": 0.8199092803726861, "grad_norm": 1.2492812375853068, "learning_rate": 8.266767868300019e-07, "loss": 0.5846, "step": 26752 }, { "epoch": 0.8199399288954272, "grad_norm": 1.6955544521709955, "learning_rate": 8.264034561185924e-07, "loss": 0.6476, "step": 26753 }, { "epoch": 0.8199705774181685, "grad_norm": 1.464574271678562, "learning_rate": 8.261301665306959e-07, "loss": 0.6327, "step": 26754 }, { "epoch": 0.8200012259409096, "grad_norm": 1.4446445793993563, "learning_rate": 8.258569180690085e-07, "loss": 0.6686, "step": 26755 }, { "epoch": 0.8200318744636509, "grad_norm": 1.2223570635792353, "learning_rate": 8.255837107362213e-07, "loss": 0.6014, "step": 26756 }, { "epoch": 0.820062522986392, "grad_norm": 1.437910348214571, "learning_rate": 8.253105445350245e-07, "loss": 0.6637, "step": 26757 }, { "epoch": 0.8200931715091333, "grad_norm": 1.3773987199601727, "learning_rate": 8.25037419468111e-07, "loss": 0.627, "step": 26758 }, { "epoch": 0.8201238200318745, "grad_norm": 1.2492578282636804, "learning_rate": 8.247643355381718e-07, "loss": 0.5574, "step": 26759 }, { "epoch": 0.8201544685546157, "grad_norm": 1.35471720971645, "learning_rate": 8.244912927478992e-07, "loss": 0.5894, "step": 26760 }, { "epoch": 0.8201851170773569, "grad_norm": 1.2488593286374505, "learning_rate": 8.242182910999813e-07, "loss": 0.5656, "step": 26761 }, { "epoch": 0.8202157656000981, "grad_norm": 1.3998440170188318, "learning_rate": 8.239453305971091e-07, "loss": 0.5736, "step": 26762 }, { "epoch": 0.8202464141228393, "grad_norm": 1.2058433620540756, "learning_rate": 8.236724112419731e-07, "loss": 0.6851, "step": 26763 }, { "epoch": 0.8202770626455805, "grad_norm": 1.392477566255443, "learning_rate": 8.233995330372613e-07, "loss": 0.6021, "step": 26764 }, { "epoch": 0.8203077111683217, "grad_norm": 1.2298671161298937, "learning_rate": 8.231266959856599e-07, "loss": 0.5934, "step": 26765 }, { "epoch": 0.820338359691063, "grad_norm": 1.304147687337827, "learning_rate": 8.228539000898627e-07, "loss": 0.6113, "step": 26766 }, { "epoch": 0.8203690082138041, "grad_norm": 1.4518071718450678, "learning_rate": 8.225811453525534e-07, "loss": 0.6256, "step": 26767 }, { "epoch": 0.8203996567365452, "grad_norm": 1.3254841926504612, "learning_rate": 8.223084317764219e-07, "loss": 0.5586, "step": 26768 }, { "epoch": 0.8204303052592865, "grad_norm": 1.3951557980902747, "learning_rate": 8.220357593641532e-07, "loss": 0.6544, "step": 26769 }, { "epoch": 0.8204609537820277, "grad_norm": 1.256838897378099, "learning_rate": 8.217631281184352e-07, "loss": 0.6686, "step": 26770 }, { "epoch": 0.8204916023047689, "grad_norm": 1.2386050408432079, "learning_rate": 8.214905380419557e-07, "loss": 0.5826, "step": 26771 }, { "epoch": 0.8205222508275101, "grad_norm": 1.3954927219097422, "learning_rate": 8.212179891373979e-07, "loss": 0.6163, "step": 26772 }, { "epoch": 0.8205528993502513, "grad_norm": 1.3231395258926146, "learning_rate": 8.20945481407448e-07, "loss": 0.5413, "step": 26773 }, { "epoch": 0.8205835478729925, "grad_norm": 1.3897397241216813, "learning_rate": 8.206730148547926e-07, "loss": 0.6217, "step": 26774 }, { "epoch": 0.8206141963957337, "grad_norm": 1.3745304268476022, "learning_rate": 8.204005894821154e-07, "loss": 0.5192, "step": 26775 }, { "epoch": 0.8206448449184749, "grad_norm": 1.5522734724494422, "learning_rate": 8.201282052920984e-07, "loss": 0.6916, "step": 26776 }, { "epoch": 0.8206754934412162, "grad_norm": 0.4596445759689757, "learning_rate": 8.198558622874303e-07, "loss": 0.4119, "step": 26777 }, { "epoch": 0.8207061419639573, "grad_norm": 1.2918897159255667, "learning_rate": 8.195835604707903e-07, "loss": 0.6264, "step": 26778 }, { "epoch": 0.8207367904866986, "grad_norm": 1.237992712439858, "learning_rate": 8.193112998448644e-07, "loss": 0.6145, "step": 26779 }, { "epoch": 0.8207674390094397, "grad_norm": 1.2296529732599317, "learning_rate": 8.190390804123327e-07, "loss": 0.5976, "step": 26780 }, { "epoch": 0.820798087532181, "grad_norm": 1.6422414418334312, "learning_rate": 8.187669021758788e-07, "loss": 0.5928, "step": 26781 }, { "epoch": 0.8208287360549221, "grad_norm": 1.518320496419632, "learning_rate": 8.184947651381853e-07, "loss": 0.4699, "step": 26782 }, { "epoch": 0.8208593845776634, "grad_norm": 1.2565913880036834, "learning_rate": 8.182226693019318e-07, "loss": 0.6053, "step": 26783 }, { "epoch": 0.8208900331004045, "grad_norm": 1.3646865967594883, "learning_rate": 8.179506146698008e-07, "loss": 0.6675, "step": 26784 }, { "epoch": 0.8209206816231458, "grad_norm": 1.3212386204212492, "learning_rate": 8.176786012444727e-07, "loss": 0.5773, "step": 26785 }, { "epoch": 0.820951330145887, "grad_norm": 1.3236224292398313, "learning_rate": 8.17406629028627e-07, "loss": 0.5626, "step": 26786 }, { "epoch": 0.8209819786686282, "grad_norm": 1.436172263868816, "learning_rate": 8.171346980249445e-07, "loss": 0.597, "step": 26787 }, { "epoch": 0.8210126271913694, "grad_norm": 1.3804146133822515, "learning_rate": 8.168628082361035e-07, "loss": 0.5944, "step": 26788 }, { "epoch": 0.8210432757141106, "grad_norm": 1.2755130086064623, "learning_rate": 8.165909596647831e-07, "loss": 0.5959, "step": 26789 }, { "epoch": 0.8210739242368518, "grad_norm": 1.3877273597132473, "learning_rate": 8.163191523136638e-07, "loss": 0.5341, "step": 26790 }, { "epoch": 0.821104572759593, "grad_norm": 1.418261366756428, "learning_rate": 8.160473861854206e-07, "loss": 0.5157, "step": 26791 }, { "epoch": 0.8211352212823342, "grad_norm": 1.3158579667800432, "learning_rate": 8.157756612827334e-07, "loss": 0.5703, "step": 26792 }, { "epoch": 0.8211658698050754, "grad_norm": 1.2709907505698612, "learning_rate": 8.155039776082807e-07, "loss": 0.6112, "step": 26793 }, { "epoch": 0.8211965183278166, "grad_norm": 1.4518475163214415, "learning_rate": 8.152323351647362e-07, "loss": 0.5561, "step": 26794 }, { "epoch": 0.8212271668505579, "grad_norm": 1.235898647394752, "learning_rate": 8.149607339547788e-07, "loss": 0.4514, "step": 26795 }, { "epoch": 0.821257815373299, "grad_norm": 1.3492959583089836, "learning_rate": 8.146891739810847e-07, "loss": 0.5525, "step": 26796 }, { "epoch": 0.8212884638960403, "grad_norm": 1.590513309699045, "learning_rate": 8.144176552463285e-07, "loss": 0.6378, "step": 26797 }, { "epoch": 0.8213191124187814, "grad_norm": 1.3637585384134554, "learning_rate": 8.141461777531867e-07, "loss": 0.5469, "step": 26798 }, { "epoch": 0.8213497609415226, "grad_norm": 1.3470636390572772, "learning_rate": 8.138747415043324e-07, "loss": 0.5691, "step": 26799 }, { "epoch": 0.8213804094642638, "grad_norm": 1.2759392274989243, "learning_rate": 8.136033465024417e-07, "loss": 0.5307, "step": 26800 }, { "epoch": 0.821411057987005, "grad_norm": 1.272095020561714, "learning_rate": 8.133319927501893e-07, "loss": 0.506, "step": 26801 }, { "epoch": 0.8214417065097462, "grad_norm": 1.4027398239615632, "learning_rate": 8.130606802502467e-07, "loss": 0.5391, "step": 26802 }, { "epoch": 0.8214723550324874, "grad_norm": 1.2419377722238014, "learning_rate": 8.127894090052884e-07, "loss": 0.519, "step": 26803 }, { "epoch": 0.8215030035552287, "grad_norm": 1.1960410851864676, "learning_rate": 8.125181790179892e-07, "loss": 0.5916, "step": 26804 }, { "epoch": 0.8215336520779698, "grad_norm": 1.2428995088880275, "learning_rate": 8.12246990291018e-07, "loss": 0.5437, "step": 26805 }, { "epoch": 0.8215643006007111, "grad_norm": 1.3129944936714026, "learning_rate": 8.119758428270491e-07, "loss": 0.5583, "step": 26806 }, { "epoch": 0.8215949491234522, "grad_norm": 1.22513432412577, "learning_rate": 8.117047366287545e-07, "loss": 0.549, "step": 26807 }, { "epoch": 0.8216255976461935, "grad_norm": 1.4225105058389615, "learning_rate": 8.114336716988041e-07, "loss": 0.6122, "step": 26808 }, { "epoch": 0.8216562461689346, "grad_norm": 1.2433827917307205, "learning_rate": 8.111626480398705e-07, "loss": 0.5152, "step": 26809 }, { "epoch": 0.8216868946916759, "grad_norm": 0.4326183473284732, "learning_rate": 8.108916656546218e-07, "loss": 0.3921, "step": 26810 }, { "epoch": 0.821717543214417, "grad_norm": 1.243974704269273, "learning_rate": 8.106207245457293e-07, "loss": 0.5972, "step": 26811 }, { "epoch": 0.8217481917371583, "grad_norm": 0.45347416732864615, "learning_rate": 8.103498247158636e-07, "loss": 0.3928, "step": 26812 }, { "epoch": 0.8217788402598994, "grad_norm": 1.4075654692055755, "learning_rate": 8.100789661676922e-07, "loss": 0.5506, "step": 26813 }, { "epoch": 0.8218094887826407, "grad_norm": 1.4777850975818785, "learning_rate": 8.098081489038845e-07, "loss": 0.652, "step": 26814 }, { "epoch": 0.8218401373053819, "grad_norm": 1.483398522890688, "learning_rate": 8.095373729271111e-07, "loss": 0.6501, "step": 26815 }, { "epoch": 0.8218707858281231, "grad_norm": 1.425413384134506, "learning_rate": 8.09266638240036e-07, "loss": 0.5916, "step": 26816 }, { "epoch": 0.8219014343508643, "grad_norm": 1.3745892934063577, "learning_rate": 8.0899594484533e-07, "loss": 0.6147, "step": 26817 }, { "epoch": 0.8219320828736055, "grad_norm": 1.3077150968276514, "learning_rate": 8.087252927456596e-07, "loss": 0.6262, "step": 26818 }, { "epoch": 0.8219627313963467, "grad_norm": 1.3780258822218738, "learning_rate": 8.084546819436906e-07, "loss": 0.5603, "step": 26819 }, { "epoch": 0.8219933799190879, "grad_norm": 1.195535462118183, "learning_rate": 8.081841124420909e-07, "loss": 0.6079, "step": 26820 }, { "epoch": 0.8220240284418291, "grad_norm": 1.2492717964216071, "learning_rate": 8.079135842435232e-07, "loss": 0.5734, "step": 26821 }, { "epoch": 0.8220546769645704, "grad_norm": 1.3941875603548142, "learning_rate": 8.076430973506583e-07, "loss": 0.5906, "step": 26822 }, { "epoch": 0.8220853254873115, "grad_norm": 1.3877185399768837, "learning_rate": 8.073726517661584e-07, "loss": 0.592, "step": 26823 }, { "epoch": 0.8221159740100528, "grad_norm": 1.3603349535428266, "learning_rate": 8.071022474926876e-07, "loss": 0.6428, "step": 26824 }, { "epoch": 0.8221466225327939, "grad_norm": 0.42524845457680444, "learning_rate": 8.06831884532911e-07, "loss": 0.3945, "step": 26825 }, { "epoch": 0.8221772710555352, "grad_norm": 1.3522825976212478, "learning_rate": 8.065615628894935e-07, "loss": 0.6396, "step": 26826 }, { "epoch": 0.8222079195782763, "grad_norm": 1.3414706458003063, "learning_rate": 8.06291282565097e-07, "loss": 0.6441, "step": 26827 }, { "epoch": 0.8222385681010176, "grad_norm": 3.2337517204420796, "learning_rate": 8.060210435623855e-07, "loss": 0.654, "step": 26828 }, { "epoch": 0.8222692166237587, "grad_norm": 1.3680360354093708, "learning_rate": 8.05750845884023e-07, "loss": 0.605, "step": 26829 }, { "epoch": 0.8222998651464999, "grad_norm": 0.4617626709182252, "learning_rate": 8.054806895326695e-07, "loss": 0.3809, "step": 26830 }, { "epoch": 0.8223305136692411, "grad_norm": 1.3694018279166413, "learning_rate": 8.052105745109889e-07, "loss": 0.6607, "step": 26831 }, { "epoch": 0.8223611621919823, "grad_norm": 1.4344642105655037, "learning_rate": 8.049405008216405e-07, "loss": 0.6358, "step": 26832 }, { "epoch": 0.8223918107147236, "grad_norm": 1.27882005546722, "learning_rate": 8.046704684672868e-07, "loss": 0.6153, "step": 26833 }, { "epoch": 0.8224224592374647, "grad_norm": 1.292601876009306, "learning_rate": 8.044004774505898e-07, "loss": 0.5477, "step": 26834 }, { "epoch": 0.822453107760206, "grad_norm": 1.2972843649246455, "learning_rate": 8.041305277742073e-07, "loss": 0.5375, "step": 26835 }, { "epoch": 0.8224837562829471, "grad_norm": 1.2148147401205969, "learning_rate": 8.038606194408e-07, "loss": 0.5889, "step": 26836 }, { "epoch": 0.8225144048056884, "grad_norm": 0.45267978688133875, "learning_rate": 8.035907524530289e-07, "loss": 0.3873, "step": 26837 }, { "epoch": 0.8225450533284295, "grad_norm": 1.3799234711644284, "learning_rate": 8.033209268135506e-07, "loss": 0.5525, "step": 26838 }, { "epoch": 0.8225757018511708, "grad_norm": 1.3177626300549103, "learning_rate": 8.03051142525026e-07, "loss": 0.6183, "step": 26839 }, { "epoch": 0.8226063503739119, "grad_norm": 0.438817762695084, "learning_rate": 8.027813995901101e-07, "loss": 0.3858, "step": 26840 }, { "epoch": 0.8226369988966532, "grad_norm": 1.441317440544987, "learning_rate": 8.025116980114656e-07, "loss": 0.6301, "step": 26841 }, { "epoch": 0.8226676474193944, "grad_norm": 1.2615637857342887, "learning_rate": 8.022420377917467e-07, "loss": 0.5935, "step": 26842 }, { "epoch": 0.8226982959421356, "grad_norm": 1.3594128633081486, "learning_rate": 8.019724189336103e-07, "loss": 0.5447, "step": 26843 }, { "epoch": 0.8227289444648768, "grad_norm": 1.2533345720261755, "learning_rate": 8.017028414397138e-07, "loss": 0.5675, "step": 26844 }, { "epoch": 0.822759592987618, "grad_norm": 0.4523717669120881, "learning_rate": 8.014333053127144e-07, "loss": 0.3907, "step": 26845 }, { "epoch": 0.8227902415103592, "grad_norm": 1.2349666475182064, "learning_rate": 8.011638105552655e-07, "loss": 0.5217, "step": 26846 }, { "epoch": 0.8228208900331004, "grad_norm": 1.3547551305019536, "learning_rate": 8.008943571700245e-07, "loss": 0.6082, "step": 26847 }, { "epoch": 0.8228515385558416, "grad_norm": 1.3564393200828408, "learning_rate": 8.006249451596454e-07, "loss": 0.5967, "step": 26848 }, { "epoch": 0.8228821870785828, "grad_norm": 1.3806649131657542, "learning_rate": 8.003555745267844e-07, "loss": 0.583, "step": 26849 }, { "epoch": 0.822912835601324, "grad_norm": 1.1876294131144534, "learning_rate": 8.000862452740943e-07, "loss": 0.5732, "step": 26850 }, { "epoch": 0.8229434841240653, "grad_norm": 1.344234884515141, "learning_rate": 7.998169574042269e-07, "loss": 0.4914, "step": 26851 }, { "epoch": 0.8229741326468064, "grad_norm": 1.3354968590711032, "learning_rate": 7.995477109198397e-07, "loss": 0.6062, "step": 26852 }, { "epoch": 0.8230047811695477, "grad_norm": 1.4840476560218343, "learning_rate": 7.992785058235841e-07, "loss": 0.5826, "step": 26853 }, { "epoch": 0.8230354296922888, "grad_norm": 1.2882407853014253, "learning_rate": 7.990093421181106e-07, "loss": 0.6585, "step": 26854 }, { "epoch": 0.8230660782150301, "grad_norm": 1.3001937363532647, "learning_rate": 7.987402198060734e-07, "loss": 0.578, "step": 26855 }, { "epoch": 0.8230967267377712, "grad_norm": 1.3591847723963026, "learning_rate": 7.984711388901246e-07, "loss": 0.619, "step": 26856 }, { "epoch": 0.8231273752605125, "grad_norm": 1.3551445530939754, "learning_rate": 7.982020993729134e-07, "loss": 0.5724, "step": 26857 }, { "epoch": 0.8231580237832536, "grad_norm": 1.2933129158557117, "learning_rate": 7.979331012570923e-07, "loss": 0.6032, "step": 26858 }, { "epoch": 0.8231886723059949, "grad_norm": 1.329168457354048, "learning_rate": 7.97664144545311e-07, "loss": 0.5443, "step": 26859 }, { "epoch": 0.8232193208287361, "grad_norm": 1.4282883733183065, "learning_rate": 7.973952292402215e-07, "loss": 0.6209, "step": 26860 }, { "epoch": 0.8232499693514772, "grad_norm": 1.397921810351825, "learning_rate": 7.971263553444725e-07, "loss": 0.564, "step": 26861 }, { "epoch": 0.8232806178742185, "grad_norm": 1.4847546712345023, "learning_rate": 7.9685752286071e-07, "loss": 0.5716, "step": 26862 }, { "epoch": 0.8233112663969596, "grad_norm": 1.3676474454921212, "learning_rate": 7.965887317915882e-07, "loss": 0.5336, "step": 26863 }, { "epoch": 0.8233419149197009, "grad_norm": 1.2385056774402947, "learning_rate": 7.963199821397533e-07, "loss": 0.5376, "step": 26864 }, { "epoch": 0.823372563442442, "grad_norm": 1.458249229056399, "learning_rate": 7.960512739078519e-07, "loss": 0.6156, "step": 26865 }, { "epoch": 0.8234032119651833, "grad_norm": 1.2087176160284496, "learning_rate": 7.957826070985331e-07, "loss": 0.6213, "step": 26866 }, { "epoch": 0.8234338604879244, "grad_norm": 1.2401898209792017, "learning_rate": 7.955139817144436e-07, "loss": 0.4992, "step": 26867 }, { "epoch": 0.8234645090106657, "grad_norm": 1.3227484240968101, "learning_rate": 7.952453977582325e-07, "loss": 0.5927, "step": 26868 }, { "epoch": 0.8234951575334069, "grad_norm": 1.4447155943551948, "learning_rate": 7.949768552325426e-07, "loss": 0.7069, "step": 26869 }, { "epoch": 0.8235258060561481, "grad_norm": 1.4439226932644769, "learning_rate": 7.947083541400213e-07, "loss": 0.6786, "step": 26870 }, { "epoch": 0.8235564545788893, "grad_norm": 1.4552112821451402, "learning_rate": 7.944398944833165e-07, "loss": 0.6237, "step": 26871 }, { "epoch": 0.8235871031016305, "grad_norm": 1.1900231820635507, "learning_rate": 7.941714762650709e-07, "loss": 0.5729, "step": 26872 }, { "epoch": 0.8236177516243717, "grad_norm": 1.3114804458843303, "learning_rate": 7.939030994879277e-07, "loss": 0.6211, "step": 26873 }, { "epoch": 0.8236484001471129, "grad_norm": 1.4866917362554846, "learning_rate": 7.936347641545356e-07, "loss": 0.5515, "step": 26874 }, { "epoch": 0.8236790486698541, "grad_norm": 0.44731462801154337, "learning_rate": 7.933664702675354e-07, "loss": 0.3887, "step": 26875 }, { "epoch": 0.8237096971925953, "grad_norm": 1.303270509297322, "learning_rate": 7.930982178295732e-07, "loss": 0.6577, "step": 26876 }, { "epoch": 0.8237403457153365, "grad_norm": 1.1652427476751046, "learning_rate": 7.928300068432887e-07, "loss": 0.4584, "step": 26877 }, { "epoch": 0.8237709942380778, "grad_norm": 1.3005567637646729, "learning_rate": 7.925618373113275e-07, "loss": 0.4906, "step": 26878 }, { "epoch": 0.8238016427608189, "grad_norm": 1.386287831610098, "learning_rate": 7.922937092363315e-07, "loss": 0.6084, "step": 26879 }, { "epoch": 0.8238322912835602, "grad_norm": 1.2869942231893219, "learning_rate": 7.920256226209411e-07, "loss": 0.6314, "step": 26880 }, { "epoch": 0.8238629398063013, "grad_norm": 0.4523038980682437, "learning_rate": 7.917575774677994e-07, "loss": 0.3925, "step": 26881 }, { "epoch": 0.8238935883290426, "grad_norm": 1.4417447340961391, "learning_rate": 7.914895737795475e-07, "loss": 0.5998, "step": 26882 }, { "epoch": 0.8239242368517837, "grad_norm": 1.2296116027526216, "learning_rate": 7.912216115588262e-07, "loss": 0.5216, "step": 26883 }, { "epoch": 0.823954885374525, "grad_norm": 1.2783441532922835, "learning_rate": 7.909536908082737e-07, "loss": 0.6367, "step": 26884 }, { "epoch": 0.8239855338972661, "grad_norm": 1.5113074612645876, "learning_rate": 7.906858115305316e-07, "loss": 0.589, "step": 26885 }, { "epoch": 0.8240161824200074, "grad_norm": 1.2847091122737488, "learning_rate": 7.904179737282392e-07, "loss": 0.5163, "step": 26886 }, { "epoch": 0.8240468309427486, "grad_norm": 1.3243208715874943, "learning_rate": 7.901501774040366e-07, "loss": 0.5518, "step": 26887 }, { "epoch": 0.8240774794654898, "grad_norm": 1.5168306042160677, "learning_rate": 7.8988242256056e-07, "loss": 0.599, "step": 26888 }, { "epoch": 0.824108127988231, "grad_norm": 0.4605964706747766, "learning_rate": 7.896147092004497e-07, "loss": 0.3773, "step": 26889 }, { "epoch": 0.8241387765109722, "grad_norm": 1.4088427091886064, "learning_rate": 7.893470373263434e-07, "loss": 0.6433, "step": 26890 }, { "epoch": 0.8241694250337134, "grad_norm": 1.5427406209755958, "learning_rate": 7.890794069408786e-07, "loss": 0.6016, "step": 26891 }, { "epoch": 0.8242000735564545, "grad_norm": 1.2903866867129268, "learning_rate": 7.888118180466897e-07, "loss": 0.5707, "step": 26892 }, { "epoch": 0.8242307220791958, "grad_norm": 1.2669599990394969, "learning_rate": 7.885442706464175e-07, "loss": 0.6795, "step": 26893 }, { "epoch": 0.8242613706019369, "grad_norm": 1.2848840668301351, "learning_rate": 7.882767647426947e-07, "loss": 0.6263, "step": 26894 }, { "epoch": 0.8242920191246782, "grad_norm": 1.448662857726696, "learning_rate": 7.8800930033816e-07, "loss": 0.6258, "step": 26895 }, { "epoch": 0.8243226676474193, "grad_norm": 1.3834656953599767, "learning_rate": 7.877418774354462e-07, "loss": 0.5218, "step": 26896 }, { "epoch": 0.8243533161701606, "grad_norm": 1.39230296817425, "learning_rate": 7.874744960371894e-07, "loss": 0.6559, "step": 26897 }, { "epoch": 0.8243839646929018, "grad_norm": 0.44142724030015335, "learning_rate": 7.872071561460254e-07, "loss": 0.3838, "step": 26898 }, { "epoch": 0.824414613215643, "grad_norm": 1.382179063762103, "learning_rate": 7.869398577645859e-07, "loss": 0.5098, "step": 26899 }, { "epoch": 0.8244452617383842, "grad_norm": 1.218263415504818, "learning_rate": 7.866726008955062e-07, "loss": 0.5753, "step": 26900 }, { "epoch": 0.8244759102611254, "grad_norm": 0.44364735743061245, "learning_rate": 7.8640538554142e-07, "loss": 0.3983, "step": 26901 }, { "epoch": 0.8245065587838666, "grad_norm": 1.4038800406345169, "learning_rate": 7.861382117049599e-07, "loss": 0.6657, "step": 26902 }, { "epoch": 0.8245372073066078, "grad_norm": 0.44540025061474053, "learning_rate": 7.858710793887558e-07, "loss": 0.3795, "step": 26903 }, { "epoch": 0.824567855829349, "grad_norm": 1.3222165969480077, "learning_rate": 7.856039885954447e-07, "loss": 0.574, "step": 26904 }, { "epoch": 0.8245985043520903, "grad_norm": 1.2748943960606656, "learning_rate": 7.85336939327655e-07, "loss": 0.567, "step": 26905 }, { "epoch": 0.8246291528748314, "grad_norm": 1.5190552028232827, "learning_rate": 7.850699315880195e-07, "loss": 0.6358, "step": 26906 }, { "epoch": 0.8246598013975727, "grad_norm": 1.414874815735248, "learning_rate": 7.848029653791673e-07, "loss": 0.5436, "step": 26907 }, { "epoch": 0.8246904499203138, "grad_norm": 0.4429203201442907, "learning_rate": 7.8453604070373e-07, "loss": 0.3794, "step": 26908 }, { "epoch": 0.8247210984430551, "grad_norm": 0.42817230477045737, "learning_rate": 7.842691575643385e-07, "loss": 0.3808, "step": 26909 }, { "epoch": 0.8247517469657962, "grad_norm": 0.4725135018264956, "learning_rate": 7.840023159636206e-07, "loss": 0.4051, "step": 26910 }, { "epoch": 0.8247823954885375, "grad_norm": 1.3367709313549225, "learning_rate": 7.83735515904207e-07, "loss": 0.6304, "step": 26911 }, { "epoch": 0.8248130440112786, "grad_norm": 1.419869499737059, "learning_rate": 7.834687573887273e-07, "loss": 0.6127, "step": 26912 }, { "epoch": 0.8248436925340199, "grad_norm": 1.3556597897326332, "learning_rate": 7.832020404198071e-07, "loss": 0.5676, "step": 26913 }, { "epoch": 0.824874341056761, "grad_norm": 1.5401850254954843, "learning_rate": 7.829353650000765e-07, "loss": 0.5315, "step": 26914 }, { "epoch": 0.8249049895795023, "grad_norm": 1.3448759240953074, "learning_rate": 7.826687311321635e-07, "loss": 0.6039, "step": 26915 }, { "epoch": 0.8249356381022435, "grad_norm": 1.3300892561808872, "learning_rate": 7.824021388186936e-07, "loss": 0.5553, "step": 26916 }, { "epoch": 0.8249662866249847, "grad_norm": 0.4347544494590674, "learning_rate": 7.82135588062296e-07, "loss": 0.3952, "step": 26917 }, { "epoch": 0.8249969351477259, "grad_norm": 1.4526559407669175, "learning_rate": 7.818690788655941e-07, "loss": 0.6816, "step": 26918 }, { "epoch": 0.8250275836704671, "grad_norm": 1.2633612915165349, "learning_rate": 7.816026112312159e-07, "loss": 0.5691, "step": 26919 }, { "epoch": 0.8250582321932083, "grad_norm": 0.4257812651468248, "learning_rate": 7.813361851617873e-07, "loss": 0.391, "step": 26920 }, { "epoch": 0.8250888807159495, "grad_norm": 1.29137584583099, "learning_rate": 7.810698006599316e-07, "loss": 0.5801, "step": 26921 }, { "epoch": 0.8251195292386907, "grad_norm": 1.3063246407105746, "learning_rate": 7.808034577282741e-07, "loss": 0.5852, "step": 26922 }, { "epoch": 0.8251501777614318, "grad_norm": 1.2028299120171555, "learning_rate": 7.805371563694413e-07, "loss": 0.5248, "step": 26923 }, { "epoch": 0.8251808262841731, "grad_norm": 0.4282410944163691, "learning_rate": 7.802708965860545e-07, "loss": 0.3795, "step": 26924 }, { "epoch": 0.8252114748069143, "grad_norm": 0.42991223382150257, "learning_rate": 7.80004678380738e-07, "loss": 0.3999, "step": 26925 }, { "epoch": 0.8252421233296555, "grad_norm": 1.4297025029477228, "learning_rate": 7.79738501756116e-07, "loss": 0.6264, "step": 26926 }, { "epoch": 0.8252727718523967, "grad_norm": 1.3739937806859046, "learning_rate": 7.794723667148097e-07, "loss": 0.6288, "step": 26927 }, { "epoch": 0.8253034203751379, "grad_norm": 1.5079393972413355, "learning_rate": 7.79206273259443e-07, "loss": 0.5801, "step": 26928 }, { "epoch": 0.8253340688978791, "grad_norm": 1.387483325708779, "learning_rate": 7.789402213926356e-07, "loss": 0.5552, "step": 26929 }, { "epoch": 0.8253647174206203, "grad_norm": 1.3462273120844177, "learning_rate": 7.786742111170104e-07, "loss": 0.5755, "step": 26930 }, { "epoch": 0.8253953659433615, "grad_norm": 1.4246284679593946, "learning_rate": 7.784082424351891e-07, "loss": 0.5683, "step": 26931 }, { "epoch": 0.8254260144661028, "grad_norm": 1.602078803569656, "learning_rate": 7.781423153497908e-07, "loss": 0.5092, "step": 26932 }, { "epoch": 0.8254566629888439, "grad_norm": 1.4199560834234823, "learning_rate": 7.778764298634361e-07, "loss": 0.548, "step": 26933 }, { "epoch": 0.8254873115115852, "grad_norm": 1.5154816754418374, "learning_rate": 7.776105859787464e-07, "loss": 0.6111, "step": 26934 }, { "epoch": 0.8255179600343263, "grad_norm": 1.3678811210993747, "learning_rate": 7.773447836983388e-07, "loss": 0.5728, "step": 26935 }, { "epoch": 0.8255486085570676, "grad_norm": 1.2361723168486691, "learning_rate": 7.770790230248349e-07, "loss": 0.529, "step": 26936 }, { "epoch": 0.8255792570798087, "grad_norm": 1.2482985535564912, "learning_rate": 7.768133039608506e-07, "loss": 0.6108, "step": 26937 }, { "epoch": 0.82560990560255, "grad_norm": 1.409429392377363, "learning_rate": 7.765476265090049e-07, "loss": 0.5908, "step": 26938 }, { "epoch": 0.8256405541252911, "grad_norm": 1.4307743987963508, "learning_rate": 7.762819906719177e-07, "loss": 0.6356, "step": 26939 }, { "epoch": 0.8256712026480324, "grad_norm": 1.381838720796806, "learning_rate": 7.760163964522033e-07, "loss": 0.5723, "step": 26940 }, { "epoch": 0.8257018511707735, "grad_norm": 1.253365817844322, "learning_rate": 7.757508438524803e-07, "loss": 0.4676, "step": 26941 }, { "epoch": 0.8257324996935148, "grad_norm": 1.4326950399159184, "learning_rate": 7.754853328753664e-07, "loss": 0.6757, "step": 26942 }, { "epoch": 0.825763148216256, "grad_norm": 1.2392282843990086, "learning_rate": 7.752198635234748e-07, "loss": 0.5618, "step": 26943 }, { "epoch": 0.8257937967389972, "grad_norm": 1.2634837262571395, "learning_rate": 7.749544357994232e-07, "loss": 0.6491, "step": 26944 }, { "epoch": 0.8258244452617384, "grad_norm": 0.45074012921921064, "learning_rate": 7.746890497058273e-07, "loss": 0.3939, "step": 26945 }, { "epoch": 0.8258550937844796, "grad_norm": 1.3476359974304453, "learning_rate": 7.744237052453007e-07, "loss": 0.5824, "step": 26946 }, { "epoch": 0.8258857423072208, "grad_norm": 1.3806479829983052, "learning_rate": 7.741584024204596e-07, "loss": 0.645, "step": 26947 }, { "epoch": 0.825916390829962, "grad_norm": 1.160821157706898, "learning_rate": 7.73893141233914e-07, "loss": 0.4894, "step": 26948 }, { "epoch": 0.8259470393527032, "grad_norm": 1.57157918733752, "learning_rate": 7.736279216882836e-07, "loss": 0.5591, "step": 26949 }, { "epoch": 0.8259776878754445, "grad_norm": 1.2012346161051621, "learning_rate": 7.733627437861784e-07, "loss": 0.516, "step": 26950 }, { "epoch": 0.8260083363981856, "grad_norm": 1.3677682781313742, "learning_rate": 7.730976075302099e-07, "loss": 0.5468, "step": 26951 }, { "epoch": 0.8260389849209269, "grad_norm": 1.1992552281160178, "learning_rate": 7.72832512922993e-07, "loss": 0.5355, "step": 26952 }, { "epoch": 0.826069633443668, "grad_norm": 1.1856473161650467, "learning_rate": 7.725674599671395e-07, "loss": 0.5235, "step": 26953 }, { "epoch": 0.8261002819664092, "grad_norm": 1.3773312938681277, "learning_rate": 7.723024486652598e-07, "loss": 0.5592, "step": 26954 }, { "epoch": 0.8261309304891504, "grad_norm": 1.3732980037939972, "learning_rate": 7.720374790199653e-07, "loss": 0.6231, "step": 26955 }, { "epoch": 0.8261615790118916, "grad_norm": 1.4873687207776436, "learning_rate": 7.717725510338686e-07, "loss": 0.6264, "step": 26956 }, { "epoch": 0.8261922275346328, "grad_norm": 1.5097835236376318, "learning_rate": 7.715076647095776e-07, "loss": 0.6568, "step": 26957 }, { "epoch": 0.826222876057374, "grad_norm": 1.295785660636487, "learning_rate": 7.712428200497047e-07, "loss": 0.6324, "step": 26958 }, { "epoch": 0.8262535245801153, "grad_norm": 1.3308276284293195, "learning_rate": 7.709780170568559e-07, "loss": 0.6793, "step": 26959 }, { "epoch": 0.8262841731028564, "grad_norm": 1.390780063322594, "learning_rate": 7.70713255733645e-07, "loss": 0.5894, "step": 26960 }, { "epoch": 0.8263148216255977, "grad_norm": 1.2936088262875713, "learning_rate": 7.704485360826785e-07, "loss": 0.6317, "step": 26961 }, { "epoch": 0.8263454701483388, "grad_norm": 0.4629638774911725, "learning_rate": 7.701838581065635e-07, "loss": 0.3968, "step": 26962 }, { "epoch": 0.8263761186710801, "grad_norm": 0.43962653420696945, "learning_rate": 7.699192218079093e-07, "loss": 0.3721, "step": 26963 }, { "epoch": 0.8264067671938212, "grad_norm": 1.4022239537783343, "learning_rate": 7.696546271893252e-07, "loss": 0.5671, "step": 26964 }, { "epoch": 0.8264374157165625, "grad_norm": 1.2361072832804398, "learning_rate": 7.693900742534144e-07, "loss": 0.648, "step": 26965 }, { "epoch": 0.8264680642393036, "grad_norm": 1.456177839522724, "learning_rate": 7.691255630027855e-07, "loss": 0.571, "step": 26966 }, { "epoch": 0.8264987127620449, "grad_norm": 1.292124381196753, "learning_rate": 7.688610934400453e-07, "loss": 0.5011, "step": 26967 }, { "epoch": 0.826529361284786, "grad_norm": 1.4895832603925208, "learning_rate": 7.685966655678006e-07, "loss": 0.568, "step": 26968 }, { "epoch": 0.8265600098075273, "grad_norm": 1.2943076995617053, "learning_rate": 7.683322793886555e-07, "loss": 0.5511, "step": 26969 }, { "epoch": 0.8265906583302685, "grad_norm": 1.4084463315203792, "learning_rate": 7.680679349052128e-07, "loss": 0.6455, "step": 26970 }, { "epoch": 0.8266213068530097, "grad_norm": 1.3383622277085456, "learning_rate": 7.678036321200821e-07, "loss": 0.668, "step": 26971 }, { "epoch": 0.8266519553757509, "grad_norm": 1.3502132346990867, "learning_rate": 7.675393710358647e-07, "loss": 0.6546, "step": 26972 }, { "epoch": 0.8266826038984921, "grad_norm": 1.3993339253922394, "learning_rate": 7.672751516551641e-07, "loss": 0.6914, "step": 26973 }, { "epoch": 0.8267132524212333, "grad_norm": 1.1726738120807534, "learning_rate": 7.670109739805842e-07, "loss": 0.5337, "step": 26974 }, { "epoch": 0.8267439009439745, "grad_norm": 1.3467602599324513, "learning_rate": 7.667468380147281e-07, "loss": 0.6051, "step": 26975 }, { "epoch": 0.8267745494667157, "grad_norm": 0.43601942001433763, "learning_rate": 7.664827437601996e-07, "loss": 0.3811, "step": 26976 }, { "epoch": 0.826805197989457, "grad_norm": 0.4834283452591938, "learning_rate": 7.66218691219599e-07, "loss": 0.398, "step": 26977 }, { "epoch": 0.8268358465121981, "grad_norm": 1.35064983286271, "learning_rate": 7.659546803955287e-07, "loss": 0.6298, "step": 26978 }, { "epoch": 0.8268664950349394, "grad_norm": 1.3135074910446125, "learning_rate": 7.656907112905915e-07, "loss": 0.524, "step": 26979 }, { "epoch": 0.8268971435576805, "grad_norm": 1.3814721003385433, "learning_rate": 7.65426783907387e-07, "loss": 0.6387, "step": 26980 }, { "epoch": 0.8269277920804218, "grad_norm": 0.4480753918932657, "learning_rate": 7.651628982485149e-07, "loss": 0.3974, "step": 26981 }, { "epoch": 0.8269584406031629, "grad_norm": 1.2843881841570386, "learning_rate": 7.648990543165757e-07, "loss": 0.5449, "step": 26982 }, { "epoch": 0.8269890891259042, "grad_norm": 0.4462092077648115, "learning_rate": 7.646352521141715e-07, "loss": 0.3778, "step": 26983 }, { "epoch": 0.8270197376486453, "grad_norm": 0.44986310045024813, "learning_rate": 7.643714916438982e-07, "loss": 0.3686, "step": 26984 }, { "epoch": 0.8270503861713865, "grad_norm": 1.3805547799326399, "learning_rate": 7.641077729083568e-07, "loss": 0.6383, "step": 26985 }, { "epoch": 0.8270810346941277, "grad_norm": 0.41649299550948654, "learning_rate": 7.638440959101451e-07, "loss": 0.3703, "step": 26986 }, { "epoch": 0.8271116832168689, "grad_norm": 1.5396156698634302, "learning_rate": 7.635804606518626e-07, "loss": 0.7179, "step": 26987 }, { "epoch": 0.8271423317396102, "grad_norm": 1.4678552942638965, "learning_rate": 7.633168671361058e-07, "loss": 0.6258, "step": 26988 }, { "epoch": 0.8271729802623513, "grad_norm": 1.4562155239090622, "learning_rate": 7.630533153654695e-07, "loss": 0.6564, "step": 26989 }, { "epoch": 0.8272036287850926, "grad_norm": 0.46401445399895386, "learning_rate": 7.627898053425553e-07, "loss": 0.3925, "step": 26990 }, { "epoch": 0.8272342773078337, "grad_norm": 1.402515059145131, "learning_rate": 7.62526337069957e-07, "loss": 0.562, "step": 26991 }, { "epoch": 0.827264925830575, "grad_norm": 1.3367331501874142, "learning_rate": 7.622629105502704e-07, "loss": 0.5279, "step": 26992 }, { "epoch": 0.8272955743533161, "grad_norm": 1.4686882105274788, "learning_rate": 7.619995257860913e-07, "loss": 0.6177, "step": 26993 }, { "epoch": 0.8273262228760574, "grad_norm": 1.356932948032657, "learning_rate": 7.617361827800152e-07, "loss": 0.5735, "step": 26994 }, { "epoch": 0.8273568713987985, "grad_norm": 1.3053987591347262, "learning_rate": 7.614728815346378e-07, "loss": 0.5708, "step": 26995 }, { "epoch": 0.8273875199215398, "grad_norm": 1.2980998684013558, "learning_rate": 7.612096220525517e-07, "loss": 0.6183, "step": 26996 }, { "epoch": 0.827418168444281, "grad_norm": 1.5440856403242256, "learning_rate": 7.609464043363513e-07, "loss": 0.6567, "step": 26997 }, { "epoch": 0.8274488169670222, "grad_norm": 1.2603043002285526, "learning_rate": 7.606832283886323e-07, "loss": 0.5791, "step": 26998 }, { "epoch": 0.8274794654897634, "grad_norm": 1.082242349215427, "learning_rate": 7.604200942119861e-07, "loss": 0.4887, "step": 26999 }, { "epoch": 0.8275101140125046, "grad_norm": 1.4159669302210285, "learning_rate": 7.601570018090027e-07, "loss": 0.6175, "step": 27000 }, { "epoch": 0.8275407625352458, "grad_norm": 1.215193370736338, "learning_rate": 7.5989395118228e-07, "loss": 0.6424, "step": 27001 }, { "epoch": 0.827571411057987, "grad_norm": 1.2504194668611583, "learning_rate": 7.596309423344055e-07, "loss": 0.5169, "step": 27002 }, { "epoch": 0.8276020595807282, "grad_norm": 1.4105156908942895, "learning_rate": 7.593679752679733e-07, "loss": 0.6183, "step": 27003 }, { "epoch": 0.8276327081034694, "grad_norm": 1.414391657389634, "learning_rate": 7.591050499855729e-07, "loss": 0.6065, "step": 27004 }, { "epoch": 0.8276633566262106, "grad_norm": 1.3428171126824902, "learning_rate": 7.588421664897949e-07, "loss": 0.5692, "step": 27005 }, { "epoch": 0.8276940051489519, "grad_norm": 1.2632748133625213, "learning_rate": 7.585793247832318e-07, "loss": 0.5513, "step": 27006 }, { "epoch": 0.827724653671693, "grad_norm": 1.3551322296228094, "learning_rate": 7.583165248684704e-07, "loss": 0.5554, "step": 27007 }, { "epoch": 0.8277553021944343, "grad_norm": 1.2542143020940133, "learning_rate": 7.580537667481019e-07, "loss": 0.6036, "step": 27008 }, { "epoch": 0.8277859507171754, "grad_norm": 1.35261261583318, "learning_rate": 7.57791050424716e-07, "loss": 0.6038, "step": 27009 }, { "epoch": 0.8278165992399167, "grad_norm": 1.5109167743092855, "learning_rate": 7.575283759009e-07, "loss": 0.6035, "step": 27010 }, { "epoch": 0.8278472477626578, "grad_norm": 1.3073422281756195, "learning_rate": 7.572657431792402e-07, "loss": 0.555, "step": 27011 }, { "epoch": 0.8278778962853991, "grad_norm": 1.1777573018519876, "learning_rate": 7.570031522623289e-07, "loss": 0.5654, "step": 27012 }, { "epoch": 0.8279085448081402, "grad_norm": 1.469850830577978, "learning_rate": 7.567406031527502e-07, "loss": 0.6828, "step": 27013 }, { "epoch": 0.8279391933308815, "grad_norm": 1.454316196519002, "learning_rate": 7.564780958530932e-07, "loss": 0.6236, "step": 27014 }, { "epoch": 0.8279698418536227, "grad_norm": 0.4542070456696742, "learning_rate": 7.562156303659419e-07, "loss": 0.3957, "step": 27015 }, { "epoch": 0.8280004903763638, "grad_norm": 1.4014517329211527, "learning_rate": 7.55953206693884e-07, "loss": 0.6045, "step": 27016 }, { "epoch": 0.8280311388991051, "grad_norm": 1.3043373521411816, "learning_rate": 7.556908248395062e-07, "loss": 0.6152, "step": 27017 }, { "epoch": 0.8280617874218462, "grad_norm": 0.4680190229916898, "learning_rate": 7.554284848053911e-07, "loss": 0.4048, "step": 27018 }, { "epoch": 0.8280924359445875, "grad_norm": 1.349887740181855, "learning_rate": 7.551661865941257e-07, "loss": 0.5043, "step": 27019 }, { "epoch": 0.8281230844673286, "grad_norm": 1.4734076079757026, "learning_rate": 7.54903930208295e-07, "loss": 0.6188, "step": 27020 }, { "epoch": 0.8281537329900699, "grad_norm": 1.3173985887485216, "learning_rate": 7.546417156504804e-07, "loss": 0.6794, "step": 27021 }, { "epoch": 0.828184381512811, "grad_norm": 1.5103795413006185, "learning_rate": 7.543795429232686e-07, "loss": 0.6073, "step": 27022 }, { "epoch": 0.8282150300355523, "grad_norm": 1.5554185251280066, "learning_rate": 7.541174120292405e-07, "loss": 0.6377, "step": 27023 }, { "epoch": 0.8282456785582935, "grad_norm": 1.5402571412520305, "learning_rate": 7.538553229709799e-07, "loss": 0.5393, "step": 27024 }, { "epoch": 0.8282763270810347, "grad_norm": 1.160918919858755, "learning_rate": 7.535932757510705e-07, "loss": 0.5651, "step": 27025 }, { "epoch": 0.8283069756037759, "grad_norm": 1.2049321907183337, "learning_rate": 7.533312703720913e-07, "loss": 0.5881, "step": 27026 }, { "epoch": 0.8283376241265171, "grad_norm": 1.345215780868165, "learning_rate": 7.530693068366263e-07, "loss": 0.5668, "step": 27027 }, { "epoch": 0.8283682726492583, "grad_norm": 1.4423024036675847, "learning_rate": 7.528073851472567e-07, "loss": 0.6615, "step": 27028 }, { "epoch": 0.8283989211719995, "grad_norm": 1.341743992734284, "learning_rate": 7.525455053065617e-07, "loss": 0.6504, "step": 27029 }, { "epoch": 0.8284295696947407, "grad_norm": 1.3499277895322288, "learning_rate": 7.522836673171224e-07, "loss": 0.6127, "step": 27030 }, { "epoch": 0.828460218217482, "grad_norm": 1.380231733602489, "learning_rate": 7.520218711815202e-07, "loss": 0.6684, "step": 27031 }, { "epoch": 0.8284908667402231, "grad_norm": 1.2555846417852272, "learning_rate": 7.517601169023326e-07, "loss": 0.6508, "step": 27032 }, { "epoch": 0.8285215152629644, "grad_norm": 1.2223031201428745, "learning_rate": 7.514984044821405e-07, "loss": 0.5566, "step": 27033 }, { "epoch": 0.8285521637857055, "grad_norm": 1.2974335543834905, "learning_rate": 7.512367339235205e-07, "loss": 0.5827, "step": 27034 }, { "epoch": 0.8285828123084468, "grad_norm": 0.46640550549044163, "learning_rate": 7.509751052290515e-07, "loss": 0.4112, "step": 27035 }, { "epoch": 0.8286134608311879, "grad_norm": 1.6646838009328326, "learning_rate": 7.507135184013137e-07, "loss": 0.5384, "step": 27036 }, { "epoch": 0.8286441093539292, "grad_norm": 1.3516516589670886, "learning_rate": 7.504519734428817e-07, "loss": 0.5722, "step": 27037 }, { "epoch": 0.8286747578766703, "grad_norm": 1.2626612068830758, "learning_rate": 7.501904703563334e-07, "loss": 0.5139, "step": 27038 }, { "epoch": 0.8287054063994116, "grad_norm": 1.3699675681141652, "learning_rate": 7.499290091442468e-07, "loss": 0.6803, "step": 27039 }, { "epoch": 0.8287360549221527, "grad_norm": 1.4555180414141105, "learning_rate": 7.496675898091965e-07, "loss": 0.668, "step": 27040 }, { "epoch": 0.828766703444894, "grad_norm": 1.197049537058634, "learning_rate": 7.494062123537588e-07, "loss": 0.5875, "step": 27041 }, { "epoch": 0.8287973519676352, "grad_norm": 1.4887057022759393, "learning_rate": 7.491448767805098e-07, "loss": 0.6114, "step": 27042 }, { "epoch": 0.8288280004903764, "grad_norm": 1.353053565478091, "learning_rate": 7.488835830920232e-07, "loss": 0.641, "step": 27043 }, { "epoch": 0.8288586490131176, "grad_norm": 1.2664651117164012, "learning_rate": 7.486223312908758e-07, "loss": 0.5851, "step": 27044 }, { "epoch": 0.8288892975358588, "grad_norm": 1.3792722939092894, "learning_rate": 7.483611213796388e-07, "loss": 0.5796, "step": 27045 }, { "epoch": 0.8289199460586, "grad_norm": 1.3175801644576284, "learning_rate": 7.480999533608874e-07, "loss": 0.6442, "step": 27046 }, { "epoch": 0.8289505945813411, "grad_norm": 1.3886877526781263, "learning_rate": 7.478388272371967e-07, "loss": 0.6029, "step": 27047 }, { "epoch": 0.8289812431040824, "grad_norm": 1.477942153237667, "learning_rate": 7.475777430111364e-07, "loss": 0.6521, "step": 27048 }, { "epoch": 0.8290118916268235, "grad_norm": 1.6391041007675944, "learning_rate": 7.473167006852805e-07, "loss": 0.676, "step": 27049 }, { "epoch": 0.8290425401495648, "grad_norm": 0.44114043349966275, "learning_rate": 7.470557002622031e-07, "loss": 0.3763, "step": 27050 }, { "epoch": 0.829073188672306, "grad_norm": 1.4778098708134229, "learning_rate": 7.46794741744472e-07, "loss": 0.5787, "step": 27051 }, { "epoch": 0.8291038371950472, "grad_norm": 1.251174381752039, "learning_rate": 7.465338251346616e-07, "loss": 0.6122, "step": 27052 }, { "epoch": 0.8291344857177884, "grad_norm": 1.1441208304630235, "learning_rate": 7.462729504353422e-07, "loss": 0.5738, "step": 27053 }, { "epoch": 0.8291651342405296, "grad_norm": 1.3641590879976309, "learning_rate": 7.460121176490826e-07, "loss": 0.642, "step": 27054 }, { "epoch": 0.8291957827632708, "grad_norm": 1.2741357768463475, "learning_rate": 7.45751326778455e-07, "loss": 0.4991, "step": 27055 }, { "epoch": 0.829226431286012, "grad_norm": 1.4295420522973599, "learning_rate": 7.454905778260263e-07, "loss": 0.5709, "step": 27056 }, { "epoch": 0.8292570798087532, "grad_norm": 1.3097027922426998, "learning_rate": 7.452298707943694e-07, "loss": 0.5191, "step": 27057 }, { "epoch": 0.8292877283314944, "grad_norm": 1.4453376873435406, "learning_rate": 7.449692056860513e-07, "loss": 0.7448, "step": 27058 }, { "epoch": 0.8293183768542356, "grad_norm": 1.3631014701890876, "learning_rate": 7.447085825036393e-07, "loss": 0.6414, "step": 27059 }, { "epoch": 0.8293490253769769, "grad_norm": 1.3021161439766442, "learning_rate": 7.444480012497024e-07, "loss": 0.5327, "step": 27060 }, { "epoch": 0.829379673899718, "grad_norm": 1.2813006391688238, "learning_rate": 7.441874619268091e-07, "loss": 0.568, "step": 27061 }, { "epoch": 0.8294103224224593, "grad_norm": 0.436334079011034, "learning_rate": 7.439269645375246e-07, "loss": 0.3853, "step": 27062 }, { "epoch": 0.8294409709452004, "grad_norm": 1.421577714575607, "learning_rate": 7.436665090844169e-07, "loss": 0.6372, "step": 27063 }, { "epoch": 0.8294716194679417, "grad_norm": 1.5304752026711483, "learning_rate": 7.434060955700534e-07, "loss": 0.6977, "step": 27064 }, { "epoch": 0.8295022679906828, "grad_norm": 1.336787269267318, "learning_rate": 7.431457239969969e-07, "loss": 0.5728, "step": 27065 }, { "epoch": 0.8295329165134241, "grad_norm": 0.436117678808172, "learning_rate": 7.428853943678166e-07, "loss": 0.3884, "step": 27066 }, { "epoch": 0.8295635650361652, "grad_norm": 1.3389598623900931, "learning_rate": 7.426251066850742e-07, "loss": 0.5759, "step": 27067 }, { "epoch": 0.8295942135589065, "grad_norm": 1.3934297739452601, "learning_rate": 7.423648609513356e-07, "loss": 0.5965, "step": 27068 }, { "epoch": 0.8296248620816477, "grad_norm": 1.383375539011934, "learning_rate": 7.42104657169167e-07, "loss": 0.5556, "step": 27069 }, { "epoch": 0.8296555106043889, "grad_norm": 1.4284413847866906, "learning_rate": 7.418444953411297e-07, "loss": 0.7007, "step": 27070 }, { "epoch": 0.8296861591271301, "grad_norm": 1.2643674633911477, "learning_rate": 7.415843754697876e-07, "loss": 0.597, "step": 27071 }, { "epoch": 0.8297168076498713, "grad_norm": 1.2184246088683681, "learning_rate": 7.413242975577056e-07, "loss": 0.641, "step": 27072 }, { "epoch": 0.8297474561726125, "grad_norm": 0.4473406199207575, "learning_rate": 7.410642616074437e-07, "loss": 0.3956, "step": 27073 }, { "epoch": 0.8297781046953537, "grad_norm": 1.2525851945854143, "learning_rate": 7.408042676215665e-07, "loss": 0.5619, "step": 27074 }, { "epoch": 0.8298087532180949, "grad_norm": 1.3567108573719944, "learning_rate": 7.405443156026327e-07, "loss": 0.5691, "step": 27075 }, { "epoch": 0.8298394017408361, "grad_norm": 0.42696112396958946, "learning_rate": 7.402844055532072e-07, "loss": 0.3851, "step": 27076 }, { "epoch": 0.8298700502635773, "grad_norm": 1.4606251913875243, "learning_rate": 7.400245374758496e-07, "loss": 0.646, "step": 27077 }, { "epoch": 0.8299006987863184, "grad_norm": 0.4516998500825558, "learning_rate": 7.397647113731194e-07, "loss": 0.4042, "step": 27078 }, { "epoch": 0.8299313473090597, "grad_norm": 1.4355243825447328, "learning_rate": 7.395049272475769e-07, "loss": 0.5857, "step": 27079 }, { "epoch": 0.8299619958318009, "grad_norm": 1.210240335048499, "learning_rate": 7.39245185101784e-07, "loss": 0.5648, "step": 27080 }, { "epoch": 0.8299926443545421, "grad_norm": 1.3685187025490948, "learning_rate": 7.389854849382972e-07, "loss": 0.5584, "step": 27081 }, { "epoch": 0.8300232928772833, "grad_norm": 1.290694815709498, "learning_rate": 7.38725826759677e-07, "loss": 0.5821, "step": 27082 }, { "epoch": 0.8300539414000245, "grad_norm": 1.46342034340943, "learning_rate": 7.38466210568482e-07, "loss": 0.5619, "step": 27083 }, { "epoch": 0.8300845899227657, "grad_norm": 1.4072020849278308, "learning_rate": 7.382066363672691e-07, "loss": 0.5774, "step": 27084 }, { "epoch": 0.8301152384455069, "grad_norm": 1.2525008513595641, "learning_rate": 7.379471041585979e-07, "loss": 0.6233, "step": 27085 }, { "epoch": 0.8301458869682481, "grad_norm": 1.3816752121933462, "learning_rate": 7.376876139450217e-07, "loss": 0.6113, "step": 27086 }, { "epoch": 0.8301765354909894, "grad_norm": 1.281557639357906, "learning_rate": 7.374281657291022e-07, "loss": 0.5414, "step": 27087 }, { "epoch": 0.8302071840137305, "grad_norm": 1.3702656185582576, "learning_rate": 7.371687595133942e-07, "loss": 0.6004, "step": 27088 }, { "epoch": 0.8302378325364718, "grad_norm": 1.2554461011136093, "learning_rate": 7.369093953004513e-07, "loss": 0.6289, "step": 27089 }, { "epoch": 0.8302684810592129, "grad_norm": 1.3977271835372687, "learning_rate": 7.366500730928311e-07, "loss": 0.5976, "step": 27090 }, { "epoch": 0.8302991295819542, "grad_norm": 0.46433328152430076, "learning_rate": 7.363907928930903e-07, "loss": 0.402, "step": 27091 }, { "epoch": 0.8303297781046953, "grad_norm": 1.2501013253167104, "learning_rate": 7.3613155470378e-07, "loss": 0.5827, "step": 27092 }, { "epoch": 0.8303604266274366, "grad_norm": 1.3766026712462502, "learning_rate": 7.358723585274569e-07, "loss": 0.6351, "step": 27093 }, { "epoch": 0.8303910751501777, "grad_norm": 1.4343927890045252, "learning_rate": 7.356132043666741e-07, "loss": 0.6027, "step": 27094 }, { "epoch": 0.830421723672919, "grad_norm": 1.4682953760906012, "learning_rate": 7.353540922239865e-07, "loss": 0.5935, "step": 27095 }, { "epoch": 0.8304523721956601, "grad_norm": 1.2521189189837292, "learning_rate": 7.350950221019471e-07, "loss": 0.4233, "step": 27096 }, { "epoch": 0.8304830207184014, "grad_norm": 1.383480789905015, "learning_rate": 7.348359940031046e-07, "loss": 0.5788, "step": 27097 }, { "epoch": 0.8305136692411426, "grad_norm": 1.5324852855277915, "learning_rate": 7.345770079300168e-07, "loss": 0.6807, "step": 27098 }, { "epoch": 0.8305443177638838, "grad_norm": 1.2892175481331853, "learning_rate": 7.343180638852332e-07, "loss": 0.5653, "step": 27099 }, { "epoch": 0.830574966286625, "grad_norm": 1.5305109675206374, "learning_rate": 7.340591618713039e-07, "loss": 0.6877, "step": 27100 }, { "epoch": 0.8306056148093662, "grad_norm": 1.5161979936530674, "learning_rate": 7.338003018907808e-07, "loss": 0.6075, "step": 27101 }, { "epoch": 0.8306362633321074, "grad_norm": 1.6120011100971359, "learning_rate": 7.335414839462157e-07, "loss": 0.5848, "step": 27102 }, { "epoch": 0.8306669118548486, "grad_norm": 1.359662793547962, "learning_rate": 7.332827080401584e-07, "loss": 0.666, "step": 27103 }, { "epoch": 0.8306975603775898, "grad_norm": 1.2922032023255303, "learning_rate": 7.330239741751577e-07, "loss": 0.6044, "step": 27104 }, { "epoch": 0.830728208900331, "grad_norm": 1.513636829600097, "learning_rate": 7.327652823537628e-07, "loss": 0.5516, "step": 27105 }, { "epoch": 0.8307588574230722, "grad_norm": 1.2561805164199151, "learning_rate": 7.325066325785252e-07, "loss": 0.5681, "step": 27106 }, { "epoch": 0.8307895059458135, "grad_norm": 1.273089527027228, "learning_rate": 7.322480248519915e-07, "loss": 0.5664, "step": 27107 }, { "epoch": 0.8308201544685546, "grad_norm": 1.2456139611998525, "learning_rate": 7.319894591767074e-07, "loss": 0.6591, "step": 27108 }, { "epoch": 0.8308508029912958, "grad_norm": 1.265591772576068, "learning_rate": 7.317309355552254e-07, "loss": 0.5785, "step": 27109 }, { "epoch": 0.830881451514037, "grad_norm": 1.737250913611357, "learning_rate": 7.314724539900913e-07, "loss": 0.6284, "step": 27110 }, { "epoch": 0.8309121000367782, "grad_norm": 1.2684080689370416, "learning_rate": 7.312140144838493e-07, "loss": 0.5909, "step": 27111 }, { "epoch": 0.8309427485595194, "grad_norm": 1.3512997411007293, "learning_rate": 7.30955617039048e-07, "loss": 0.5694, "step": 27112 }, { "epoch": 0.8309733970822606, "grad_norm": 1.3277076539867363, "learning_rate": 7.306972616582336e-07, "loss": 0.571, "step": 27113 }, { "epoch": 0.8310040456050019, "grad_norm": 1.352227990422445, "learning_rate": 7.304389483439528e-07, "loss": 0.6254, "step": 27114 }, { "epoch": 0.831034694127743, "grad_norm": 1.4412764407481633, "learning_rate": 7.30180677098748e-07, "loss": 0.6329, "step": 27115 }, { "epoch": 0.8310653426504843, "grad_norm": 1.4104767713933022, "learning_rate": 7.299224479251649e-07, "loss": 0.6462, "step": 27116 }, { "epoch": 0.8310959911732254, "grad_norm": 1.2794758687953967, "learning_rate": 7.296642608257503e-07, "loss": 0.624, "step": 27117 }, { "epoch": 0.8311266396959667, "grad_norm": 1.45807112893626, "learning_rate": 7.294061158030463e-07, "loss": 0.6111, "step": 27118 }, { "epoch": 0.8311572882187078, "grad_norm": 1.421883162251792, "learning_rate": 7.291480128595951e-07, "loss": 0.5954, "step": 27119 }, { "epoch": 0.8311879367414491, "grad_norm": 1.4367624673340191, "learning_rate": 7.288899519979414e-07, "loss": 0.5924, "step": 27120 }, { "epoch": 0.8312185852641902, "grad_norm": 1.465766132056573, "learning_rate": 7.286319332206276e-07, "loss": 0.5645, "step": 27121 }, { "epoch": 0.8312492337869315, "grad_norm": 1.2996826062387064, "learning_rate": 7.283739565301978e-07, "loss": 0.5773, "step": 27122 }, { "epoch": 0.8312798823096726, "grad_norm": 1.4358828521523195, "learning_rate": 7.281160219291911e-07, "loss": 0.6336, "step": 27123 }, { "epoch": 0.8313105308324139, "grad_norm": 0.44671408439817095, "learning_rate": 7.278581294201504e-07, "loss": 0.3882, "step": 27124 }, { "epoch": 0.8313411793551551, "grad_norm": 1.2625984107886181, "learning_rate": 7.276002790056175e-07, "loss": 0.5851, "step": 27125 }, { "epoch": 0.8313718278778963, "grad_norm": 1.4028094220583194, "learning_rate": 7.273424706881321e-07, "loss": 0.6796, "step": 27126 }, { "epoch": 0.8314024764006375, "grad_norm": 1.2476768988385816, "learning_rate": 7.270847044702322e-07, "loss": 0.6341, "step": 27127 }, { "epoch": 0.8314331249233787, "grad_norm": 1.3265898381746888, "learning_rate": 7.268269803544625e-07, "loss": 0.5847, "step": 27128 }, { "epoch": 0.8314637734461199, "grad_norm": 1.4579473367322446, "learning_rate": 7.265692983433586e-07, "loss": 0.5813, "step": 27129 }, { "epoch": 0.8314944219688611, "grad_norm": 1.3005644974615362, "learning_rate": 7.26311658439462e-07, "loss": 0.5603, "step": 27130 }, { "epoch": 0.8315250704916023, "grad_norm": 1.2657771084136165, "learning_rate": 7.260540606453092e-07, "loss": 0.5886, "step": 27131 }, { "epoch": 0.8315557190143436, "grad_norm": 1.327270602171577, "learning_rate": 7.257965049634391e-07, "loss": 0.5105, "step": 27132 }, { "epoch": 0.8315863675370847, "grad_norm": 1.3074365413607283, "learning_rate": 7.255389913963906e-07, "loss": 0.6255, "step": 27133 }, { "epoch": 0.831617016059826, "grad_norm": 1.3086631207609856, "learning_rate": 7.252815199466994e-07, "loss": 0.5032, "step": 27134 }, { "epoch": 0.8316476645825671, "grad_norm": 1.5083689803868787, "learning_rate": 7.250240906169026e-07, "loss": 0.6373, "step": 27135 }, { "epoch": 0.8316783131053084, "grad_norm": 0.42491208522138635, "learning_rate": 7.247667034095385e-07, "loss": 0.3934, "step": 27136 }, { "epoch": 0.8317089616280495, "grad_norm": 1.374823520416535, "learning_rate": 7.245093583271423e-07, "loss": 0.5193, "step": 27137 }, { "epoch": 0.8317396101507908, "grad_norm": 1.3886901988477378, "learning_rate": 7.242520553722466e-07, "loss": 0.6648, "step": 27138 }, { "epoch": 0.8317702586735319, "grad_norm": 1.2029493179635025, "learning_rate": 7.239947945473919e-07, "loss": 0.5649, "step": 27139 }, { "epoch": 0.8318009071962731, "grad_norm": 1.2082929296199199, "learning_rate": 7.237375758551096e-07, "loss": 0.5783, "step": 27140 }, { "epoch": 0.8318315557190143, "grad_norm": 0.44982675314300646, "learning_rate": 7.234803992979356e-07, "loss": 0.4059, "step": 27141 }, { "epoch": 0.8318622042417555, "grad_norm": 1.2911655744320971, "learning_rate": 7.232232648784026e-07, "loss": 0.659, "step": 27142 }, { "epoch": 0.8318928527644968, "grad_norm": 1.2676605872840356, "learning_rate": 7.229661725990455e-07, "loss": 0.545, "step": 27143 }, { "epoch": 0.8319235012872379, "grad_norm": 1.4229142142256004, "learning_rate": 7.227091224623978e-07, "loss": 0.5561, "step": 27144 }, { "epoch": 0.8319541498099792, "grad_norm": 1.3512140658983351, "learning_rate": 7.2245211447099e-07, "loss": 0.6632, "step": 27145 }, { "epoch": 0.8319847983327203, "grad_norm": 1.3825487962615663, "learning_rate": 7.221951486273566e-07, "loss": 0.6251, "step": 27146 }, { "epoch": 0.8320154468554616, "grad_norm": 1.4057578533240456, "learning_rate": 7.219382249340296e-07, "loss": 0.5732, "step": 27147 }, { "epoch": 0.8320460953782027, "grad_norm": 1.3794695357340785, "learning_rate": 7.216813433935388e-07, "loss": 0.6394, "step": 27148 }, { "epoch": 0.832076743900944, "grad_norm": 1.1908483623884678, "learning_rate": 7.214245040084167e-07, "loss": 0.6019, "step": 27149 }, { "epoch": 0.8321073924236851, "grad_norm": 0.4525488812041698, "learning_rate": 7.211677067811945e-07, "loss": 0.4096, "step": 27150 }, { "epoch": 0.8321380409464264, "grad_norm": 0.45219222592204955, "learning_rate": 7.209109517144008e-07, "loss": 0.3883, "step": 27151 }, { "epoch": 0.8321686894691676, "grad_norm": 1.5199508151126262, "learning_rate": 7.206542388105675e-07, "loss": 0.5778, "step": 27152 }, { "epoch": 0.8321993379919088, "grad_norm": 1.2046038105300434, "learning_rate": 7.203975680722214e-07, "loss": 0.5176, "step": 27153 }, { "epoch": 0.83222998651465, "grad_norm": 0.4527373880595574, "learning_rate": 7.201409395018932e-07, "loss": 0.38, "step": 27154 }, { "epoch": 0.8322606350373912, "grad_norm": 1.2369289673227375, "learning_rate": 7.198843531021127e-07, "loss": 0.4791, "step": 27155 }, { "epoch": 0.8322912835601324, "grad_norm": 1.3440299166727692, "learning_rate": 7.196278088754055e-07, "loss": 0.6367, "step": 27156 }, { "epoch": 0.8323219320828736, "grad_norm": 1.4456395629571968, "learning_rate": 7.193713068243007e-07, "loss": 0.7116, "step": 27157 }, { "epoch": 0.8323525806056148, "grad_norm": 1.2834423022534556, "learning_rate": 7.191148469513265e-07, "loss": 0.5455, "step": 27158 }, { "epoch": 0.832383229128356, "grad_norm": 1.3463173236057273, "learning_rate": 7.188584292590084e-07, "loss": 0.5198, "step": 27159 }, { "epoch": 0.8324138776510972, "grad_norm": 1.2626523991144254, "learning_rate": 7.186020537498733e-07, "loss": 0.5552, "step": 27160 }, { "epoch": 0.8324445261738385, "grad_norm": 1.1189519882059262, "learning_rate": 7.183457204264488e-07, "loss": 0.581, "step": 27161 }, { "epoch": 0.8324751746965796, "grad_norm": 1.2960384510990417, "learning_rate": 7.180894292912582e-07, "loss": 0.578, "step": 27162 }, { "epoch": 0.8325058232193209, "grad_norm": 1.4268656450136095, "learning_rate": 7.178331803468292e-07, "loss": 0.6521, "step": 27163 }, { "epoch": 0.832536471742062, "grad_norm": 1.433517487935742, "learning_rate": 7.175769735956844e-07, "loss": 0.6872, "step": 27164 }, { "epoch": 0.8325671202648033, "grad_norm": 1.1429841505616574, "learning_rate": 7.173208090403494e-07, "loss": 0.5817, "step": 27165 }, { "epoch": 0.8325977687875444, "grad_norm": 1.3607100710494635, "learning_rate": 7.170646866833491e-07, "loss": 0.535, "step": 27166 }, { "epoch": 0.8326284173102857, "grad_norm": 1.145775666241685, "learning_rate": 7.168086065272056e-07, "loss": 0.6763, "step": 27167 }, { "epoch": 0.8326590658330268, "grad_norm": 1.1994858150844854, "learning_rate": 7.165525685744429e-07, "loss": 0.5439, "step": 27168 }, { "epoch": 0.8326897143557681, "grad_norm": 1.542555249683266, "learning_rate": 7.162965728275844e-07, "loss": 0.6617, "step": 27169 }, { "epoch": 0.8327203628785093, "grad_norm": 1.4025702343967432, "learning_rate": 7.160406192891505e-07, "loss": 0.6316, "step": 27170 }, { "epoch": 0.8327510114012504, "grad_norm": 1.1628969234669526, "learning_rate": 7.157847079616658e-07, "loss": 0.582, "step": 27171 }, { "epoch": 0.8327816599239917, "grad_norm": 1.313732430571639, "learning_rate": 7.1552883884765e-07, "loss": 0.5743, "step": 27172 }, { "epoch": 0.8328123084467328, "grad_norm": 1.3486882236059612, "learning_rate": 7.152730119496243e-07, "loss": 0.6368, "step": 27173 }, { "epoch": 0.8328429569694741, "grad_norm": 1.384702958531385, "learning_rate": 7.150172272701111e-07, "loss": 0.5454, "step": 27174 }, { "epoch": 0.8328736054922152, "grad_norm": 1.321972598997167, "learning_rate": 7.14761484811628e-07, "loss": 0.523, "step": 27175 }, { "epoch": 0.8329042540149565, "grad_norm": 1.3715227827325613, "learning_rate": 7.145057845766967e-07, "loss": 0.6622, "step": 27176 }, { "epoch": 0.8329349025376976, "grad_norm": 1.346163267752655, "learning_rate": 7.142501265678376e-07, "loss": 0.6518, "step": 27177 }, { "epoch": 0.8329655510604389, "grad_norm": 0.44508805242532434, "learning_rate": 7.139945107875673e-07, "loss": 0.3833, "step": 27178 }, { "epoch": 0.83299619958318, "grad_norm": 1.5576524367484832, "learning_rate": 7.137389372384063e-07, "loss": 0.5608, "step": 27179 }, { "epoch": 0.8330268481059213, "grad_norm": 1.2858822598716872, "learning_rate": 7.134834059228729e-07, "loss": 0.5883, "step": 27180 }, { "epoch": 0.8330574966286625, "grad_norm": 1.4936602332898659, "learning_rate": 7.13227916843483e-07, "loss": 0.5774, "step": 27181 }, { "epoch": 0.8330881451514037, "grad_norm": 1.3066906475513442, "learning_rate": 7.129724700027562e-07, "loss": 0.5949, "step": 27182 }, { "epoch": 0.8331187936741449, "grad_norm": 1.4066660721363098, "learning_rate": 7.127170654032068e-07, "loss": 0.459, "step": 27183 }, { "epoch": 0.8331494421968861, "grad_norm": 1.2822435116540623, "learning_rate": 7.124617030473552e-07, "loss": 0.5517, "step": 27184 }, { "epoch": 0.8331800907196273, "grad_norm": 0.45254948089586156, "learning_rate": 7.122063829377151e-07, "loss": 0.3988, "step": 27185 }, { "epoch": 0.8332107392423685, "grad_norm": 1.3458378480055817, "learning_rate": 7.119511050768019e-07, "loss": 0.6047, "step": 27186 }, { "epoch": 0.8332413877651097, "grad_norm": 1.215064789578713, "learning_rate": 7.116958694671316e-07, "loss": 0.5794, "step": 27187 }, { "epoch": 0.833272036287851, "grad_norm": 1.4298436100646295, "learning_rate": 7.114406761112197e-07, "loss": 0.6695, "step": 27188 }, { "epoch": 0.8333026848105921, "grad_norm": 1.2414133333976154, "learning_rate": 7.111855250115795e-07, "loss": 0.4751, "step": 27189 }, { "epoch": 0.8333333333333334, "grad_norm": 0.4328954466990826, "learning_rate": 7.109304161707254e-07, "loss": 0.3984, "step": 27190 }, { "epoch": 0.8333639818560745, "grad_norm": 0.45856906836571315, "learning_rate": 7.106753495911727e-07, "loss": 0.3923, "step": 27191 }, { "epoch": 0.8333946303788158, "grad_norm": 1.4446218750872277, "learning_rate": 7.104203252754322e-07, "loss": 0.6034, "step": 27192 }, { "epoch": 0.8334252789015569, "grad_norm": 1.3012521411971791, "learning_rate": 7.101653432260186e-07, "loss": 0.5981, "step": 27193 }, { "epoch": 0.8334559274242982, "grad_norm": 1.3370597519331031, "learning_rate": 7.099104034454413e-07, "loss": 0.5467, "step": 27194 }, { "epoch": 0.8334865759470393, "grad_norm": 1.2872660339562227, "learning_rate": 7.096555059362164e-07, "loss": 0.6551, "step": 27195 }, { "epoch": 0.8335172244697806, "grad_norm": 1.468089419559575, "learning_rate": 7.094006507008539e-07, "loss": 0.5901, "step": 27196 }, { "epoch": 0.8335478729925218, "grad_norm": 1.2621190579551664, "learning_rate": 7.091458377418636e-07, "loss": 0.6534, "step": 27197 }, { "epoch": 0.833578521515263, "grad_norm": 1.3184333462602984, "learning_rate": 7.088910670617572e-07, "loss": 0.5454, "step": 27198 }, { "epoch": 0.8336091700380042, "grad_norm": 1.4214021925160847, "learning_rate": 7.086363386630457e-07, "loss": 0.5917, "step": 27199 }, { "epoch": 0.8336398185607454, "grad_norm": 1.3042141714429651, "learning_rate": 7.083816525482373e-07, "loss": 0.5109, "step": 27200 }, { "epoch": 0.8336704670834866, "grad_norm": 1.5889919025255248, "learning_rate": 7.081270087198428e-07, "loss": 0.6454, "step": 27201 }, { "epoch": 0.8337011156062277, "grad_norm": 1.1773500571759963, "learning_rate": 7.078724071803711e-07, "loss": 0.5067, "step": 27202 }, { "epoch": 0.833731764128969, "grad_norm": 1.3969805686448773, "learning_rate": 7.07617847932332e-07, "loss": 0.5764, "step": 27203 }, { "epoch": 0.8337624126517101, "grad_norm": 1.4081029304191512, "learning_rate": 7.073633309782319e-07, "loss": 0.575, "step": 27204 }, { "epoch": 0.8337930611744514, "grad_norm": 1.294310188132839, "learning_rate": 7.071088563205774e-07, "loss": 0.5724, "step": 27205 }, { "epoch": 0.8338237096971925, "grad_norm": 0.43462315943308255, "learning_rate": 7.068544239618802e-07, "loss": 0.3806, "step": 27206 }, { "epoch": 0.8338543582199338, "grad_norm": 1.3740960696068791, "learning_rate": 7.066000339046442e-07, "loss": 0.6135, "step": 27207 }, { "epoch": 0.833885006742675, "grad_norm": 1.2423259468408525, "learning_rate": 7.063456861513756e-07, "loss": 0.6658, "step": 27208 }, { "epoch": 0.8339156552654162, "grad_norm": 1.3496420169341172, "learning_rate": 7.060913807045816e-07, "loss": 0.5427, "step": 27209 }, { "epoch": 0.8339463037881574, "grad_norm": 1.1244861539286568, "learning_rate": 7.058371175667683e-07, "loss": 0.5298, "step": 27210 }, { "epoch": 0.8339769523108986, "grad_norm": 1.3664507062475468, "learning_rate": 7.055828967404415e-07, "loss": 0.627, "step": 27211 }, { "epoch": 0.8340076008336398, "grad_norm": 1.427150742749484, "learning_rate": 7.053287182281038e-07, "loss": 0.5985, "step": 27212 }, { "epoch": 0.834038249356381, "grad_norm": 1.2720878408537373, "learning_rate": 7.050745820322613e-07, "loss": 0.5848, "step": 27213 }, { "epoch": 0.8340688978791222, "grad_norm": 1.3949526595872004, "learning_rate": 7.048204881554188e-07, "loss": 0.4694, "step": 27214 }, { "epoch": 0.8340995464018635, "grad_norm": 0.4393093131121656, "learning_rate": 7.045664366000787e-07, "loss": 0.3957, "step": 27215 }, { "epoch": 0.8341301949246046, "grad_norm": 1.427728299030341, "learning_rate": 7.043124273687441e-07, "loss": 0.6365, "step": 27216 }, { "epoch": 0.8341608434473459, "grad_norm": 0.4335799069042906, "learning_rate": 7.040584604639178e-07, "loss": 0.3674, "step": 27217 }, { "epoch": 0.834191491970087, "grad_norm": 0.45012972427401826, "learning_rate": 7.038045358881041e-07, "loss": 0.3833, "step": 27218 }, { "epoch": 0.8342221404928283, "grad_norm": 1.6027130064423432, "learning_rate": 7.03550653643802e-07, "loss": 0.7098, "step": 27219 }, { "epoch": 0.8342527890155694, "grad_norm": 1.3641385237236217, "learning_rate": 7.03296813733515e-07, "loss": 0.5573, "step": 27220 }, { "epoch": 0.8342834375383107, "grad_norm": 1.2162262306907972, "learning_rate": 7.030430161597435e-07, "loss": 0.5633, "step": 27221 }, { "epoch": 0.8343140860610518, "grad_norm": 1.508023699732306, "learning_rate": 7.027892609249903e-07, "loss": 0.7237, "step": 27222 }, { "epoch": 0.8343447345837931, "grad_norm": 1.2806628352656666, "learning_rate": 7.025355480317536e-07, "loss": 0.5807, "step": 27223 }, { "epoch": 0.8343753831065343, "grad_norm": 1.4697751737387363, "learning_rate": 7.022818774825313e-07, "loss": 0.6696, "step": 27224 }, { "epoch": 0.8344060316292755, "grad_norm": 1.3436572663016721, "learning_rate": 7.020282492798275e-07, "loss": 0.5944, "step": 27225 }, { "epoch": 0.8344366801520167, "grad_norm": 1.4080623208467908, "learning_rate": 7.017746634261391e-07, "loss": 0.5491, "step": 27226 }, { "epoch": 0.8344673286747579, "grad_norm": 1.2788940614005488, "learning_rate": 7.015211199239641e-07, "loss": 0.4942, "step": 27227 }, { "epoch": 0.8344979771974991, "grad_norm": 1.2389617445423784, "learning_rate": 7.012676187758006e-07, "loss": 0.5766, "step": 27228 }, { "epoch": 0.8345286257202403, "grad_norm": 1.3522106008648922, "learning_rate": 7.010141599841474e-07, "loss": 0.6735, "step": 27229 }, { "epoch": 0.8345592742429815, "grad_norm": 1.3232017793575124, "learning_rate": 7.00760743551503e-07, "loss": 0.6383, "step": 27230 }, { "epoch": 0.8345899227657227, "grad_norm": 1.4168601348487753, "learning_rate": 7.005073694803615e-07, "loss": 0.6146, "step": 27231 }, { "epoch": 0.8346205712884639, "grad_norm": 1.4726057594444535, "learning_rate": 7.002540377732215e-07, "loss": 0.6342, "step": 27232 }, { "epoch": 0.834651219811205, "grad_norm": 1.2914966745326077, "learning_rate": 7.000007484325788e-07, "loss": 0.5605, "step": 27233 }, { "epoch": 0.8346818683339463, "grad_norm": 1.4403352845821673, "learning_rate": 6.997475014609295e-07, "loss": 0.6032, "step": 27234 }, { "epoch": 0.8347125168566875, "grad_norm": 1.4057403537980204, "learning_rate": 6.994942968607665e-07, "loss": 0.7352, "step": 27235 }, { "epoch": 0.8347431653794287, "grad_norm": 1.2384777596795147, "learning_rate": 6.992411346345884e-07, "loss": 0.5218, "step": 27236 }, { "epoch": 0.8347738139021699, "grad_norm": 1.350151077358252, "learning_rate": 6.989880147848865e-07, "loss": 0.6189, "step": 27237 }, { "epoch": 0.8348044624249111, "grad_norm": 0.41706070948401397, "learning_rate": 6.987349373141572e-07, "loss": 0.3794, "step": 27238 }, { "epoch": 0.8348351109476523, "grad_norm": 1.722305964068198, "learning_rate": 6.984819022248923e-07, "loss": 0.5893, "step": 27239 }, { "epoch": 0.8348657594703935, "grad_norm": 1.5611485201496682, "learning_rate": 6.982289095195855e-07, "loss": 0.694, "step": 27240 }, { "epoch": 0.8348964079931347, "grad_norm": 1.3158373879920862, "learning_rate": 6.979759592007312e-07, "loss": 0.5943, "step": 27241 }, { "epoch": 0.834927056515876, "grad_norm": 1.2819495960258547, "learning_rate": 6.977230512708194e-07, "loss": 0.6133, "step": 27242 }, { "epoch": 0.8349577050386171, "grad_norm": 0.46165630250635625, "learning_rate": 6.974701857323429e-07, "loss": 0.4012, "step": 27243 }, { "epoch": 0.8349883535613584, "grad_norm": 1.3252957037873743, "learning_rate": 6.972173625877949e-07, "loss": 0.7044, "step": 27244 }, { "epoch": 0.8350190020840995, "grad_norm": 1.412079708547951, "learning_rate": 6.969645818396654e-07, "loss": 0.5634, "step": 27245 }, { "epoch": 0.8350496506068408, "grad_norm": 0.41034634367830264, "learning_rate": 6.967118434904424e-07, "loss": 0.3675, "step": 27246 }, { "epoch": 0.8350802991295819, "grad_norm": 1.41143418802195, "learning_rate": 6.964591475426208e-07, "loss": 0.5363, "step": 27247 }, { "epoch": 0.8351109476523232, "grad_norm": 1.2741027444933923, "learning_rate": 6.962064939986868e-07, "loss": 0.6252, "step": 27248 }, { "epoch": 0.8351415961750643, "grad_norm": 1.1900553679080665, "learning_rate": 6.959538828611329e-07, "loss": 0.5295, "step": 27249 }, { "epoch": 0.8351722446978056, "grad_norm": 1.1518222031410965, "learning_rate": 6.95701314132446e-07, "loss": 0.5218, "step": 27250 }, { "epoch": 0.8352028932205467, "grad_norm": 1.2384795836678122, "learning_rate": 6.954487878151145e-07, "loss": 0.5578, "step": 27251 }, { "epoch": 0.835233541743288, "grad_norm": 0.45004567001350715, "learning_rate": 6.951963039116288e-07, "loss": 0.3845, "step": 27252 }, { "epoch": 0.8352641902660292, "grad_norm": 1.284970514046675, "learning_rate": 6.949438624244748e-07, "loss": 0.5705, "step": 27253 }, { "epoch": 0.8352948387887704, "grad_norm": 1.6699616603976692, "learning_rate": 6.946914633561397e-07, "loss": 0.634, "step": 27254 }, { "epoch": 0.8353254873115116, "grad_norm": 1.536359115395839, "learning_rate": 6.944391067091127e-07, "loss": 0.5201, "step": 27255 }, { "epoch": 0.8353561358342528, "grad_norm": 1.2967212742214196, "learning_rate": 6.941867924858775e-07, "loss": 0.5142, "step": 27256 }, { "epoch": 0.835386784356994, "grad_norm": 1.24858531211286, "learning_rate": 6.93934520688922e-07, "loss": 0.4826, "step": 27257 }, { "epoch": 0.8354174328797352, "grad_norm": 0.42812236503903944, "learning_rate": 6.936822913207319e-07, "loss": 0.3789, "step": 27258 }, { "epoch": 0.8354480814024764, "grad_norm": 1.2780448459934313, "learning_rate": 6.93430104383791e-07, "loss": 0.5362, "step": 27259 }, { "epoch": 0.8354787299252177, "grad_norm": 0.4726155807232836, "learning_rate": 6.931779598805865e-07, "loss": 0.3946, "step": 27260 }, { "epoch": 0.8355093784479588, "grad_norm": 1.3478664225945618, "learning_rate": 6.929258578136005e-07, "loss": 0.6134, "step": 27261 }, { "epoch": 0.8355400269707001, "grad_norm": 0.44621295682166323, "learning_rate": 6.926737981853177e-07, "loss": 0.4218, "step": 27262 }, { "epoch": 0.8355706754934412, "grad_norm": 1.6786368472593647, "learning_rate": 6.924217809982231e-07, "loss": 0.5947, "step": 27263 }, { "epoch": 0.8356013240161824, "grad_norm": 1.3030702745936877, "learning_rate": 6.921698062547983e-07, "loss": 0.663, "step": 27264 }, { "epoch": 0.8356319725389236, "grad_norm": 1.374610800601697, "learning_rate": 6.919178739575261e-07, "loss": 0.5588, "step": 27265 }, { "epoch": 0.8356626210616648, "grad_norm": 1.47912589442563, "learning_rate": 6.916659841088908e-07, "loss": 0.6137, "step": 27266 }, { "epoch": 0.835693269584406, "grad_norm": 1.4513882476302473, "learning_rate": 6.914141367113714e-07, "loss": 0.6082, "step": 27267 }, { "epoch": 0.8357239181071472, "grad_norm": 1.3876492869285588, "learning_rate": 6.911623317674521e-07, "loss": 0.6081, "step": 27268 }, { "epoch": 0.8357545666298885, "grad_norm": 0.45111767808203157, "learning_rate": 6.90910569279612e-07, "loss": 0.4134, "step": 27269 }, { "epoch": 0.8357852151526296, "grad_norm": 1.4174635507495612, "learning_rate": 6.906588492503325e-07, "loss": 0.5233, "step": 27270 }, { "epoch": 0.8358158636753709, "grad_norm": 0.4399383952441351, "learning_rate": 6.904071716820948e-07, "loss": 0.381, "step": 27271 }, { "epoch": 0.835846512198112, "grad_norm": 1.4498471930512775, "learning_rate": 6.901555365773766e-07, "loss": 0.5788, "step": 27272 }, { "epoch": 0.8358771607208533, "grad_norm": 0.42524662073002445, "learning_rate": 6.89903943938659e-07, "loss": 0.3852, "step": 27273 }, { "epoch": 0.8359078092435944, "grad_norm": 1.3249268211376268, "learning_rate": 6.896523937684219e-07, "loss": 0.5773, "step": 27274 }, { "epoch": 0.8359384577663357, "grad_norm": 1.4171362729020744, "learning_rate": 6.894008860691415e-07, "loss": 0.7066, "step": 27275 }, { "epoch": 0.8359691062890768, "grad_norm": 1.5251269375626741, "learning_rate": 6.891494208432964e-07, "loss": 0.7052, "step": 27276 }, { "epoch": 0.8359997548118181, "grad_norm": 1.2425585871546891, "learning_rate": 6.888979980933669e-07, "loss": 0.5238, "step": 27277 }, { "epoch": 0.8360304033345592, "grad_norm": 1.2909838895739083, "learning_rate": 6.88646617821827e-07, "loss": 0.6588, "step": 27278 }, { "epoch": 0.8360610518573005, "grad_norm": 1.3469314168179858, "learning_rate": 6.883952800311561e-07, "loss": 0.6623, "step": 27279 }, { "epoch": 0.8360917003800417, "grad_norm": 0.44327786390548374, "learning_rate": 6.881439847238292e-07, "loss": 0.392, "step": 27280 }, { "epoch": 0.8361223489027829, "grad_norm": 1.3098800648611606, "learning_rate": 6.878927319023221e-07, "loss": 0.6109, "step": 27281 }, { "epoch": 0.8361529974255241, "grad_norm": 1.193926837010547, "learning_rate": 6.876415215691124e-07, "loss": 0.606, "step": 27282 }, { "epoch": 0.8361836459482653, "grad_norm": 1.4811418460306602, "learning_rate": 6.873903537266735e-07, "loss": 0.6256, "step": 27283 }, { "epoch": 0.8362142944710065, "grad_norm": 0.451368174215699, "learning_rate": 6.871392283774808e-07, "loss": 0.3785, "step": 27284 }, { "epoch": 0.8362449429937477, "grad_norm": 1.3143629957235925, "learning_rate": 6.868881455240095e-07, "loss": 0.5295, "step": 27285 }, { "epoch": 0.8362755915164889, "grad_norm": 1.5366070441738238, "learning_rate": 6.866371051687321e-07, "loss": 0.6162, "step": 27286 }, { "epoch": 0.8363062400392302, "grad_norm": 1.398622612291061, "learning_rate": 6.863861073141226e-07, "loss": 0.5927, "step": 27287 }, { "epoch": 0.8363368885619713, "grad_norm": 1.352632735085192, "learning_rate": 6.861351519626558e-07, "loss": 0.5746, "step": 27288 }, { "epoch": 0.8363675370847126, "grad_norm": 1.3009433657914105, "learning_rate": 6.858842391168019e-07, "loss": 0.5511, "step": 27289 }, { "epoch": 0.8363981856074537, "grad_norm": 1.1254345200382743, "learning_rate": 6.856333687790357e-07, "loss": 0.6469, "step": 27290 }, { "epoch": 0.836428834130195, "grad_norm": 1.327730168634819, "learning_rate": 6.853825409518266e-07, "loss": 0.5817, "step": 27291 }, { "epoch": 0.8364594826529361, "grad_norm": 0.43569494493860694, "learning_rate": 6.851317556376469e-07, "loss": 0.3934, "step": 27292 }, { "epoch": 0.8364901311756774, "grad_norm": 1.5071955214117256, "learning_rate": 6.848810128389699e-07, "loss": 0.5254, "step": 27293 }, { "epoch": 0.8365207796984185, "grad_norm": 1.4055609612473936, "learning_rate": 6.846303125582626e-07, "loss": 0.6477, "step": 27294 }, { "epoch": 0.8365514282211597, "grad_norm": 1.4778661292252302, "learning_rate": 6.84379654797997e-07, "loss": 0.6861, "step": 27295 }, { "epoch": 0.836582076743901, "grad_norm": 1.6996035649021104, "learning_rate": 6.841290395606443e-07, "loss": 0.6348, "step": 27296 }, { "epoch": 0.8366127252666421, "grad_norm": 1.3359915574901384, "learning_rate": 6.838784668486708e-07, "loss": 0.6536, "step": 27297 }, { "epoch": 0.8366433737893834, "grad_norm": 0.4385259101072941, "learning_rate": 6.836279366645477e-07, "loss": 0.3972, "step": 27298 }, { "epoch": 0.8366740223121245, "grad_norm": 1.2377034320358808, "learning_rate": 6.833774490107437e-07, "loss": 0.5928, "step": 27299 }, { "epoch": 0.8367046708348658, "grad_norm": 1.327702452158604, "learning_rate": 6.831270038897253e-07, "loss": 0.5929, "step": 27300 }, { "epoch": 0.8367353193576069, "grad_norm": 1.4053348973200472, "learning_rate": 6.828766013039617e-07, "loss": 0.6994, "step": 27301 }, { "epoch": 0.8367659678803482, "grad_norm": 1.4225455847258297, "learning_rate": 6.826262412559176e-07, "loss": 0.6125, "step": 27302 }, { "epoch": 0.8367966164030893, "grad_norm": 0.4634296929353601, "learning_rate": 6.823759237480643e-07, "loss": 0.4137, "step": 27303 }, { "epoch": 0.8368272649258306, "grad_norm": 1.2910514754879863, "learning_rate": 6.821256487828654e-07, "loss": 0.5508, "step": 27304 }, { "epoch": 0.8368579134485717, "grad_norm": 1.4388110430400147, "learning_rate": 6.818754163627861e-07, "loss": 0.6051, "step": 27305 }, { "epoch": 0.836888561971313, "grad_norm": 1.4096885223087166, "learning_rate": 6.816252264902934e-07, "loss": 0.5706, "step": 27306 }, { "epoch": 0.8369192104940542, "grad_norm": 1.2373364500552473, "learning_rate": 6.813750791678531e-07, "loss": 0.6313, "step": 27307 }, { "epoch": 0.8369498590167954, "grad_norm": 1.45868228443854, "learning_rate": 6.811249743979281e-07, "loss": 0.5693, "step": 27308 }, { "epoch": 0.8369805075395366, "grad_norm": 1.1589918284268113, "learning_rate": 6.808749121829839e-07, "loss": 0.5668, "step": 27309 }, { "epoch": 0.8370111560622778, "grad_norm": 1.1528535527509436, "learning_rate": 6.806248925254844e-07, "loss": 0.5738, "step": 27310 }, { "epoch": 0.837041804585019, "grad_norm": 0.45748811772438863, "learning_rate": 6.803749154278938e-07, "loss": 0.3916, "step": 27311 }, { "epoch": 0.8370724531077602, "grad_norm": 0.43678765410518083, "learning_rate": 6.801249808926741e-07, "loss": 0.359, "step": 27312 }, { "epoch": 0.8371031016305014, "grad_norm": 1.1262648429682525, "learning_rate": 6.798750889222877e-07, "loss": 0.5587, "step": 27313 }, { "epoch": 0.8371337501532427, "grad_norm": 1.2947461994113019, "learning_rate": 6.796252395191971e-07, "loss": 0.6103, "step": 27314 }, { "epoch": 0.8371643986759838, "grad_norm": 0.427744848617837, "learning_rate": 6.793754326858659e-07, "loss": 0.3749, "step": 27315 }, { "epoch": 0.8371950471987251, "grad_norm": 0.46139226642339504, "learning_rate": 6.791256684247521e-07, "loss": 0.3983, "step": 27316 }, { "epoch": 0.8372256957214662, "grad_norm": 1.3733779553041243, "learning_rate": 6.788759467383194e-07, "loss": 0.6096, "step": 27317 }, { "epoch": 0.8372563442442075, "grad_norm": 1.3730917235521647, "learning_rate": 6.786262676290284e-07, "loss": 0.6435, "step": 27318 }, { "epoch": 0.8372869927669486, "grad_norm": 1.3519126046292322, "learning_rate": 6.783766310993378e-07, "loss": 0.6367, "step": 27319 }, { "epoch": 0.8373176412896899, "grad_norm": 1.401294111503827, "learning_rate": 6.781270371517084e-07, "loss": 0.6642, "step": 27320 }, { "epoch": 0.837348289812431, "grad_norm": 1.3759438766133902, "learning_rate": 6.778774857885973e-07, "loss": 0.6526, "step": 27321 }, { "epoch": 0.8373789383351723, "grad_norm": 1.3132439549664028, "learning_rate": 6.776279770124677e-07, "loss": 0.5667, "step": 27322 }, { "epoch": 0.8374095868579134, "grad_norm": 1.4371208246942524, "learning_rate": 6.773785108257752e-07, "loss": 0.5866, "step": 27323 }, { "epoch": 0.8374402353806547, "grad_norm": 0.4315982660542905, "learning_rate": 6.771290872309771e-07, "loss": 0.3907, "step": 27324 }, { "epoch": 0.8374708839033959, "grad_norm": 0.45630706313009844, "learning_rate": 6.768797062305321e-07, "loss": 0.4047, "step": 27325 }, { "epoch": 0.837501532426137, "grad_norm": 1.2141220154936834, "learning_rate": 6.766303678268987e-07, "loss": 0.65, "step": 27326 }, { "epoch": 0.8375321809488783, "grad_norm": 1.4801302026813052, "learning_rate": 6.763810720225311e-07, "loss": 0.5191, "step": 27327 }, { "epoch": 0.8375628294716194, "grad_norm": 0.4223674367930062, "learning_rate": 6.761318188198873e-07, "loss": 0.3704, "step": 27328 }, { "epoch": 0.8375934779943607, "grad_norm": 0.44268964014786405, "learning_rate": 6.758826082214232e-07, "loss": 0.4023, "step": 27329 }, { "epoch": 0.8376241265171018, "grad_norm": 1.3847606040921776, "learning_rate": 6.75633440229595e-07, "loss": 0.6311, "step": 27330 }, { "epoch": 0.8376547750398431, "grad_norm": 1.447505845552852, "learning_rate": 6.75384314846857e-07, "loss": 0.6908, "step": 27331 }, { "epoch": 0.8376854235625842, "grad_norm": 0.4541384977007583, "learning_rate": 6.751352320756615e-07, "loss": 0.3892, "step": 27332 }, { "epoch": 0.8377160720853255, "grad_norm": 1.4600766670819918, "learning_rate": 6.748861919184673e-07, "loss": 0.6253, "step": 27333 }, { "epoch": 0.8377467206080667, "grad_norm": 1.2479111888878858, "learning_rate": 6.746371943777263e-07, "loss": 0.5396, "step": 27334 }, { "epoch": 0.8377773691308079, "grad_norm": 1.313523918934003, "learning_rate": 6.743882394558903e-07, "loss": 0.571, "step": 27335 }, { "epoch": 0.8378080176535491, "grad_norm": 1.1983805525274343, "learning_rate": 6.741393271554142e-07, "loss": 0.5193, "step": 27336 }, { "epoch": 0.8378386661762903, "grad_norm": 0.4345770181084205, "learning_rate": 6.738904574787497e-07, "loss": 0.381, "step": 27337 }, { "epoch": 0.8378693146990315, "grad_norm": 1.237739369491192, "learning_rate": 6.73641630428351e-07, "loss": 0.597, "step": 27338 }, { "epoch": 0.8378999632217727, "grad_norm": 1.373665845558965, "learning_rate": 6.733928460066669e-07, "loss": 0.6273, "step": 27339 }, { "epoch": 0.8379306117445139, "grad_norm": 1.3487716871620328, "learning_rate": 6.731441042161496e-07, "loss": 0.6144, "step": 27340 }, { "epoch": 0.8379612602672551, "grad_norm": 1.2878596893591634, "learning_rate": 6.728954050592523e-07, "loss": 0.684, "step": 27341 }, { "epoch": 0.8379919087899963, "grad_norm": 0.4267268225561685, "learning_rate": 6.726467485384236e-07, "loss": 0.3798, "step": 27342 }, { "epoch": 0.8380225573127376, "grad_norm": 1.2130881329513004, "learning_rate": 6.723981346561115e-07, "loss": 0.5578, "step": 27343 }, { "epoch": 0.8380532058354787, "grad_norm": 1.781273293465387, "learning_rate": 6.721495634147696e-07, "loss": 0.6132, "step": 27344 }, { "epoch": 0.83808385435822, "grad_norm": 1.2639985378824166, "learning_rate": 6.719010348168459e-07, "loss": 0.602, "step": 27345 }, { "epoch": 0.8381145028809611, "grad_norm": 1.3949483789883754, "learning_rate": 6.716525488647874e-07, "loss": 0.6559, "step": 27346 }, { "epoch": 0.8381451514037024, "grad_norm": 1.3560008730763766, "learning_rate": 6.714041055610437e-07, "loss": 0.6755, "step": 27347 }, { "epoch": 0.8381757999264435, "grad_norm": 1.4931314026138447, "learning_rate": 6.71155704908063e-07, "loss": 0.662, "step": 27348 }, { "epoch": 0.8382064484491848, "grad_norm": 1.4165436920055485, "learning_rate": 6.709073469082938e-07, "loss": 0.5134, "step": 27349 }, { "epoch": 0.8382370969719259, "grad_norm": 1.256017659523304, "learning_rate": 6.706590315641809e-07, "loss": 0.5557, "step": 27350 }, { "epoch": 0.8382677454946672, "grad_norm": 1.437288003224166, "learning_rate": 6.704107588781722e-07, "loss": 0.6081, "step": 27351 }, { "epoch": 0.8382983940174084, "grad_norm": 1.3006182106855761, "learning_rate": 6.701625288527152e-07, "loss": 0.6385, "step": 27352 }, { "epoch": 0.8383290425401496, "grad_norm": 1.4897236554621944, "learning_rate": 6.699143414902548e-07, "loss": 0.587, "step": 27353 }, { "epoch": 0.8383596910628908, "grad_norm": 1.2399177884022536, "learning_rate": 6.696661967932344e-07, "loss": 0.6194, "step": 27354 }, { "epoch": 0.838390339585632, "grad_norm": 1.4187469090743576, "learning_rate": 6.694180947641027e-07, "loss": 0.6118, "step": 27355 }, { "epoch": 0.8384209881083732, "grad_norm": 1.2721338225709455, "learning_rate": 6.691700354053016e-07, "loss": 0.6111, "step": 27356 }, { "epoch": 0.8384516366311143, "grad_norm": 1.3712588994929753, "learning_rate": 6.689220187192774e-07, "loss": 0.5919, "step": 27357 }, { "epoch": 0.8384822851538556, "grad_norm": 1.2185406339101688, "learning_rate": 6.686740447084711e-07, "loss": 0.481, "step": 27358 }, { "epoch": 0.8385129336765967, "grad_norm": 1.3360858395789899, "learning_rate": 6.684261133753283e-07, "loss": 0.5837, "step": 27359 }, { "epoch": 0.838543582199338, "grad_norm": 1.3738135577972732, "learning_rate": 6.681782247222923e-07, "loss": 0.6132, "step": 27360 }, { "epoch": 0.8385742307220792, "grad_norm": 1.3459719062610478, "learning_rate": 6.679303787518032e-07, "loss": 0.5771, "step": 27361 }, { "epoch": 0.8386048792448204, "grad_norm": 1.4068624379256107, "learning_rate": 6.676825754663046e-07, "loss": 0.5862, "step": 27362 }, { "epoch": 0.8386355277675616, "grad_norm": 0.45962609236924384, "learning_rate": 6.674348148682391e-07, "loss": 0.3922, "step": 27363 }, { "epoch": 0.8386661762903028, "grad_norm": 1.3543340935717274, "learning_rate": 6.671870969600458e-07, "loss": 0.7106, "step": 27364 }, { "epoch": 0.838696824813044, "grad_norm": 0.4546205242104432, "learning_rate": 6.669394217441677e-07, "loss": 0.3955, "step": 27365 }, { "epoch": 0.8387274733357852, "grad_norm": 1.5244419194360597, "learning_rate": 6.666917892230434e-07, "loss": 0.5851, "step": 27366 }, { "epoch": 0.8387581218585264, "grad_norm": 1.4585072485717623, "learning_rate": 6.664441993991134e-07, "loss": 0.7591, "step": 27367 }, { "epoch": 0.8387887703812676, "grad_norm": 1.42018556995669, "learning_rate": 6.661966522748187e-07, "loss": 0.5407, "step": 27368 }, { "epoch": 0.8388194189040088, "grad_norm": 1.3812460057982543, "learning_rate": 6.659491478525959e-07, "loss": 0.6465, "step": 27369 }, { "epoch": 0.8388500674267501, "grad_norm": 1.2778953445064738, "learning_rate": 6.657016861348852e-07, "loss": 0.639, "step": 27370 }, { "epoch": 0.8388807159494912, "grad_norm": 1.3166731210354894, "learning_rate": 6.654542671241254e-07, "loss": 0.5619, "step": 27371 }, { "epoch": 0.8389113644722325, "grad_norm": 0.4360403420889066, "learning_rate": 6.652068908227543e-07, "loss": 0.3867, "step": 27372 }, { "epoch": 0.8389420129949736, "grad_norm": 1.3291539113123354, "learning_rate": 6.649595572332068e-07, "loss": 0.618, "step": 27373 }, { "epoch": 0.8389726615177149, "grad_norm": 1.186571238634224, "learning_rate": 6.647122663579236e-07, "loss": 0.5673, "step": 27374 }, { "epoch": 0.839003310040456, "grad_norm": 1.321464616209272, "learning_rate": 6.644650181993389e-07, "loss": 0.5669, "step": 27375 }, { "epoch": 0.8390339585631973, "grad_norm": 1.2938580366343435, "learning_rate": 6.642178127598908e-07, "loss": 0.6848, "step": 27376 }, { "epoch": 0.8390646070859384, "grad_norm": 1.3883076056364343, "learning_rate": 6.639706500420124e-07, "loss": 0.5877, "step": 27377 }, { "epoch": 0.8390952556086797, "grad_norm": 1.4992769080736648, "learning_rate": 6.637235300481409e-07, "loss": 0.5968, "step": 27378 }, { "epoch": 0.8391259041314209, "grad_norm": 1.40933503546094, "learning_rate": 6.63476452780712e-07, "loss": 0.5108, "step": 27379 }, { "epoch": 0.8391565526541621, "grad_norm": 0.4463481499619044, "learning_rate": 6.632294182421584e-07, "loss": 0.4061, "step": 27380 }, { "epoch": 0.8391872011769033, "grad_norm": 1.2261126082698932, "learning_rate": 6.629824264349144e-07, "loss": 0.5729, "step": 27381 }, { "epoch": 0.8392178496996445, "grad_norm": 1.5160505621245246, "learning_rate": 6.627354773614159e-07, "loss": 0.6045, "step": 27382 }, { "epoch": 0.8392484982223857, "grad_norm": 1.2759179244501333, "learning_rate": 6.624885710240931e-07, "loss": 0.6513, "step": 27383 }, { "epoch": 0.8392791467451269, "grad_norm": 1.3111455281282254, "learning_rate": 6.6224170742538e-07, "loss": 0.5761, "step": 27384 }, { "epoch": 0.8393097952678681, "grad_norm": 1.3334165398824276, "learning_rate": 6.619948865677107e-07, "loss": 0.5713, "step": 27385 }, { "epoch": 0.8393404437906093, "grad_norm": 1.5408514292467548, "learning_rate": 6.617481084535143e-07, "loss": 0.5928, "step": 27386 }, { "epoch": 0.8393710923133505, "grad_norm": 1.409011479139791, "learning_rate": 6.615013730852249e-07, "loss": 0.6226, "step": 27387 }, { "epoch": 0.8394017408360916, "grad_norm": 0.43789016037000345, "learning_rate": 6.612546804652714e-07, "loss": 0.3786, "step": 27388 }, { "epoch": 0.8394323893588329, "grad_norm": 0.45488543119474706, "learning_rate": 6.610080305960859e-07, "loss": 0.4056, "step": 27389 }, { "epoch": 0.8394630378815741, "grad_norm": 1.5995255966042494, "learning_rate": 6.607614234800997e-07, "loss": 0.5913, "step": 27390 }, { "epoch": 0.8394936864043153, "grad_norm": 1.2507067749436167, "learning_rate": 6.605148591197396e-07, "loss": 0.5826, "step": 27391 }, { "epoch": 0.8395243349270565, "grad_norm": 0.44220402558407834, "learning_rate": 6.602683375174374e-07, "loss": 0.4094, "step": 27392 }, { "epoch": 0.8395549834497977, "grad_norm": 1.3336136443819955, "learning_rate": 6.600218586756229e-07, "loss": 0.6166, "step": 27393 }, { "epoch": 0.8395856319725389, "grad_norm": 1.3575239889213826, "learning_rate": 6.597754225967223e-07, "loss": 0.7634, "step": 27394 }, { "epoch": 0.8396162804952801, "grad_norm": 0.4553163237758224, "learning_rate": 6.595290292831647e-07, "loss": 0.3958, "step": 27395 }, { "epoch": 0.8396469290180213, "grad_norm": 1.3209679384940696, "learning_rate": 6.592826787373791e-07, "loss": 0.5957, "step": 27396 }, { "epoch": 0.8396775775407626, "grad_norm": 1.4647652120905106, "learning_rate": 6.590363709617914e-07, "loss": 0.544, "step": 27397 }, { "epoch": 0.8397082260635037, "grad_norm": 1.2268916854153382, "learning_rate": 6.587901059588298e-07, "loss": 0.5537, "step": 27398 }, { "epoch": 0.839738874586245, "grad_norm": 1.2316186715701902, "learning_rate": 6.585438837309188e-07, "loss": 0.5964, "step": 27399 }, { "epoch": 0.8397695231089861, "grad_norm": 0.4510060273746933, "learning_rate": 6.582977042804856e-07, "loss": 0.3948, "step": 27400 }, { "epoch": 0.8398001716317274, "grad_norm": 1.3424456928468993, "learning_rate": 6.580515676099575e-07, "loss": 0.5766, "step": 27401 }, { "epoch": 0.8398308201544685, "grad_norm": 0.44430690071949624, "learning_rate": 6.578054737217565e-07, "loss": 0.3708, "step": 27402 }, { "epoch": 0.8398614686772098, "grad_norm": 1.2587177269006162, "learning_rate": 6.575594226183096e-07, "loss": 0.5499, "step": 27403 }, { "epoch": 0.8398921171999509, "grad_norm": 1.2183701321900213, "learning_rate": 6.573134143020421e-07, "loss": 0.5812, "step": 27404 }, { "epoch": 0.8399227657226922, "grad_norm": 1.3298319951025832, "learning_rate": 6.570674487753753e-07, "loss": 0.6006, "step": 27405 }, { "epoch": 0.8399534142454333, "grad_norm": 0.4523667014444339, "learning_rate": 6.56821526040734e-07, "loss": 0.3854, "step": 27406 }, { "epoch": 0.8399840627681746, "grad_norm": 1.4085704766276694, "learning_rate": 6.565756461005429e-07, "loss": 0.628, "step": 27407 }, { "epoch": 0.8400147112909158, "grad_norm": 1.4416446091371748, "learning_rate": 6.563298089572218e-07, "loss": 0.6276, "step": 27408 }, { "epoch": 0.840045359813657, "grad_norm": 1.305858420736392, "learning_rate": 6.560840146131958e-07, "loss": 0.5713, "step": 27409 }, { "epoch": 0.8400760083363982, "grad_norm": 1.2693263277588955, "learning_rate": 6.558382630708843e-07, "loss": 0.5152, "step": 27410 }, { "epoch": 0.8401066568591394, "grad_norm": 1.1404034447894158, "learning_rate": 6.555925543327097e-07, "loss": 0.5298, "step": 27411 }, { "epoch": 0.8401373053818806, "grad_norm": 5.6199037310086295, "learning_rate": 6.553468884010949e-07, "loss": 0.5557, "step": 27412 }, { "epoch": 0.8401679539046218, "grad_norm": 1.3629671864659403, "learning_rate": 6.551012652784572e-07, "loss": 0.4891, "step": 27413 }, { "epoch": 0.840198602427363, "grad_norm": 1.4402502944546842, "learning_rate": 6.548556849672183e-07, "loss": 0.597, "step": 27414 }, { "epoch": 0.8402292509501043, "grad_norm": 1.2756929597773432, "learning_rate": 6.546101474697996e-07, "loss": 0.562, "step": 27415 }, { "epoch": 0.8402598994728454, "grad_norm": 0.4402941070327019, "learning_rate": 6.543646527886177e-07, "loss": 0.3884, "step": 27416 }, { "epoch": 0.8402905479955867, "grad_norm": 1.3629108821034395, "learning_rate": 6.541192009260938e-07, "loss": 0.6628, "step": 27417 }, { "epoch": 0.8403211965183278, "grad_norm": 1.5010165846275552, "learning_rate": 6.538737918846444e-07, "loss": 0.5619, "step": 27418 }, { "epoch": 0.840351845041069, "grad_norm": 1.2650727045942998, "learning_rate": 6.536284256666881e-07, "loss": 0.6857, "step": 27419 }, { "epoch": 0.8403824935638102, "grad_norm": 1.2420431828998661, "learning_rate": 6.533831022746445e-07, "loss": 0.5632, "step": 27420 }, { "epoch": 0.8404131420865514, "grad_norm": 1.4411913632717277, "learning_rate": 6.53137821710928e-07, "loss": 0.5587, "step": 27421 }, { "epoch": 0.8404437906092926, "grad_norm": 1.4971171521750486, "learning_rate": 6.52892583977957e-07, "loss": 0.7529, "step": 27422 }, { "epoch": 0.8404744391320338, "grad_norm": 1.239521803358078, "learning_rate": 6.526473890781487e-07, "loss": 0.603, "step": 27423 }, { "epoch": 0.840505087654775, "grad_norm": 1.4188475806968919, "learning_rate": 6.524022370139166e-07, "loss": 0.7106, "step": 27424 }, { "epoch": 0.8405357361775162, "grad_norm": 1.4252959922945432, "learning_rate": 6.521571277876782e-07, "loss": 0.6239, "step": 27425 }, { "epoch": 0.8405663847002575, "grad_norm": 1.3729822980001007, "learning_rate": 6.519120614018487e-07, "loss": 0.5817, "step": 27426 }, { "epoch": 0.8405970332229986, "grad_norm": 1.434030702469281, "learning_rate": 6.516670378588413e-07, "loss": 0.5502, "step": 27427 }, { "epoch": 0.8406276817457399, "grad_norm": 1.2433926245304905, "learning_rate": 6.514220571610719e-07, "loss": 0.626, "step": 27428 }, { "epoch": 0.840658330268481, "grad_norm": 1.3699733892297237, "learning_rate": 6.511771193109517e-07, "loss": 0.6109, "step": 27429 }, { "epoch": 0.8406889787912223, "grad_norm": 0.4830064957233476, "learning_rate": 6.509322243108978e-07, "loss": 0.4029, "step": 27430 }, { "epoch": 0.8407196273139634, "grad_norm": 1.315456814428822, "learning_rate": 6.506873721633223e-07, "loss": 0.6164, "step": 27431 }, { "epoch": 0.8407502758367047, "grad_norm": 1.4831773577360352, "learning_rate": 6.504425628706352e-07, "loss": 0.6354, "step": 27432 }, { "epoch": 0.8407809243594458, "grad_norm": 1.5162005193818608, "learning_rate": 6.501977964352502e-07, "loss": 0.6356, "step": 27433 }, { "epoch": 0.8408115728821871, "grad_norm": 1.1784861739796881, "learning_rate": 6.499530728595809e-07, "loss": 0.5524, "step": 27434 }, { "epoch": 0.8408422214049283, "grad_norm": 1.3912358455058185, "learning_rate": 6.497083921460356e-07, "loss": 0.5391, "step": 27435 }, { "epoch": 0.8408728699276695, "grad_norm": 1.6746033554431563, "learning_rate": 6.494637542970267e-07, "loss": 0.5591, "step": 27436 }, { "epoch": 0.8409035184504107, "grad_norm": 1.4414329789003404, "learning_rate": 6.492191593149643e-07, "loss": 0.6264, "step": 27437 }, { "epoch": 0.8409341669731519, "grad_norm": 1.3762249219350084, "learning_rate": 6.489746072022601e-07, "loss": 0.6227, "step": 27438 }, { "epoch": 0.8409648154958931, "grad_norm": 1.310469005323932, "learning_rate": 6.487300979613226e-07, "loss": 0.6054, "step": 27439 }, { "epoch": 0.8409954640186343, "grad_norm": 1.4073122641722817, "learning_rate": 6.484856315945581e-07, "loss": 0.6272, "step": 27440 }, { "epoch": 0.8410261125413755, "grad_norm": 1.3731828484414081, "learning_rate": 6.482412081043804e-07, "loss": 0.5701, "step": 27441 }, { "epoch": 0.8410567610641168, "grad_norm": 1.1999479427777648, "learning_rate": 6.479968274931952e-07, "loss": 0.5472, "step": 27442 }, { "epoch": 0.8410874095868579, "grad_norm": 1.4380268193884644, "learning_rate": 6.477524897634102e-07, "loss": 0.551, "step": 27443 }, { "epoch": 0.8411180581095992, "grad_norm": 1.348886304857681, "learning_rate": 6.475081949174334e-07, "loss": 0.5979, "step": 27444 }, { "epoch": 0.8411487066323403, "grad_norm": 1.181774512702217, "learning_rate": 6.472639429576732e-07, "loss": 0.5712, "step": 27445 }, { "epoch": 0.8411793551550816, "grad_norm": 1.529565010586399, "learning_rate": 6.470197338865336e-07, "loss": 0.7266, "step": 27446 }, { "epoch": 0.8412100036778227, "grad_norm": 1.154861367656536, "learning_rate": 6.467755677064225e-07, "loss": 0.5495, "step": 27447 }, { "epoch": 0.841240652200564, "grad_norm": 1.2148139815082515, "learning_rate": 6.465314444197457e-07, "loss": 0.5947, "step": 27448 }, { "epoch": 0.8412713007233051, "grad_norm": 1.4131189267601143, "learning_rate": 6.462873640289091e-07, "loss": 0.6509, "step": 27449 }, { "epoch": 0.8413019492460463, "grad_norm": 1.2495697213282906, "learning_rate": 6.460433265363175e-07, "loss": 0.651, "step": 27450 }, { "epoch": 0.8413325977687875, "grad_norm": 1.3631705400998986, "learning_rate": 6.457993319443723e-07, "loss": 0.5684, "step": 27451 }, { "epoch": 0.8413632462915287, "grad_norm": 1.3206389416270383, "learning_rate": 6.455553802554832e-07, "loss": 0.6107, "step": 27452 }, { "epoch": 0.84139389481427, "grad_norm": 1.5208718593889459, "learning_rate": 6.453114714720504e-07, "loss": 0.5976, "step": 27453 }, { "epoch": 0.8414245433370111, "grad_norm": 1.6131544871230932, "learning_rate": 6.450676055964772e-07, "loss": 0.6312, "step": 27454 }, { "epoch": 0.8414551918597524, "grad_norm": 1.3566391997421137, "learning_rate": 6.448237826311671e-07, "loss": 0.4853, "step": 27455 }, { "epoch": 0.8414858403824935, "grad_norm": 0.47140200646775426, "learning_rate": 6.445800025785226e-07, "loss": 0.4041, "step": 27456 }, { "epoch": 0.8415164889052348, "grad_norm": 0.44687820810102175, "learning_rate": 6.443362654409469e-07, "loss": 0.3926, "step": 27457 }, { "epoch": 0.8415471374279759, "grad_norm": 0.4504421622366165, "learning_rate": 6.44092571220839e-07, "loss": 0.4045, "step": 27458 }, { "epoch": 0.8415777859507172, "grad_norm": 1.397872208995692, "learning_rate": 6.438489199206017e-07, "loss": 0.5244, "step": 27459 }, { "epoch": 0.8416084344734583, "grad_norm": 1.3350217368003856, "learning_rate": 6.436053115426366e-07, "loss": 0.6197, "step": 27460 }, { "epoch": 0.8416390829961996, "grad_norm": 1.375716100426041, "learning_rate": 6.433617460893432e-07, "loss": 0.5704, "step": 27461 }, { "epoch": 0.8416697315189408, "grad_norm": 1.2165725048023397, "learning_rate": 6.431182235631201e-07, "loss": 0.5012, "step": 27462 }, { "epoch": 0.841700380041682, "grad_norm": 1.3664851168888763, "learning_rate": 6.428747439663674e-07, "loss": 0.5913, "step": 27463 }, { "epoch": 0.8417310285644232, "grad_norm": 1.3343017169616938, "learning_rate": 6.42631307301485e-07, "loss": 0.6183, "step": 27464 }, { "epoch": 0.8417616770871644, "grad_norm": 1.4525820798232352, "learning_rate": 6.423879135708728e-07, "loss": 0.7019, "step": 27465 }, { "epoch": 0.8417923256099056, "grad_norm": 1.3945567906038236, "learning_rate": 6.421445627769258e-07, "loss": 0.6546, "step": 27466 }, { "epoch": 0.8418229741326468, "grad_norm": 1.423865539005113, "learning_rate": 6.419012549220433e-07, "loss": 0.6757, "step": 27467 }, { "epoch": 0.841853622655388, "grad_norm": 1.4542201894448707, "learning_rate": 6.41657990008624e-07, "loss": 0.5749, "step": 27468 }, { "epoch": 0.8418842711781293, "grad_norm": 1.2501750096519535, "learning_rate": 6.414147680390637e-07, "loss": 0.5377, "step": 27469 }, { "epoch": 0.8419149197008704, "grad_norm": 1.5163343822392588, "learning_rate": 6.411715890157572e-07, "loss": 0.6378, "step": 27470 }, { "epoch": 0.8419455682236117, "grad_norm": 0.4441691998514363, "learning_rate": 6.409284529411036e-07, "loss": 0.3731, "step": 27471 }, { "epoch": 0.8419762167463528, "grad_norm": 1.4113598829620035, "learning_rate": 6.406853598174978e-07, "loss": 0.7025, "step": 27472 }, { "epoch": 0.8420068652690941, "grad_norm": 1.3763721607802006, "learning_rate": 6.404423096473334e-07, "loss": 0.6146, "step": 27473 }, { "epoch": 0.8420375137918352, "grad_norm": 1.2976572367460035, "learning_rate": 6.401993024330061e-07, "loss": 0.5407, "step": 27474 }, { "epoch": 0.8420681623145765, "grad_norm": 1.4443984727350447, "learning_rate": 6.399563381769108e-07, "loss": 0.6105, "step": 27475 }, { "epoch": 0.8420988108373176, "grad_norm": 1.3919648763258483, "learning_rate": 6.397134168814422e-07, "loss": 0.5966, "step": 27476 }, { "epoch": 0.8421294593600589, "grad_norm": 0.4195785081358361, "learning_rate": 6.394705385489925e-07, "loss": 0.3815, "step": 27477 }, { "epoch": 0.8421601078828, "grad_norm": 1.4874524173334804, "learning_rate": 6.392277031819544e-07, "loss": 0.709, "step": 27478 }, { "epoch": 0.8421907564055413, "grad_norm": 1.2107767393359332, "learning_rate": 6.389849107827229e-07, "loss": 0.5706, "step": 27479 }, { "epoch": 0.8422214049282825, "grad_norm": 1.3721050735136548, "learning_rate": 6.387421613536887e-07, "loss": 0.5813, "step": 27480 }, { "epoch": 0.8422520534510236, "grad_norm": 1.257417778354546, "learning_rate": 6.384994548972423e-07, "loss": 0.5268, "step": 27481 }, { "epoch": 0.8422827019737649, "grad_norm": 0.45905258654238235, "learning_rate": 6.382567914157784e-07, "loss": 0.3807, "step": 27482 }, { "epoch": 0.842313350496506, "grad_norm": 0.451937259912927, "learning_rate": 6.38014170911685e-07, "loss": 0.387, "step": 27483 }, { "epoch": 0.8423439990192473, "grad_norm": 1.5629504436511725, "learning_rate": 6.377715933873557e-07, "loss": 0.5638, "step": 27484 }, { "epoch": 0.8423746475419884, "grad_norm": 1.3962686817345347, "learning_rate": 6.375290588451777e-07, "loss": 0.6131, "step": 27485 }, { "epoch": 0.8424052960647297, "grad_norm": 1.3706442533021381, "learning_rate": 6.372865672875416e-07, "loss": 0.5585, "step": 27486 }, { "epoch": 0.8424359445874708, "grad_norm": 0.4723825827736736, "learning_rate": 6.370441187168392e-07, "loss": 0.3986, "step": 27487 }, { "epoch": 0.8424665931102121, "grad_norm": 1.2440345043420227, "learning_rate": 6.368017131354559e-07, "loss": 0.6068, "step": 27488 }, { "epoch": 0.8424972416329533, "grad_norm": 1.499288714550723, "learning_rate": 6.365593505457812e-07, "loss": 0.6611, "step": 27489 }, { "epoch": 0.8425278901556945, "grad_norm": 1.3751139382957442, "learning_rate": 6.363170309502054e-07, "loss": 0.5716, "step": 27490 }, { "epoch": 0.8425585386784357, "grad_norm": 1.4237435061158945, "learning_rate": 6.360747543511131e-07, "loss": 0.668, "step": 27491 }, { "epoch": 0.8425891872011769, "grad_norm": 1.2760439351824329, "learning_rate": 6.35832520750893e-07, "loss": 0.5625, "step": 27492 }, { "epoch": 0.8426198357239181, "grad_norm": 1.3373529542221678, "learning_rate": 6.355903301519323e-07, "loss": 0.5867, "step": 27493 }, { "epoch": 0.8426504842466593, "grad_norm": 1.3601101817203698, "learning_rate": 6.35348182556616e-07, "loss": 0.6186, "step": 27494 }, { "epoch": 0.8426811327694005, "grad_norm": 1.2630486589908039, "learning_rate": 6.351060779673318e-07, "loss": 0.5709, "step": 27495 }, { "epoch": 0.8427117812921417, "grad_norm": 1.3928498706032775, "learning_rate": 6.348640163864633e-07, "loss": 0.6222, "step": 27496 }, { "epoch": 0.8427424298148829, "grad_norm": 0.4376743709668822, "learning_rate": 6.346219978163964e-07, "loss": 0.3917, "step": 27497 }, { "epoch": 0.8427730783376242, "grad_norm": 1.398124635456512, "learning_rate": 6.34380022259517e-07, "loss": 0.5825, "step": 27498 }, { "epoch": 0.8428037268603653, "grad_norm": 1.4475189204847987, "learning_rate": 6.34138089718207e-07, "loss": 0.6559, "step": 27499 }, { "epoch": 0.8428343753831066, "grad_norm": 1.3702028435371096, "learning_rate": 6.338962001948512e-07, "loss": 0.5459, "step": 27500 }, { "epoch": 0.8428650239058477, "grad_norm": 1.184692890797689, "learning_rate": 6.336543536918349e-07, "loss": 0.5745, "step": 27501 }, { "epoch": 0.842895672428589, "grad_norm": 1.2370603684780992, "learning_rate": 6.334125502115385e-07, "loss": 0.6071, "step": 27502 }, { "epoch": 0.8429263209513301, "grad_norm": 1.3372143560377803, "learning_rate": 6.331707897563449e-07, "loss": 0.555, "step": 27503 }, { "epoch": 0.8429569694740714, "grad_norm": 0.4435167137977552, "learning_rate": 6.32929072328638e-07, "loss": 0.3804, "step": 27504 }, { "epoch": 0.8429876179968125, "grad_norm": 1.2866075386626186, "learning_rate": 6.326873979307973e-07, "loss": 0.5573, "step": 27505 }, { "epoch": 0.8430182665195538, "grad_norm": 1.3589442803803022, "learning_rate": 6.324457665652062e-07, "loss": 0.4664, "step": 27506 }, { "epoch": 0.843048915042295, "grad_norm": 1.4093555523292907, "learning_rate": 6.322041782342437e-07, "loss": 0.6515, "step": 27507 }, { "epoch": 0.8430795635650362, "grad_norm": 0.4427947252456996, "learning_rate": 6.319626329402906e-07, "loss": 0.3952, "step": 27508 }, { "epoch": 0.8431102120877774, "grad_norm": 1.2852727255487943, "learning_rate": 6.317211306857285e-07, "loss": 0.5815, "step": 27509 }, { "epoch": 0.8431408606105186, "grad_norm": 1.5023497900696126, "learning_rate": 6.314796714729343e-07, "loss": 0.678, "step": 27510 }, { "epoch": 0.8431715091332598, "grad_norm": 1.3090696554010963, "learning_rate": 6.312382553042889e-07, "loss": 0.6194, "step": 27511 }, { "epoch": 0.8432021576560009, "grad_norm": 1.5128479159792287, "learning_rate": 6.309968821821721e-07, "loss": 0.5556, "step": 27512 }, { "epoch": 0.8432328061787422, "grad_norm": 0.45344228240799794, "learning_rate": 6.307555521089598e-07, "loss": 0.406, "step": 27513 }, { "epoch": 0.8432634547014833, "grad_norm": 1.3757770396427413, "learning_rate": 6.305142650870316e-07, "loss": 0.6629, "step": 27514 }, { "epoch": 0.8432941032242246, "grad_norm": 1.3464742741504583, "learning_rate": 6.302730211187635e-07, "loss": 0.5322, "step": 27515 }, { "epoch": 0.8433247517469658, "grad_norm": 1.2488574930308396, "learning_rate": 6.300318202065337e-07, "loss": 0.4565, "step": 27516 }, { "epoch": 0.843355400269707, "grad_norm": 1.2734900610533988, "learning_rate": 6.297906623527189e-07, "loss": 0.6577, "step": 27517 }, { "epoch": 0.8433860487924482, "grad_norm": 1.4210526104980217, "learning_rate": 6.295495475596941e-07, "loss": 0.667, "step": 27518 }, { "epoch": 0.8434166973151894, "grad_norm": 1.4632630136848193, "learning_rate": 6.293084758298356e-07, "loss": 0.6001, "step": 27519 }, { "epoch": 0.8434473458379306, "grad_norm": 1.4750544273048387, "learning_rate": 6.290674471655206e-07, "loss": 0.5743, "step": 27520 }, { "epoch": 0.8434779943606718, "grad_norm": 1.3441641992479636, "learning_rate": 6.288264615691209e-07, "loss": 0.571, "step": 27521 }, { "epoch": 0.843508642883413, "grad_norm": 1.3377910659942063, "learning_rate": 6.285855190430129e-07, "loss": 0.5148, "step": 27522 }, { "epoch": 0.8435392914061542, "grad_norm": 1.255329945394721, "learning_rate": 6.28344619589571e-07, "loss": 0.5395, "step": 27523 }, { "epoch": 0.8435699399288954, "grad_norm": 1.3600034089419146, "learning_rate": 6.28103763211167e-07, "loss": 0.5609, "step": 27524 }, { "epoch": 0.8436005884516367, "grad_norm": 1.222927250038367, "learning_rate": 6.278629499101763e-07, "loss": 0.5815, "step": 27525 }, { "epoch": 0.8436312369743778, "grad_norm": 1.075648967034476, "learning_rate": 6.276221796889692e-07, "loss": 0.5091, "step": 27526 }, { "epoch": 0.8436618854971191, "grad_norm": 1.30257688567541, "learning_rate": 6.2738145254992e-07, "loss": 0.6597, "step": 27527 }, { "epoch": 0.8436925340198602, "grad_norm": 1.3009576978397022, "learning_rate": 6.271407684954012e-07, "loss": 0.5971, "step": 27528 }, { "epoch": 0.8437231825426015, "grad_norm": 0.44169331305191284, "learning_rate": 6.269001275277819e-07, "loss": 0.399, "step": 27529 }, { "epoch": 0.8437538310653426, "grad_norm": 1.4553951576996138, "learning_rate": 6.266595296494349e-07, "loss": 0.6009, "step": 27530 }, { "epoch": 0.8437844795880839, "grad_norm": 0.4576064690645319, "learning_rate": 6.264189748627314e-07, "loss": 0.4008, "step": 27531 }, { "epoch": 0.843815128110825, "grad_norm": 1.3396458709373689, "learning_rate": 6.261784631700396e-07, "loss": 0.6149, "step": 27532 }, { "epoch": 0.8438457766335663, "grad_norm": 1.458350969611451, "learning_rate": 6.25937994573731e-07, "loss": 0.4804, "step": 27533 }, { "epoch": 0.8438764251563075, "grad_norm": 1.463184228562719, "learning_rate": 6.256975690761746e-07, "loss": 0.6072, "step": 27534 }, { "epoch": 0.8439070736790487, "grad_norm": 0.4520234060327795, "learning_rate": 6.254571866797388e-07, "loss": 0.4159, "step": 27535 }, { "epoch": 0.8439377222017899, "grad_norm": 1.4323475996298594, "learning_rate": 6.252168473867937e-07, "loss": 0.5896, "step": 27536 }, { "epoch": 0.8439683707245311, "grad_norm": 1.4177854843349105, "learning_rate": 6.249765511997041e-07, "loss": 0.6348, "step": 27537 }, { "epoch": 0.8439990192472723, "grad_norm": 1.4555627481728746, "learning_rate": 6.247362981208422e-07, "loss": 0.5948, "step": 27538 }, { "epoch": 0.8440296677700135, "grad_norm": 1.220805752580172, "learning_rate": 6.24496088152573e-07, "loss": 0.5171, "step": 27539 }, { "epoch": 0.8440603162927547, "grad_norm": 0.43434917429993786, "learning_rate": 6.242559212972621e-07, "loss": 0.396, "step": 27540 }, { "epoch": 0.844090964815496, "grad_norm": 1.3645867652176555, "learning_rate": 6.240157975572775e-07, "loss": 0.6042, "step": 27541 }, { "epoch": 0.8441216133382371, "grad_norm": 1.5254778197171337, "learning_rate": 6.237757169349856e-07, "loss": 0.6086, "step": 27542 }, { "epoch": 0.8441522618609782, "grad_norm": 0.43476714961786195, "learning_rate": 6.235356794327507e-07, "loss": 0.3746, "step": 27543 }, { "epoch": 0.8441829103837195, "grad_norm": 1.3904899705598688, "learning_rate": 6.232956850529381e-07, "loss": 0.6343, "step": 27544 }, { "epoch": 0.8442135589064607, "grad_norm": 0.44687323148834396, "learning_rate": 6.230557337979126e-07, "loss": 0.3761, "step": 27545 }, { "epoch": 0.8442442074292019, "grad_norm": 1.2285500262051907, "learning_rate": 6.228158256700407e-07, "loss": 0.5642, "step": 27546 }, { "epoch": 0.8442748559519431, "grad_norm": 1.2737418291851197, "learning_rate": 6.225759606716841e-07, "loss": 0.594, "step": 27547 }, { "epoch": 0.8443055044746843, "grad_norm": 1.2153136468609913, "learning_rate": 6.223361388052041e-07, "loss": 0.5167, "step": 27548 }, { "epoch": 0.8443361529974255, "grad_norm": 1.2442385542076952, "learning_rate": 6.22096360072969e-07, "loss": 0.5664, "step": 27549 }, { "epoch": 0.8443668015201667, "grad_norm": 1.251517466147103, "learning_rate": 6.218566244773383e-07, "loss": 0.5717, "step": 27550 }, { "epoch": 0.8443974500429079, "grad_norm": 0.4307107594369123, "learning_rate": 6.216169320206733e-07, "loss": 0.3801, "step": 27551 }, { "epoch": 0.8444280985656492, "grad_norm": 1.5090943587046626, "learning_rate": 6.213772827053366e-07, "loss": 0.5982, "step": 27552 }, { "epoch": 0.8444587470883903, "grad_norm": 1.2821819406291808, "learning_rate": 6.211376765336913e-07, "loss": 0.6335, "step": 27553 }, { "epoch": 0.8444893956111316, "grad_norm": 1.4466889176403945, "learning_rate": 6.20898113508096e-07, "loss": 0.5472, "step": 27554 }, { "epoch": 0.8445200441338727, "grad_norm": 1.3000847898062426, "learning_rate": 6.206585936309117e-07, "loss": 0.5695, "step": 27555 }, { "epoch": 0.844550692656614, "grad_norm": 1.5698230525940027, "learning_rate": 6.204191169044987e-07, "loss": 0.6798, "step": 27556 }, { "epoch": 0.8445813411793551, "grad_norm": 0.4247951641019828, "learning_rate": 6.201796833312179e-07, "loss": 0.3728, "step": 27557 }, { "epoch": 0.8446119897020964, "grad_norm": 1.454693515975355, "learning_rate": 6.199402929134273e-07, "loss": 0.5575, "step": 27558 }, { "epoch": 0.8446426382248375, "grad_norm": 1.2400976482012822, "learning_rate": 6.197009456534847e-07, "loss": 0.5615, "step": 27559 }, { "epoch": 0.8446732867475788, "grad_norm": 1.3907439691859476, "learning_rate": 6.194616415537496e-07, "loss": 0.6197, "step": 27560 }, { "epoch": 0.84470393527032, "grad_norm": 1.3162312267642702, "learning_rate": 6.192223806165809e-07, "loss": 0.5314, "step": 27561 }, { "epoch": 0.8447345837930612, "grad_norm": 1.374964504295594, "learning_rate": 6.189831628443333e-07, "loss": 0.5751, "step": 27562 }, { "epoch": 0.8447652323158024, "grad_norm": 1.3077592764577959, "learning_rate": 6.187439882393659e-07, "loss": 0.5638, "step": 27563 }, { "epoch": 0.8447958808385436, "grad_norm": 1.226457129110003, "learning_rate": 6.185048568040347e-07, "loss": 0.5853, "step": 27564 }, { "epoch": 0.8448265293612848, "grad_norm": 1.4450822680683089, "learning_rate": 6.182657685406979e-07, "loss": 0.6337, "step": 27565 }, { "epoch": 0.844857177884026, "grad_norm": 1.4053895314634801, "learning_rate": 6.18026723451709e-07, "loss": 0.6933, "step": 27566 }, { "epoch": 0.8448878264067672, "grad_norm": 1.4739937415231046, "learning_rate": 6.177877215394218e-07, "loss": 0.5763, "step": 27567 }, { "epoch": 0.8449184749295084, "grad_norm": 1.595581692615995, "learning_rate": 6.17548762806196e-07, "loss": 0.64, "step": 27568 }, { "epoch": 0.8449491234522496, "grad_norm": 1.4234288374778599, "learning_rate": 6.173098472543831e-07, "loss": 0.5561, "step": 27569 }, { "epoch": 0.8449797719749909, "grad_norm": 1.2604763751403207, "learning_rate": 6.170709748863368e-07, "loss": 0.5806, "step": 27570 }, { "epoch": 0.845010420497732, "grad_norm": 1.4579135601909172, "learning_rate": 6.168321457044119e-07, "loss": 0.5678, "step": 27571 }, { "epoch": 0.8450410690204733, "grad_norm": 1.3394359076335824, "learning_rate": 6.165933597109608e-07, "loss": 0.5706, "step": 27572 }, { "epoch": 0.8450717175432144, "grad_norm": 1.360128544319688, "learning_rate": 6.163546169083384e-07, "loss": 0.535, "step": 27573 }, { "epoch": 0.8451023660659556, "grad_norm": 1.3625840694382418, "learning_rate": 6.161159172988939e-07, "loss": 0.5308, "step": 27574 }, { "epoch": 0.8451330145886968, "grad_norm": 1.3749747622962891, "learning_rate": 6.158772608849817e-07, "loss": 0.5484, "step": 27575 }, { "epoch": 0.845163663111438, "grad_norm": 1.1909127700311875, "learning_rate": 6.156386476689529e-07, "loss": 0.5726, "step": 27576 }, { "epoch": 0.8451943116341792, "grad_norm": 0.4661301725790641, "learning_rate": 6.154000776531588e-07, "loss": 0.3736, "step": 27577 }, { "epoch": 0.8452249601569204, "grad_norm": 1.367234655504543, "learning_rate": 6.151615508399472e-07, "loss": 0.6732, "step": 27578 }, { "epoch": 0.8452556086796617, "grad_norm": 1.3477596668446048, "learning_rate": 6.149230672316731e-07, "loss": 0.6555, "step": 27579 }, { "epoch": 0.8452862572024028, "grad_norm": 1.4037549270932839, "learning_rate": 6.146846268306839e-07, "loss": 0.6187, "step": 27580 }, { "epoch": 0.8453169057251441, "grad_norm": 1.391462779835049, "learning_rate": 6.144462296393277e-07, "loss": 0.5687, "step": 27581 }, { "epoch": 0.8453475542478852, "grad_norm": 1.3489023437685148, "learning_rate": 6.142078756599551e-07, "loss": 0.6162, "step": 27582 }, { "epoch": 0.8453782027706265, "grad_norm": 1.4306549528037718, "learning_rate": 6.139695648949145e-07, "loss": 0.6162, "step": 27583 }, { "epoch": 0.8454088512933676, "grad_norm": 0.4098105092097837, "learning_rate": 6.137312973465553e-07, "loss": 0.3726, "step": 27584 }, { "epoch": 0.8454394998161089, "grad_norm": 1.5812071702065427, "learning_rate": 6.134930730172223e-07, "loss": 0.6523, "step": 27585 }, { "epoch": 0.84547014833885, "grad_norm": 1.3549259265926483, "learning_rate": 6.132548919092652e-07, "loss": 0.6981, "step": 27586 }, { "epoch": 0.8455007968615913, "grad_norm": 0.4272395444565684, "learning_rate": 6.130167540250304e-07, "loss": 0.3868, "step": 27587 }, { "epoch": 0.8455314453843324, "grad_norm": 1.3091856357174179, "learning_rate": 6.127786593668644e-07, "loss": 0.5013, "step": 27588 }, { "epoch": 0.8455620939070737, "grad_norm": 1.3491413385549014, "learning_rate": 6.125406079371104e-07, "loss": 0.6014, "step": 27589 }, { "epoch": 0.8455927424298149, "grad_norm": 1.2550391123497355, "learning_rate": 6.123025997381182e-07, "loss": 0.5431, "step": 27590 }, { "epoch": 0.8456233909525561, "grad_norm": 1.2802816894131979, "learning_rate": 6.120646347722304e-07, "loss": 0.6176, "step": 27591 }, { "epoch": 0.8456540394752973, "grad_norm": 1.299332258219843, "learning_rate": 6.118267130417938e-07, "loss": 0.5709, "step": 27592 }, { "epoch": 0.8456846879980385, "grad_norm": 1.5961302528954369, "learning_rate": 6.115888345491499e-07, "loss": 0.6098, "step": 27593 }, { "epoch": 0.8457153365207797, "grad_norm": 0.45801970503735406, "learning_rate": 6.113509992966443e-07, "loss": 0.4112, "step": 27594 }, { "epoch": 0.8457459850435209, "grad_norm": 1.2944877275456517, "learning_rate": 6.11113207286621e-07, "loss": 0.5416, "step": 27595 }, { "epoch": 0.8457766335662621, "grad_norm": 1.2846555700432185, "learning_rate": 6.108754585214211e-07, "loss": 0.6001, "step": 27596 }, { "epoch": 0.8458072820890034, "grad_norm": 1.379048119120304, "learning_rate": 6.106377530033885e-07, "loss": 0.575, "step": 27597 }, { "epoch": 0.8458379306117445, "grad_norm": 1.4988965711666502, "learning_rate": 6.104000907348662e-07, "loss": 0.6375, "step": 27598 }, { "epoch": 0.8458685791344858, "grad_norm": 1.4081606063980274, "learning_rate": 6.101624717181953e-07, "loss": 0.6325, "step": 27599 }, { "epoch": 0.8458992276572269, "grad_norm": 1.3793076864032965, "learning_rate": 6.099248959557141e-07, "loss": 0.561, "step": 27600 }, { "epoch": 0.8459298761799682, "grad_norm": 1.2983136763678287, "learning_rate": 6.096873634497685e-07, "loss": 0.6235, "step": 27601 }, { "epoch": 0.8459605247027093, "grad_norm": 0.46078001710075334, "learning_rate": 6.094498742026955e-07, "loss": 0.4161, "step": 27602 }, { "epoch": 0.8459911732254506, "grad_norm": 1.3665018799692812, "learning_rate": 6.092124282168377e-07, "loss": 0.5315, "step": 27603 }, { "epoch": 0.8460218217481917, "grad_norm": 1.3546146492059608, "learning_rate": 6.089750254945314e-07, "loss": 0.6529, "step": 27604 }, { "epoch": 0.8460524702709329, "grad_norm": 1.3307952290242604, "learning_rate": 6.087376660381183e-07, "loss": 0.627, "step": 27605 }, { "epoch": 0.8460831187936741, "grad_norm": 1.2820281122504105, "learning_rate": 6.085003498499376e-07, "loss": 0.494, "step": 27606 }, { "epoch": 0.8461137673164153, "grad_norm": 1.5879606997923337, "learning_rate": 6.08263076932325e-07, "loss": 0.5926, "step": 27607 }, { "epoch": 0.8461444158391566, "grad_norm": 0.4644985467536817, "learning_rate": 6.080258472876205e-07, "loss": 0.3773, "step": 27608 }, { "epoch": 0.8461750643618977, "grad_norm": 1.4210160260613975, "learning_rate": 6.077886609181621e-07, "loss": 0.6992, "step": 27609 }, { "epoch": 0.846205712884639, "grad_norm": 1.2840544022424996, "learning_rate": 6.075515178262841e-07, "loss": 0.6056, "step": 27610 }, { "epoch": 0.8462363614073801, "grad_norm": 1.1824983908919071, "learning_rate": 6.073144180143265e-07, "loss": 0.5787, "step": 27611 }, { "epoch": 0.8462670099301214, "grad_norm": 1.319863462242611, "learning_rate": 6.070773614846226e-07, "loss": 0.562, "step": 27612 }, { "epoch": 0.8462976584528625, "grad_norm": 1.3595277802155257, "learning_rate": 6.068403482395097e-07, "loss": 0.6051, "step": 27613 }, { "epoch": 0.8463283069756038, "grad_norm": 1.217516358803818, "learning_rate": 6.066033782813241e-07, "loss": 0.6433, "step": 27614 }, { "epoch": 0.846358955498345, "grad_norm": 1.3927105773096349, "learning_rate": 6.063664516123979e-07, "loss": 0.6006, "step": 27615 }, { "epoch": 0.8463896040210862, "grad_norm": 1.2121170752587267, "learning_rate": 6.061295682350676e-07, "loss": 0.5177, "step": 27616 }, { "epoch": 0.8464202525438274, "grad_norm": 1.3788838807433565, "learning_rate": 6.058927281516675e-07, "loss": 0.5895, "step": 27617 }, { "epoch": 0.8464509010665686, "grad_norm": 1.3706724752265937, "learning_rate": 6.056559313645299e-07, "loss": 0.5681, "step": 27618 }, { "epoch": 0.8464815495893098, "grad_norm": 1.2941232935098026, "learning_rate": 6.054191778759888e-07, "loss": 0.6075, "step": 27619 }, { "epoch": 0.846512198112051, "grad_norm": 1.4247779142935517, "learning_rate": 6.051824676883777e-07, "loss": 0.5838, "step": 27620 }, { "epoch": 0.8465428466347922, "grad_norm": 1.5513414058374602, "learning_rate": 6.049458008040276e-07, "loss": 0.6927, "step": 27621 }, { "epoch": 0.8465734951575334, "grad_norm": 1.3977571216704605, "learning_rate": 6.047091772252716e-07, "loss": 0.546, "step": 27622 }, { "epoch": 0.8466041436802746, "grad_norm": 1.4251954892812855, "learning_rate": 6.044725969544401e-07, "loss": 0.6234, "step": 27623 }, { "epoch": 0.8466347922030159, "grad_norm": 0.44991238843505527, "learning_rate": 6.042360599938646e-07, "loss": 0.3759, "step": 27624 }, { "epoch": 0.846665440725757, "grad_norm": 0.46086489151339455, "learning_rate": 6.039995663458765e-07, "loss": 0.3982, "step": 27625 }, { "epoch": 0.8466960892484983, "grad_norm": 1.1680558290575864, "learning_rate": 6.037631160128049e-07, "loss": 0.6088, "step": 27626 }, { "epoch": 0.8467267377712394, "grad_norm": 1.3384550956600998, "learning_rate": 6.035267089969798e-07, "loss": 0.5082, "step": 27627 }, { "epoch": 0.8467573862939807, "grad_norm": 1.5139138332536617, "learning_rate": 6.032903453007322e-07, "loss": 0.7068, "step": 27628 }, { "epoch": 0.8467880348167218, "grad_norm": 0.44817271438784634, "learning_rate": 6.030540249263889e-07, "loss": 0.3838, "step": 27629 }, { "epoch": 0.8468186833394631, "grad_norm": 1.3049395620895243, "learning_rate": 6.028177478762793e-07, "loss": 0.586, "step": 27630 }, { "epoch": 0.8468493318622042, "grad_norm": 1.3166495638493634, "learning_rate": 6.025815141527319e-07, "loss": 0.51, "step": 27631 }, { "epoch": 0.8468799803849455, "grad_norm": 1.2945307125815888, "learning_rate": 6.023453237580734e-07, "loss": 0.5113, "step": 27632 }, { "epoch": 0.8469106289076866, "grad_norm": 1.4392218027921608, "learning_rate": 6.021091766946329e-07, "loss": 0.5925, "step": 27633 }, { "epoch": 0.8469412774304279, "grad_norm": 1.4680053330174196, "learning_rate": 6.018730729647343e-07, "loss": 0.646, "step": 27634 }, { "epoch": 0.8469719259531691, "grad_norm": 1.3395597260659906, "learning_rate": 6.01637012570706e-07, "loss": 0.5771, "step": 27635 }, { "epoch": 0.8470025744759102, "grad_norm": 1.3745546962100739, "learning_rate": 6.014009955148748e-07, "loss": 0.637, "step": 27636 }, { "epoch": 0.8470332229986515, "grad_norm": 1.392915481421158, "learning_rate": 6.011650217995634e-07, "loss": 0.6376, "step": 27637 }, { "epoch": 0.8470638715213926, "grad_norm": 0.45362575428795765, "learning_rate": 6.009290914270993e-07, "loss": 0.3908, "step": 27638 }, { "epoch": 0.8470945200441339, "grad_norm": 1.206922347876401, "learning_rate": 6.00693204399807e-07, "loss": 0.497, "step": 27639 }, { "epoch": 0.847125168566875, "grad_norm": 1.4160914499044288, "learning_rate": 6.004573607200087e-07, "loss": 0.6579, "step": 27640 }, { "epoch": 0.8471558170896163, "grad_norm": 1.3570893912582942, "learning_rate": 6.002215603900302e-07, "loss": 0.6416, "step": 27641 }, { "epoch": 0.8471864656123574, "grad_norm": 1.288670747349475, "learning_rate": 5.999858034121958e-07, "loss": 0.6414, "step": 27642 }, { "epoch": 0.8472171141350987, "grad_norm": 0.42280537358776565, "learning_rate": 5.997500897888253e-07, "loss": 0.3736, "step": 27643 }, { "epoch": 0.8472477626578399, "grad_norm": 1.3388966593477838, "learning_rate": 5.995144195222447e-07, "loss": 0.4916, "step": 27644 }, { "epoch": 0.8472784111805811, "grad_norm": 1.3686499698231578, "learning_rate": 5.992787926147731e-07, "loss": 0.5992, "step": 27645 }, { "epoch": 0.8473090597033223, "grad_norm": 1.2491921216286486, "learning_rate": 5.990432090687332e-07, "loss": 0.6513, "step": 27646 }, { "epoch": 0.8473397082260635, "grad_norm": 1.2982956664217964, "learning_rate": 5.98807668886448e-07, "loss": 0.5131, "step": 27647 }, { "epoch": 0.8473703567488047, "grad_norm": 1.2626189695923156, "learning_rate": 5.985721720702359e-07, "loss": 0.5567, "step": 27648 }, { "epoch": 0.8474010052715459, "grad_norm": 0.47846381365614515, "learning_rate": 5.983367186224182e-07, "loss": 0.4142, "step": 27649 }, { "epoch": 0.8474316537942871, "grad_norm": 1.265615535835182, "learning_rate": 5.981013085453158e-07, "loss": 0.5365, "step": 27650 }, { "epoch": 0.8474623023170283, "grad_norm": 0.4478915323199312, "learning_rate": 5.978659418412469e-07, "loss": 0.3928, "step": 27651 }, { "epoch": 0.8474929508397695, "grad_norm": 1.2258404918193033, "learning_rate": 5.976306185125314e-07, "loss": 0.5641, "step": 27652 }, { "epoch": 0.8475235993625108, "grad_norm": 1.3249445833600777, "learning_rate": 5.973953385614883e-07, "loss": 0.5703, "step": 27653 }, { "epoch": 0.8475542478852519, "grad_norm": 1.5250745965751853, "learning_rate": 5.971601019904344e-07, "loss": 0.6596, "step": 27654 }, { "epoch": 0.8475848964079932, "grad_norm": 1.1974275720822967, "learning_rate": 5.969249088016899e-07, "loss": 0.591, "step": 27655 }, { "epoch": 0.8476155449307343, "grad_norm": 1.3208319394286905, "learning_rate": 5.966897589975695e-07, "loss": 0.5859, "step": 27656 }, { "epoch": 0.8476461934534756, "grad_norm": 0.42958694904398387, "learning_rate": 5.964546525803916e-07, "loss": 0.3876, "step": 27657 }, { "epoch": 0.8476768419762167, "grad_norm": 1.382310059085222, "learning_rate": 5.962195895524742e-07, "loss": 0.6161, "step": 27658 }, { "epoch": 0.847707490498958, "grad_norm": 1.374577627875036, "learning_rate": 5.959845699161304e-07, "loss": 0.6393, "step": 27659 }, { "epoch": 0.8477381390216991, "grad_norm": 0.4497899305966744, "learning_rate": 5.957495936736774e-07, "loss": 0.3836, "step": 27660 }, { "epoch": 0.8477687875444404, "grad_norm": 1.3548447941605124, "learning_rate": 5.955146608274321e-07, "loss": 0.6242, "step": 27661 }, { "epoch": 0.8477994360671816, "grad_norm": 1.3634756605710925, "learning_rate": 5.952797713797065e-07, "loss": 0.5783, "step": 27662 }, { "epoch": 0.8478300845899228, "grad_norm": 0.41411226407580454, "learning_rate": 5.950449253328172e-07, "loss": 0.378, "step": 27663 }, { "epoch": 0.847860733112664, "grad_norm": 0.44986391436054995, "learning_rate": 5.948101226890752e-07, "loss": 0.3835, "step": 27664 }, { "epoch": 0.8478913816354052, "grad_norm": 1.3994265251297437, "learning_rate": 5.945753634507983e-07, "loss": 0.5966, "step": 27665 }, { "epoch": 0.8479220301581464, "grad_norm": 1.1251568164907322, "learning_rate": 5.943406476202973e-07, "loss": 0.5505, "step": 27666 }, { "epoch": 0.8479526786808875, "grad_norm": 0.4519124425851746, "learning_rate": 5.941059751998846e-07, "loss": 0.3881, "step": 27667 }, { "epoch": 0.8479833272036288, "grad_norm": 1.216989178154445, "learning_rate": 5.938713461918727e-07, "loss": 0.5814, "step": 27668 }, { "epoch": 0.8480139757263699, "grad_norm": 1.399992393060292, "learning_rate": 5.936367605985749e-07, "loss": 0.6314, "step": 27669 }, { "epoch": 0.8480446242491112, "grad_norm": 1.351629556562131, "learning_rate": 5.934022184223004e-07, "loss": 0.5822, "step": 27670 }, { "epoch": 0.8480752727718524, "grad_norm": 0.46010905992489504, "learning_rate": 5.931677196653612e-07, "loss": 0.4222, "step": 27671 }, { "epoch": 0.8481059212945936, "grad_norm": 0.4530808270398782, "learning_rate": 5.929332643300683e-07, "loss": 0.3949, "step": 27672 }, { "epoch": 0.8481365698173348, "grad_norm": 1.2777529027565262, "learning_rate": 5.926988524187327e-07, "loss": 0.6182, "step": 27673 }, { "epoch": 0.848167218340076, "grad_norm": 1.38090181979719, "learning_rate": 5.924644839336625e-07, "loss": 0.578, "step": 27674 }, { "epoch": 0.8481978668628172, "grad_norm": 1.3395671272433338, "learning_rate": 5.922301588771657e-07, "loss": 0.5697, "step": 27675 }, { "epoch": 0.8482285153855584, "grad_norm": 1.5522883570626649, "learning_rate": 5.919958772515549e-07, "loss": 0.6921, "step": 27676 }, { "epoch": 0.8482591639082996, "grad_norm": 1.3569993829034803, "learning_rate": 5.917616390591363e-07, "loss": 0.6116, "step": 27677 }, { "epoch": 0.8482898124310408, "grad_norm": 1.2811274513500466, "learning_rate": 5.915274443022179e-07, "loss": 0.5783, "step": 27678 }, { "epoch": 0.848320460953782, "grad_norm": 1.1904011903007563, "learning_rate": 5.912932929831066e-07, "loss": 0.5968, "step": 27679 }, { "epoch": 0.8483511094765233, "grad_norm": 1.53749058047642, "learning_rate": 5.91059185104112e-07, "loss": 0.5898, "step": 27680 }, { "epoch": 0.8483817579992644, "grad_norm": 1.5723031427997463, "learning_rate": 5.908251206675386e-07, "loss": 0.571, "step": 27681 }, { "epoch": 0.8484124065220057, "grad_norm": 1.29027402787324, "learning_rate": 5.90591099675693e-07, "loss": 0.6431, "step": 27682 }, { "epoch": 0.8484430550447468, "grad_norm": 1.2236395402811469, "learning_rate": 5.903571221308813e-07, "loss": 0.631, "step": 27683 }, { "epoch": 0.8484737035674881, "grad_norm": 1.3428217143698131, "learning_rate": 5.901231880354108e-07, "loss": 0.5955, "step": 27684 }, { "epoch": 0.8485043520902292, "grad_norm": 1.1359048985408031, "learning_rate": 5.898892973915843e-07, "loss": 0.5989, "step": 27685 }, { "epoch": 0.8485350006129705, "grad_norm": 1.3242035598262942, "learning_rate": 5.896554502017049e-07, "loss": 0.5933, "step": 27686 }, { "epoch": 0.8485656491357116, "grad_norm": 1.331545559475532, "learning_rate": 5.894216464680813e-07, "loss": 0.5992, "step": 27687 }, { "epoch": 0.8485962976584529, "grad_norm": 1.4254507410292756, "learning_rate": 5.891878861930139e-07, "loss": 0.562, "step": 27688 }, { "epoch": 0.848626946181194, "grad_norm": 0.44054288884569076, "learning_rate": 5.889541693788064e-07, "loss": 0.3912, "step": 27689 }, { "epoch": 0.8486575947039353, "grad_norm": 1.435087565729186, "learning_rate": 5.88720496027762e-07, "loss": 0.5388, "step": 27690 }, { "epoch": 0.8486882432266765, "grad_norm": 1.3589693807715812, "learning_rate": 5.884868661421833e-07, "loss": 0.6169, "step": 27691 }, { "epoch": 0.8487188917494177, "grad_norm": 1.4088129429727672, "learning_rate": 5.882532797243734e-07, "loss": 0.5351, "step": 27692 }, { "epoch": 0.8487495402721589, "grad_norm": 1.3697190754344262, "learning_rate": 5.88019736776631e-07, "loss": 0.6133, "step": 27693 }, { "epoch": 0.8487801887949001, "grad_norm": 1.2933173846360981, "learning_rate": 5.877862373012599e-07, "loss": 0.4566, "step": 27694 }, { "epoch": 0.8488108373176413, "grad_norm": 1.2686701584141342, "learning_rate": 5.875527813005604e-07, "loss": 0.6197, "step": 27695 }, { "epoch": 0.8488414858403825, "grad_norm": 1.518871578324658, "learning_rate": 5.873193687768325e-07, "loss": 0.5977, "step": 27696 }, { "epoch": 0.8488721343631237, "grad_norm": 1.4171745967241525, "learning_rate": 5.870859997323746e-07, "loss": 0.5738, "step": 27697 }, { "epoch": 0.8489027828858648, "grad_norm": 1.2105502565874795, "learning_rate": 5.868526741694875e-07, "loss": 0.6655, "step": 27698 }, { "epoch": 0.8489334314086061, "grad_norm": 1.661013040855178, "learning_rate": 5.866193920904706e-07, "loss": 0.6721, "step": 27699 }, { "epoch": 0.8489640799313473, "grad_norm": 1.4230813887271512, "learning_rate": 5.863861534976228e-07, "loss": 0.6808, "step": 27700 }, { "epoch": 0.8489947284540885, "grad_norm": 1.3584974688970006, "learning_rate": 5.861529583932402e-07, "loss": 0.5991, "step": 27701 }, { "epoch": 0.8490253769768297, "grad_norm": 1.4760499739228867, "learning_rate": 5.859198067796218e-07, "loss": 0.7326, "step": 27702 }, { "epoch": 0.8490560254995709, "grad_norm": 1.7187376289225778, "learning_rate": 5.856866986590665e-07, "loss": 0.6435, "step": 27703 }, { "epoch": 0.8490866740223121, "grad_norm": 0.45781309446577745, "learning_rate": 5.854536340338685e-07, "loss": 0.3897, "step": 27704 }, { "epoch": 0.8491173225450533, "grad_norm": 1.3170624487465732, "learning_rate": 5.852206129063248e-07, "loss": 0.6109, "step": 27705 }, { "epoch": 0.8491479710677945, "grad_norm": 1.4401800811337446, "learning_rate": 5.849876352787337e-07, "loss": 0.651, "step": 27706 }, { "epoch": 0.8491786195905358, "grad_norm": 1.1465859564191694, "learning_rate": 5.847547011533882e-07, "loss": 0.5101, "step": 27707 }, { "epoch": 0.8492092681132769, "grad_norm": 1.4798059238890955, "learning_rate": 5.845218105325839e-07, "loss": 0.6466, "step": 27708 }, { "epoch": 0.8492399166360182, "grad_norm": 1.3897293584325148, "learning_rate": 5.842889634186161e-07, "loss": 0.6575, "step": 27709 }, { "epoch": 0.8492705651587593, "grad_norm": 0.45670467718400737, "learning_rate": 5.840561598137784e-07, "loss": 0.3979, "step": 27710 }, { "epoch": 0.8493012136815006, "grad_norm": 1.3390248296973695, "learning_rate": 5.838233997203668e-07, "loss": 0.5845, "step": 27711 }, { "epoch": 0.8493318622042417, "grad_norm": 1.5882900068853245, "learning_rate": 5.835906831406718e-07, "loss": 0.6401, "step": 27712 }, { "epoch": 0.849362510726983, "grad_norm": 1.5865607581474377, "learning_rate": 5.833580100769881e-07, "loss": 0.6206, "step": 27713 }, { "epoch": 0.8493931592497241, "grad_norm": 0.4516935160070642, "learning_rate": 5.831253805316084e-07, "loss": 0.3956, "step": 27714 }, { "epoch": 0.8494238077724654, "grad_norm": 0.4292448174174833, "learning_rate": 5.828927945068252e-07, "loss": 0.3764, "step": 27715 }, { "epoch": 0.8494544562952066, "grad_norm": 1.2230210003576059, "learning_rate": 5.826602520049268e-07, "loss": 0.5548, "step": 27716 }, { "epoch": 0.8494851048179478, "grad_norm": 1.5955747893016363, "learning_rate": 5.824277530282096e-07, "loss": 0.648, "step": 27717 }, { "epoch": 0.849515753340689, "grad_norm": 1.1944847813307355, "learning_rate": 5.821952975789608e-07, "loss": 0.4955, "step": 27718 }, { "epoch": 0.8495464018634302, "grad_norm": 1.4640577027768957, "learning_rate": 5.819628856594733e-07, "loss": 0.5423, "step": 27719 }, { "epoch": 0.8495770503861714, "grad_norm": 1.3080760366562576, "learning_rate": 5.817305172720344e-07, "loss": 0.5221, "step": 27720 }, { "epoch": 0.8496076989089126, "grad_norm": 0.4635024140896345, "learning_rate": 5.814981924189356e-07, "loss": 0.3927, "step": 27721 }, { "epoch": 0.8496383474316538, "grad_norm": 1.3653375396537153, "learning_rate": 5.812659111024666e-07, "loss": 0.6517, "step": 27722 }, { "epoch": 0.849668995954395, "grad_norm": 1.2612323139692108, "learning_rate": 5.810336733249139e-07, "loss": 0.4883, "step": 27723 }, { "epoch": 0.8496996444771362, "grad_norm": 0.45583787503373785, "learning_rate": 5.808014790885674e-07, "loss": 0.4116, "step": 27724 }, { "epoch": 0.8497302929998775, "grad_norm": 1.3842202957786414, "learning_rate": 5.805693283957154e-07, "loss": 0.5586, "step": 27725 }, { "epoch": 0.8497609415226186, "grad_norm": 1.3937398257288953, "learning_rate": 5.803372212486436e-07, "loss": 0.5973, "step": 27726 }, { "epoch": 0.8497915900453599, "grad_norm": 1.3066947647592908, "learning_rate": 5.801051576496402e-07, "loss": 0.5938, "step": 27727 }, { "epoch": 0.849822238568101, "grad_norm": 1.3726137537744383, "learning_rate": 5.798731376009925e-07, "loss": 0.5872, "step": 27728 }, { "epoch": 0.8498528870908422, "grad_norm": 1.4314366241787093, "learning_rate": 5.796411611049846e-07, "loss": 0.572, "step": 27729 }, { "epoch": 0.8498835356135834, "grad_norm": 1.572837908303401, "learning_rate": 5.794092281639041e-07, "loss": 0.592, "step": 27730 }, { "epoch": 0.8499141841363246, "grad_norm": 1.2984975526038518, "learning_rate": 5.791773387800348e-07, "loss": 0.6263, "step": 27731 }, { "epoch": 0.8499448326590658, "grad_norm": 1.2332101030968348, "learning_rate": 5.78945492955662e-07, "loss": 0.5817, "step": 27732 }, { "epoch": 0.849975481181807, "grad_norm": 1.3913498662065893, "learning_rate": 5.787136906930719e-07, "loss": 0.6129, "step": 27733 }, { "epoch": 0.8500061297045483, "grad_norm": 0.45035182761785897, "learning_rate": 5.784819319945456e-07, "loss": 0.3766, "step": 27734 }, { "epoch": 0.8500367782272894, "grad_norm": 1.328405836385254, "learning_rate": 5.782502168623688e-07, "loss": 0.5535, "step": 27735 }, { "epoch": 0.8500674267500307, "grad_norm": 1.2196272123695895, "learning_rate": 5.780185452988241e-07, "loss": 0.6139, "step": 27736 }, { "epoch": 0.8500980752727718, "grad_norm": 1.2525789555847437, "learning_rate": 5.777869173061939e-07, "loss": 0.6374, "step": 27737 }, { "epoch": 0.8501287237955131, "grad_norm": 1.210415942017628, "learning_rate": 5.7755533288676e-07, "loss": 0.5825, "step": 27738 }, { "epoch": 0.8501593723182542, "grad_norm": 1.1775165451788383, "learning_rate": 5.773237920428065e-07, "loss": 0.6359, "step": 27739 }, { "epoch": 0.8501900208409955, "grad_norm": 1.2768844187581119, "learning_rate": 5.770922947766116e-07, "loss": 0.5825, "step": 27740 }, { "epoch": 0.8502206693637366, "grad_norm": 1.4902717620919, "learning_rate": 5.768608410904597e-07, "loss": 0.5553, "step": 27741 }, { "epoch": 0.8502513178864779, "grad_norm": 1.2552839312611128, "learning_rate": 5.766294309866283e-07, "loss": 0.6134, "step": 27742 }, { "epoch": 0.850281966409219, "grad_norm": 1.2177656809638993, "learning_rate": 5.763980644673989e-07, "loss": 0.5539, "step": 27743 }, { "epoch": 0.8503126149319603, "grad_norm": 1.604791222118447, "learning_rate": 5.761667415350519e-07, "loss": 0.5802, "step": 27744 }, { "epoch": 0.8503432634547015, "grad_norm": 1.6029912921636849, "learning_rate": 5.75935462191865e-07, "loss": 0.6432, "step": 27745 }, { "epoch": 0.8503739119774427, "grad_norm": 0.4433416834465471, "learning_rate": 5.757042264401186e-07, "loss": 0.404, "step": 27746 }, { "epoch": 0.8504045605001839, "grad_norm": 1.4503451769737437, "learning_rate": 5.754730342820908e-07, "loss": 0.5443, "step": 27747 }, { "epoch": 0.8504352090229251, "grad_norm": 1.475558048476782, "learning_rate": 5.752418857200582e-07, "loss": 0.5909, "step": 27748 }, { "epoch": 0.8504658575456663, "grad_norm": 1.3195624909671502, "learning_rate": 5.75010780756301e-07, "loss": 0.671, "step": 27749 }, { "epoch": 0.8504965060684075, "grad_norm": 1.3727072173040271, "learning_rate": 5.747797193930932e-07, "loss": 0.6711, "step": 27750 }, { "epoch": 0.8505271545911487, "grad_norm": 1.465689717703273, "learning_rate": 5.745487016327134e-07, "loss": 0.6075, "step": 27751 }, { "epoch": 0.85055780311389, "grad_norm": 1.3464626563972897, "learning_rate": 5.74317727477438e-07, "loss": 0.5829, "step": 27752 }, { "epoch": 0.8505884516366311, "grad_norm": 1.3691188238098284, "learning_rate": 5.740867969295422e-07, "loss": 0.6013, "step": 27753 }, { "epoch": 0.8506191001593724, "grad_norm": 1.4319279866224603, "learning_rate": 5.73855909991301e-07, "loss": 0.5804, "step": 27754 }, { "epoch": 0.8506497486821135, "grad_norm": 1.7003380500045402, "learning_rate": 5.736250666649911e-07, "loss": 0.6872, "step": 27755 }, { "epoch": 0.8506803972048548, "grad_norm": 1.3737747947018897, "learning_rate": 5.733942669528852e-07, "loss": 0.5632, "step": 27756 }, { "epoch": 0.8507110457275959, "grad_norm": 1.4289371309438985, "learning_rate": 5.731635108572581e-07, "loss": 0.6314, "step": 27757 }, { "epoch": 0.8507416942503372, "grad_norm": 0.4623571288308893, "learning_rate": 5.729327983803845e-07, "loss": 0.3776, "step": 27758 }, { "epoch": 0.8507723427730783, "grad_norm": 1.2073060984521746, "learning_rate": 5.727021295245356e-07, "loss": 0.5378, "step": 27759 }, { "epoch": 0.8508029912958195, "grad_norm": 1.4155433484819453, "learning_rate": 5.724715042919865e-07, "loss": 0.5987, "step": 27760 }, { "epoch": 0.8508336398185607, "grad_norm": 1.2527058440975474, "learning_rate": 5.722409226850078e-07, "loss": 0.4997, "step": 27761 }, { "epoch": 0.8508642883413019, "grad_norm": 1.1903595030295557, "learning_rate": 5.720103847058717e-07, "loss": 0.5913, "step": 27762 }, { "epoch": 0.8508949368640432, "grad_norm": 1.374788904599195, "learning_rate": 5.717798903568517e-07, "loss": 0.5701, "step": 27763 }, { "epoch": 0.8509255853867843, "grad_norm": 1.442135014522179, "learning_rate": 5.71549439640216e-07, "loss": 0.6687, "step": 27764 }, { "epoch": 0.8509562339095256, "grad_norm": 1.4997264376003454, "learning_rate": 5.713190325582374e-07, "loss": 0.6565, "step": 27765 }, { "epoch": 0.8509868824322667, "grad_norm": 1.286105142212265, "learning_rate": 5.710886691131856e-07, "loss": 0.6126, "step": 27766 }, { "epoch": 0.851017530955008, "grad_norm": 1.2972520145571536, "learning_rate": 5.708583493073299e-07, "loss": 0.6741, "step": 27767 }, { "epoch": 0.8510481794777491, "grad_norm": 1.240018961184876, "learning_rate": 5.706280731429404e-07, "loss": 0.559, "step": 27768 }, { "epoch": 0.8510788280004904, "grad_norm": 1.1958148863742208, "learning_rate": 5.70397840622286e-07, "loss": 0.5556, "step": 27769 }, { "epoch": 0.8511094765232315, "grad_norm": 1.250457276192844, "learning_rate": 5.701676517476345e-07, "loss": 0.5416, "step": 27770 }, { "epoch": 0.8511401250459728, "grad_norm": 1.2513337925766281, "learning_rate": 5.699375065212553e-07, "loss": 0.5786, "step": 27771 }, { "epoch": 0.851170773568714, "grad_norm": 1.3388711437283396, "learning_rate": 5.697074049454138e-07, "loss": 0.5987, "step": 27772 }, { "epoch": 0.8512014220914552, "grad_norm": 1.2854850683759538, "learning_rate": 5.694773470223807e-07, "loss": 0.6061, "step": 27773 }, { "epoch": 0.8512320706141964, "grad_norm": 1.4599287942945605, "learning_rate": 5.692473327544206e-07, "loss": 0.6186, "step": 27774 }, { "epoch": 0.8512627191369376, "grad_norm": 1.4195815270206793, "learning_rate": 5.690173621437995e-07, "loss": 0.5744, "step": 27775 }, { "epoch": 0.8512933676596788, "grad_norm": 1.2308854741872388, "learning_rate": 5.687874351927835e-07, "loss": 0.5901, "step": 27776 }, { "epoch": 0.85132401618242, "grad_norm": 0.4420291902845707, "learning_rate": 5.685575519036402e-07, "loss": 0.3848, "step": 27777 }, { "epoch": 0.8513546647051612, "grad_norm": 1.32537048755221, "learning_rate": 5.683277122786318e-07, "loss": 0.6169, "step": 27778 }, { "epoch": 0.8513853132279025, "grad_norm": 1.2339225263378193, "learning_rate": 5.680979163200246e-07, "loss": 0.5768, "step": 27779 }, { "epoch": 0.8514159617506436, "grad_norm": 1.4243051184548794, "learning_rate": 5.678681640300837e-07, "loss": 0.6762, "step": 27780 }, { "epoch": 0.8514466102733849, "grad_norm": 1.352641999313135, "learning_rate": 5.676384554110703e-07, "loss": 0.5991, "step": 27781 }, { "epoch": 0.851477258796126, "grad_norm": 0.44748639553776326, "learning_rate": 5.674087904652509e-07, "loss": 0.3999, "step": 27782 }, { "epoch": 0.8515079073188673, "grad_norm": 1.4504580603531363, "learning_rate": 5.671791691948842e-07, "loss": 0.6616, "step": 27783 }, { "epoch": 0.8515385558416084, "grad_norm": 1.3076027607351046, "learning_rate": 5.669495916022377e-07, "loss": 0.6003, "step": 27784 }, { "epoch": 0.8515692043643497, "grad_norm": 1.3661728649573792, "learning_rate": 5.667200576895709e-07, "loss": 0.5765, "step": 27785 }, { "epoch": 0.8515998528870908, "grad_norm": 0.4414071310765432, "learning_rate": 5.664905674591448e-07, "loss": 0.4088, "step": 27786 }, { "epoch": 0.8516305014098321, "grad_norm": 1.4547670389122178, "learning_rate": 5.662611209132219e-07, "loss": 0.6175, "step": 27787 }, { "epoch": 0.8516611499325732, "grad_norm": 1.2757756983804691, "learning_rate": 5.660317180540631e-07, "loss": 0.6075, "step": 27788 }, { "epoch": 0.8516917984553145, "grad_norm": 1.6066893828911544, "learning_rate": 5.658023588839273e-07, "loss": 0.7175, "step": 27789 }, { "epoch": 0.8517224469780557, "grad_norm": 1.4347934226254135, "learning_rate": 5.655730434050755e-07, "loss": 0.6385, "step": 27790 }, { "epoch": 0.8517530955007968, "grad_norm": 1.3709429623582856, "learning_rate": 5.653437716197669e-07, "loss": 0.6379, "step": 27791 }, { "epoch": 0.8517837440235381, "grad_norm": 1.290158728492238, "learning_rate": 5.651145435302618e-07, "loss": 0.5793, "step": 27792 }, { "epoch": 0.8518143925462792, "grad_norm": 1.453184952012686, "learning_rate": 5.648853591388181e-07, "loss": 0.6702, "step": 27793 }, { "epoch": 0.8518450410690205, "grad_norm": 1.3880626833278924, "learning_rate": 5.646562184476928e-07, "loss": 0.6441, "step": 27794 }, { "epoch": 0.8518756895917616, "grad_norm": 1.296228436230317, "learning_rate": 5.644271214591446e-07, "loss": 0.5833, "step": 27795 }, { "epoch": 0.8519063381145029, "grad_norm": 0.44426084620831185, "learning_rate": 5.641980681754317e-07, "loss": 0.3993, "step": 27796 }, { "epoch": 0.851936986637244, "grad_norm": 1.4604460711030653, "learning_rate": 5.639690585988089e-07, "loss": 0.6039, "step": 27797 }, { "epoch": 0.8519676351599853, "grad_norm": 0.45379926118991476, "learning_rate": 5.637400927315339e-07, "loss": 0.3886, "step": 27798 }, { "epoch": 0.8519982836827265, "grad_norm": 1.3779136492340234, "learning_rate": 5.635111705758633e-07, "loss": 0.6666, "step": 27799 }, { "epoch": 0.8520289322054677, "grad_norm": 1.4796765644921734, "learning_rate": 5.63282292134053e-07, "loss": 0.6923, "step": 27800 }, { "epoch": 0.8520595807282089, "grad_norm": 1.494355178606516, "learning_rate": 5.63053457408358e-07, "loss": 0.6068, "step": 27801 }, { "epoch": 0.8520902292509501, "grad_norm": 1.4352884896207316, "learning_rate": 5.6282466640103e-07, "loss": 0.6771, "step": 27802 }, { "epoch": 0.8521208777736913, "grad_norm": 1.5127834675724416, "learning_rate": 5.625959191143277e-07, "loss": 0.5381, "step": 27803 }, { "epoch": 0.8521515262964325, "grad_norm": 1.2721717903579304, "learning_rate": 5.623672155505038e-07, "loss": 0.5465, "step": 27804 }, { "epoch": 0.8521821748191737, "grad_norm": 1.507516515038443, "learning_rate": 5.621385557118097e-07, "loss": 0.5749, "step": 27805 }, { "epoch": 0.852212823341915, "grad_norm": 1.4568601555179108, "learning_rate": 5.619099396004996e-07, "loss": 0.6296, "step": 27806 }, { "epoch": 0.8522434718646561, "grad_norm": 1.3498188798412483, "learning_rate": 5.616813672188281e-07, "loss": 0.6771, "step": 27807 }, { "epoch": 0.8522741203873974, "grad_norm": 1.2865983133928924, "learning_rate": 5.614528385690443e-07, "loss": 0.5766, "step": 27808 }, { "epoch": 0.8523047689101385, "grad_norm": 1.4496682910162815, "learning_rate": 5.612243536534012e-07, "loss": 0.6193, "step": 27809 }, { "epoch": 0.8523354174328798, "grad_norm": 1.4585527131738907, "learning_rate": 5.609959124741504e-07, "loss": 0.6102, "step": 27810 }, { "epoch": 0.8523660659556209, "grad_norm": 1.6534715720575153, "learning_rate": 5.60767515033544e-07, "loss": 0.6377, "step": 27811 }, { "epoch": 0.8523967144783622, "grad_norm": 1.4175254668481696, "learning_rate": 5.605391613338307e-07, "loss": 0.5848, "step": 27812 }, { "epoch": 0.8524273630011033, "grad_norm": 1.5543992501768706, "learning_rate": 5.603108513772587e-07, "loss": 0.6268, "step": 27813 }, { "epoch": 0.8524580115238446, "grad_norm": 1.4232178313569732, "learning_rate": 5.600825851660824e-07, "loss": 0.6351, "step": 27814 }, { "epoch": 0.8524886600465857, "grad_norm": 0.4340270902392345, "learning_rate": 5.598543627025483e-07, "loss": 0.3705, "step": 27815 }, { "epoch": 0.852519308569327, "grad_norm": 1.3770459934101063, "learning_rate": 5.596261839889039e-07, "loss": 0.603, "step": 27816 }, { "epoch": 0.8525499570920682, "grad_norm": 1.6622074226403591, "learning_rate": 5.593980490273987e-07, "loss": 0.6645, "step": 27817 }, { "epoch": 0.8525806056148094, "grad_norm": 1.386575151779242, "learning_rate": 5.591699578202808e-07, "loss": 0.6371, "step": 27818 }, { "epoch": 0.8526112541375506, "grad_norm": 1.442477782597746, "learning_rate": 5.589419103697991e-07, "loss": 0.6536, "step": 27819 }, { "epoch": 0.8526419026602918, "grad_norm": 1.3417931854929896, "learning_rate": 5.587139066781977e-07, "loss": 0.5924, "step": 27820 }, { "epoch": 0.852672551183033, "grad_norm": 1.2009733260993145, "learning_rate": 5.584859467477243e-07, "loss": 0.5454, "step": 27821 }, { "epoch": 0.8527031997057741, "grad_norm": 1.3557488901059724, "learning_rate": 5.582580305806262e-07, "loss": 0.5902, "step": 27822 }, { "epoch": 0.8527338482285154, "grad_norm": 1.3267690489947062, "learning_rate": 5.580301581791487e-07, "loss": 0.595, "step": 27823 }, { "epoch": 0.8527644967512565, "grad_norm": 1.3486380237664761, "learning_rate": 5.578023295455343e-07, "loss": 0.5409, "step": 27824 }, { "epoch": 0.8527951452739978, "grad_norm": 1.4390072080033687, "learning_rate": 5.575745446820325e-07, "loss": 0.6375, "step": 27825 }, { "epoch": 0.852825793796739, "grad_norm": 1.489902449741047, "learning_rate": 5.573468035908835e-07, "loss": 0.6086, "step": 27826 }, { "epoch": 0.8528564423194802, "grad_norm": 1.520807590260623, "learning_rate": 5.571191062743347e-07, "loss": 0.59, "step": 27827 }, { "epoch": 0.8528870908422214, "grad_norm": 1.302022452452733, "learning_rate": 5.568914527346269e-07, "loss": 0.5506, "step": 27828 }, { "epoch": 0.8529177393649626, "grad_norm": 1.295820566677237, "learning_rate": 5.566638429740051e-07, "loss": 0.5583, "step": 27829 }, { "epoch": 0.8529483878877038, "grad_norm": 1.356060360411469, "learning_rate": 5.564362769947118e-07, "loss": 0.5641, "step": 27830 }, { "epoch": 0.852979036410445, "grad_norm": 1.4195839041148626, "learning_rate": 5.562087547989875e-07, "loss": 0.5588, "step": 27831 }, { "epoch": 0.8530096849331862, "grad_norm": 1.4581448025280261, "learning_rate": 5.559812763890759e-07, "loss": 0.5245, "step": 27832 }, { "epoch": 0.8530403334559274, "grad_norm": 1.3557120681173314, "learning_rate": 5.557538417672187e-07, "loss": 0.5905, "step": 27833 }, { "epoch": 0.8530709819786686, "grad_norm": 1.263393329204063, "learning_rate": 5.555264509356556e-07, "loss": 0.6541, "step": 27834 }, { "epoch": 0.8531016305014099, "grad_norm": 1.21122754937584, "learning_rate": 5.55299103896626e-07, "loss": 0.5894, "step": 27835 }, { "epoch": 0.853132279024151, "grad_norm": 1.2773788409064208, "learning_rate": 5.550718006523736e-07, "loss": 0.5161, "step": 27836 }, { "epoch": 0.8531629275468923, "grad_norm": 1.4883543900970828, "learning_rate": 5.548445412051345e-07, "loss": 0.6498, "step": 27837 }, { "epoch": 0.8531935760696334, "grad_norm": 1.358555997844198, "learning_rate": 5.546173255571508e-07, "loss": 0.5942, "step": 27838 }, { "epoch": 0.8532242245923747, "grad_norm": 1.3089658397088013, "learning_rate": 5.543901537106594e-07, "loss": 0.5876, "step": 27839 }, { "epoch": 0.8532548731151158, "grad_norm": 1.338048806854028, "learning_rate": 5.541630256678987e-07, "loss": 0.5366, "step": 27840 }, { "epoch": 0.8532855216378571, "grad_norm": 1.339292540482252, "learning_rate": 5.539359414311085e-07, "loss": 0.5495, "step": 27841 }, { "epoch": 0.8533161701605982, "grad_norm": 0.46318065810039355, "learning_rate": 5.537089010025237e-07, "loss": 0.4082, "step": 27842 }, { "epoch": 0.8533468186833395, "grad_norm": 1.369996778704262, "learning_rate": 5.534819043843831e-07, "loss": 0.6218, "step": 27843 }, { "epoch": 0.8533774672060807, "grad_norm": 0.4453932303306201, "learning_rate": 5.532549515789237e-07, "loss": 0.4062, "step": 27844 }, { "epoch": 0.8534081157288219, "grad_norm": 1.3276476692717438, "learning_rate": 5.530280425883805e-07, "loss": 0.5896, "step": 27845 }, { "epoch": 0.8534387642515631, "grad_norm": 0.4534358539222951, "learning_rate": 5.528011774149905e-07, "loss": 0.3996, "step": 27846 }, { "epoch": 0.8534694127743043, "grad_norm": 1.4385210512497628, "learning_rate": 5.52574356060987e-07, "loss": 0.6384, "step": 27847 }, { "epoch": 0.8535000612970455, "grad_norm": 1.3090697821094452, "learning_rate": 5.52347578528607e-07, "loss": 0.5725, "step": 27848 }, { "epoch": 0.8535307098197867, "grad_norm": 1.2140350294195155, "learning_rate": 5.521208448200849e-07, "loss": 0.5237, "step": 27849 }, { "epoch": 0.8535613583425279, "grad_norm": 1.4747427543996974, "learning_rate": 5.518941549376527e-07, "loss": 0.6023, "step": 27850 }, { "epoch": 0.8535920068652691, "grad_norm": 1.3712971582502327, "learning_rate": 5.51667508883546e-07, "loss": 0.5434, "step": 27851 }, { "epoch": 0.8536226553880103, "grad_norm": 1.522931974853211, "learning_rate": 5.514409066599985e-07, "loss": 0.6574, "step": 27852 }, { "epoch": 0.8536533039107514, "grad_norm": 1.478772645025376, "learning_rate": 5.512143482692411e-07, "loss": 0.6823, "step": 27853 }, { "epoch": 0.8536839524334927, "grad_norm": 1.434059598506224, "learning_rate": 5.509878337135066e-07, "loss": 0.5639, "step": 27854 }, { "epoch": 0.8537146009562339, "grad_norm": 1.268363647927205, "learning_rate": 5.507613629950287e-07, "loss": 0.5648, "step": 27855 }, { "epoch": 0.8537452494789751, "grad_norm": 1.2109954322521321, "learning_rate": 5.505349361160362e-07, "loss": 0.5567, "step": 27856 }, { "epoch": 0.8537758980017163, "grad_norm": 0.44906447131986266, "learning_rate": 5.503085530787628e-07, "loss": 0.3804, "step": 27857 }, { "epoch": 0.8538065465244575, "grad_norm": 1.3309112090571968, "learning_rate": 5.500822138854361e-07, "loss": 0.5973, "step": 27858 }, { "epoch": 0.8538371950471987, "grad_norm": 1.3840213863653859, "learning_rate": 5.498559185382885e-07, "loss": 0.6102, "step": 27859 }, { "epoch": 0.8538678435699399, "grad_norm": 1.2900005043149005, "learning_rate": 5.496296670395501e-07, "loss": 0.528, "step": 27860 }, { "epoch": 0.8538984920926811, "grad_norm": 1.3415325715572368, "learning_rate": 5.494034593914476e-07, "loss": 0.6206, "step": 27861 }, { "epoch": 0.8539291406154224, "grad_norm": 1.5222055164198918, "learning_rate": 5.491772955962122e-07, "loss": 0.5966, "step": 27862 }, { "epoch": 0.8539597891381635, "grad_norm": 1.4457703143928815, "learning_rate": 5.489511756560728e-07, "loss": 0.6701, "step": 27863 }, { "epoch": 0.8539904376609048, "grad_norm": 0.46908802208165296, "learning_rate": 5.487250995732546e-07, "loss": 0.406, "step": 27864 }, { "epoch": 0.8540210861836459, "grad_norm": 0.4538191110523256, "learning_rate": 5.484990673499874e-07, "loss": 0.3957, "step": 27865 }, { "epoch": 0.8540517347063872, "grad_norm": 1.2701019554823174, "learning_rate": 5.482730789884987e-07, "loss": 0.6079, "step": 27866 }, { "epoch": 0.8540823832291283, "grad_norm": 1.4508998270142064, "learning_rate": 5.480471344910137e-07, "loss": 0.6887, "step": 27867 }, { "epoch": 0.8541130317518696, "grad_norm": 1.254254411651476, "learning_rate": 5.4782123385976e-07, "loss": 0.662, "step": 27868 }, { "epoch": 0.8541436802746107, "grad_norm": 0.4267295757535308, "learning_rate": 5.475953770969622e-07, "loss": 0.3788, "step": 27869 }, { "epoch": 0.854174328797352, "grad_norm": 1.3753977986641255, "learning_rate": 5.47369564204846e-07, "loss": 0.5568, "step": 27870 }, { "epoch": 0.8542049773200932, "grad_norm": 1.4627974031180444, "learning_rate": 5.471437951856378e-07, "loss": 0.5632, "step": 27871 }, { "epoch": 0.8542356258428344, "grad_norm": 0.4469277761576907, "learning_rate": 5.469180700415605e-07, "loss": 0.4105, "step": 27872 }, { "epoch": 0.8542662743655756, "grad_norm": 1.4127494537172887, "learning_rate": 5.466923887748382e-07, "loss": 0.5399, "step": 27873 }, { "epoch": 0.8542969228883168, "grad_norm": 1.2155804241049533, "learning_rate": 5.464667513876965e-07, "loss": 0.574, "step": 27874 }, { "epoch": 0.854327571411058, "grad_norm": 1.3277167820073146, "learning_rate": 5.462411578823562e-07, "loss": 0.5828, "step": 27875 }, { "epoch": 0.8543582199337992, "grad_norm": 1.3598661518517428, "learning_rate": 5.460156082610418e-07, "loss": 0.673, "step": 27876 }, { "epoch": 0.8543888684565404, "grad_norm": 1.2388010441449666, "learning_rate": 5.457901025259759e-07, "loss": 0.6012, "step": 27877 }, { "epoch": 0.8544195169792816, "grad_norm": 1.3139662489926802, "learning_rate": 5.455646406793785e-07, "loss": 0.6774, "step": 27878 }, { "epoch": 0.8544501655020228, "grad_norm": 1.2723111687134894, "learning_rate": 5.453392227234739e-07, "loss": 0.5869, "step": 27879 }, { "epoch": 0.8544808140247641, "grad_norm": 1.3387206506100062, "learning_rate": 5.451138486604796e-07, "loss": 0.6057, "step": 27880 }, { "epoch": 0.8545114625475052, "grad_norm": 1.3238501057708194, "learning_rate": 5.4488851849262e-07, "loss": 0.5953, "step": 27881 }, { "epoch": 0.8545421110702465, "grad_norm": 1.5947191081272065, "learning_rate": 5.44663232222114e-07, "loss": 0.8026, "step": 27882 }, { "epoch": 0.8545727595929876, "grad_norm": 1.419838587708191, "learning_rate": 5.444379898511803e-07, "loss": 0.7072, "step": 27883 }, { "epoch": 0.8546034081157288, "grad_norm": 1.6070953127959697, "learning_rate": 5.442127913820389e-07, "loss": 0.6046, "step": 27884 }, { "epoch": 0.85463405663847, "grad_norm": 1.2891256889115341, "learning_rate": 5.439876368169101e-07, "loss": 0.5305, "step": 27885 }, { "epoch": 0.8546647051612112, "grad_norm": 1.1880939858719348, "learning_rate": 5.437625261580099e-07, "loss": 0.473, "step": 27886 }, { "epoch": 0.8546953536839524, "grad_norm": 0.44435308434699133, "learning_rate": 5.435374594075576e-07, "loss": 0.3729, "step": 27887 }, { "epoch": 0.8547260022066936, "grad_norm": 0.4493738914611613, "learning_rate": 5.433124365677722e-07, "loss": 0.3825, "step": 27888 }, { "epoch": 0.8547566507294349, "grad_norm": 1.275245299102093, "learning_rate": 5.43087457640869e-07, "loss": 0.5115, "step": 27889 }, { "epoch": 0.854787299252176, "grad_norm": 1.2372238574447718, "learning_rate": 5.428625226290663e-07, "loss": 0.5844, "step": 27890 }, { "epoch": 0.8548179477749173, "grad_norm": 1.5052328019939272, "learning_rate": 5.426376315345783e-07, "loss": 0.598, "step": 27891 }, { "epoch": 0.8548485962976584, "grad_norm": 1.4838831057265531, "learning_rate": 5.424127843596222e-07, "loss": 0.5939, "step": 27892 }, { "epoch": 0.8548792448203997, "grad_norm": 1.349981890216527, "learning_rate": 5.421879811064145e-07, "loss": 0.6001, "step": 27893 }, { "epoch": 0.8549098933431408, "grad_norm": 1.4034931296785034, "learning_rate": 5.419632217771681e-07, "loss": 0.656, "step": 27894 }, { "epoch": 0.8549405418658821, "grad_norm": 1.586276023330353, "learning_rate": 5.417385063740987e-07, "loss": 0.613, "step": 27895 }, { "epoch": 0.8549711903886232, "grad_norm": 1.416120908173258, "learning_rate": 5.41513834899422e-07, "loss": 0.5794, "step": 27896 }, { "epoch": 0.8550018389113645, "grad_norm": 1.3971512320089183, "learning_rate": 5.412892073553489e-07, "loss": 0.5167, "step": 27897 }, { "epoch": 0.8550324874341056, "grad_norm": 1.332476442611529, "learning_rate": 5.410646237440947e-07, "loss": 0.533, "step": 27898 }, { "epoch": 0.8550631359568469, "grad_norm": 1.3018953628534147, "learning_rate": 5.408400840678701e-07, "loss": 0.5855, "step": 27899 }, { "epoch": 0.8550937844795881, "grad_norm": 2.4469722982543782, "learning_rate": 5.40615588328891e-07, "loss": 0.5602, "step": 27900 }, { "epoch": 0.8551244330023293, "grad_norm": 1.5227478740234448, "learning_rate": 5.403911365293674e-07, "loss": 0.6464, "step": 27901 }, { "epoch": 0.8551550815250705, "grad_norm": 1.270331019941665, "learning_rate": 5.401667286715096e-07, "loss": 0.5772, "step": 27902 }, { "epoch": 0.8551857300478117, "grad_norm": 1.3292402827066558, "learning_rate": 5.399423647575308e-07, "loss": 0.558, "step": 27903 }, { "epoch": 0.8552163785705529, "grad_norm": 1.614707108236983, "learning_rate": 5.397180447896416e-07, "loss": 0.6477, "step": 27904 }, { "epoch": 0.8552470270932941, "grad_norm": 1.2669896423637885, "learning_rate": 5.394937687700508e-07, "loss": 0.5846, "step": 27905 }, { "epoch": 0.8552776756160353, "grad_norm": 1.2911314598646166, "learning_rate": 5.392695367009693e-07, "loss": 0.4781, "step": 27906 }, { "epoch": 0.8553083241387766, "grad_norm": 1.3388811365621784, "learning_rate": 5.390453485846065e-07, "loss": 0.6158, "step": 27907 }, { "epoch": 0.8553389726615177, "grad_norm": 1.4464251212903856, "learning_rate": 5.388212044231716e-07, "loss": 0.6175, "step": 27908 }, { "epoch": 0.855369621184259, "grad_norm": 1.4950259891194075, "learning_rate": 5.385971042188736e-07, "loss": 0.5353, "step": 27909 }, { "epoch": 0.8554002697070001, "grad_norm": 1.5041709083645838, "learning_rate": 5.383730479739174e-07, "loss": 0.5916, "step": 27910 }, { "epoch": 0.8554309182297414, "grad_norm": 1.3710665393243093, "learning_rate": 5.381490356905155e-07, "loss": 0.5626, "step": 27911 }, { "epoch": 0.8554615667524825, "grad_norm": 0.4590798221423181, "learning_rate": 5.379250673708725e-07, "loss": 0.3843, "step": 27912 }, { "epoch": 0.8554922152752238, "grad_norm": 1.287521850252038, "learning_rate": 5.377011430171941e-07, "loss": 0.5949, "step": 27913 }, { "epoch": 0.8555228637979649, "grad_norm": 1.3759253047252844, "learning_rate": 5.374772626316887e-07, "loss": 0.6789, "step": 27914 }, { "epoch": 0.8555535123207061, "grad_norm": 0.4574542343972796, "learning_rate": 5.372534262165624e-07, "loss": 0.3962, "step": 27915 }, { "epoch": 0.8555841608434473, "grad_norm": 1.3831114811789853, "learning_rate": 5.370296337740188e-07, "loss": 0.6071, "step": 27916 }, { "epoch": 0.8556148093661885, "grad_norm": 1.5682872045954555, "learning_rate": 5.368058853062641e-07, "loss": 0.5833, "step": 27917 }, { "epoch": 0.8556454578889298, "grad_norm": 1.3244449495337054, "learning_rate": 5.36582180815503e-07, "loss": 0.5585, "step": 27918 }, { "epoch": 0.8556761064116709, "grad_norm": 1.4309009924956009, "learning_rate": 5.363585203039412e-07, "loss": 0.6796, "step": 27919 }, { "epoch": 0.8557067549344122, "grad_norm": 1.5287605255298637, "learning_rate": 5.361349037737801e-07, "loss": 0.6129, "step": 27920 }, { "epoch": 0.8557374034571533, "grad_norm": 0.43383876621678863, "learning_rate": 5.359113312272224e-07, "loss": 0.3818, "step": 27921 }, { "epoch": 0.8557680519798946, "grad_norm": 1.2959157562297543, "learning_rate": 5.356878026664747e-07, "loss": 0.6387, "step": 27922 }, { "epoch": 0.8557987005026357, "grad_norm": 1.297377799033974, "learning_rate": 5.354643180937368e-07, "loss": 0.6199, "step": 27923 }, { "epoch": 0.855829349025377, "grad_norm": 1.404837347154789, "learning_rate": 5.352408775112111e-07, "loss": 0.6857, "step": 27924 }, { "epoch": 0.8558599975481181, "grad_norm": 0.44680286024035926, "learning_rate": 5.350174809210989e-07, "loss": 0.3999, "step": 27925 }, { "epoch": 0.8558906460708594, "grad_norm": 1.465449369820155, "learning_rate": 5.347941283256014e-07, "loss": 0.5569, "step": 27926 }, { "epoch": 0.8559212945936006, "grad_norm": 1.3724420286697434, "learning_rate": 5.345708197269217e-07, "loss": 0.6284, "step": 27927 }, { "epoch": 0.8559519431163418, "grad_norm": 1.3252235004470183, "learning_rate": 5.343475551272565e-07, "loss": 0.6166, "step": 27928 }, { "epoch": 0.855982591639083, "grad_norm": 1.477042941856053, "learning_rate": 5.341243345288077e-07, "loss": 0.6573, "step": 27929 }, { "epoch": 0.8560132401618242, "grad_norm": 1.3993423970103334, "learning_rate": 5.339011579337761e-07, "loss": 0.5427, "step": 27930 }, { "epoch": 0.8560438886845654, "grad_norm": 1.4864135049212128, "learning_rate": 5.336780253443579e-07, "loss": 0.6248, "step": 27931 }, { "epoch": 0.8560745372073066, "grad_norm": 1.6539789838352317, "learning_rate": 5.334549367627518e-07, "loss": 0.5383, "step": 27932 }, { "epoch": 0.8561051857300478, "grad_norm": 1.4440073188553781, "learning_rate": 5.332318921911589e-07, "loss": 0.5395, "step": 27933 }, { "epoch": 0.856135834252789, "grad_norm": 0.43928458965437894, "learning_rate": 5.33008891631775e-07, "loss": 0.387, "step": 27934 }, { "epoch": 0.8561664827755302, "grad_norm": 1.0721125279801664, "learning_rate": 5.327859350867959e-07, "loss": 0.5265, "step": 27935 }, { "epoch": 0.8561971312982715, "grad_norm": 1.4315501287876158, "learning_rate": 5.325630225584206e-07, "loss": 0.6588, "step": 27936 }, { "epoch": 0.8562277798210126, "grad_norm": 1.3469180588331837, "learning_rate": 5.323401540488443e-07, "loss": 0.5932, "step": 27937 }, { "epoch": 0.8562584283437539, "grad_norm": 1.5328239746644217, "learning_rate": 5.32117329560265e-07, "loss": 0.6525, "step": 27938 }, { "epoch": 0.856289076866495, "grad_norm": 1.3118126882287153, "learning_rate": 5.318945490948757e-07, "loss": 0.5921, "step": 27939 }, { "epoch": 0.8563197253892363, "grad_norm": 0.4607567074054019, "learning_rate": 5.316718126548726e-07, "loss": 0.4063, "step": 27940 }, { "epoch": 0.8563503739119774, "grad_norm": 1.2687362077871909, "learning_rate": 5.314491202424515e-07, "loss": 0.5806, "step": 27941 }, { "epoch": 0.8563810224347187, "grad_norm": 0.4366565761956052, "learning_rate": 5.312264718598053e-07, "loss": 0.3738, "step": 27942 }, { "epoch": 0.8564116709574598, "grad_norm": 1.3900596276748545, "learning_rate": 5.310038675091273e-07, "loss": 0.6767, "step": 27943 }, { "epoch": 0.8564423194802011, "grad_norm": 1.4044070836229479, "learning_rate": 5.307813071926116e-07, "loss": 0.5404, "step": 27944 }, { "epoch": 0.8564729680029423, "grad_norm": 1.3786514793311053, "learning_rate": 5.30558790912451e-07, "loss": 0.6912, "step": 27945 }, { "epoch": 0.8565036165256834, "grad_norm": 1.384788624099248, "learning_rate": 5.303363186708394e-07, "loss": 0.631, "step": 27946 }, { "epoch": 0.8565342650484247, "grad_norm": 1.3296599359449475, "learning_rate": 5.301138904699665e-07, "loss": 0.6673, "step": 27947 }, { "epoch": 0.8565649135711658, "grad_norm": 1.373853194104313, "learning_rate": 5.298915063120252e-07, "loss": 0.6154, "step": 27948 }, { "epoch": 0.8565955620939071, "grad_norm": 1.4157411351644424, "learning_rate": 5.296691661992081e-07, "loss": 0.6385, "step": 27949 }, { "epoch": 0.8566262106166482, "grad_norm": 1.4176431319143201, "learning_rate": 5.294468701337036e-07, "loss": 0.6578, "step": 27950 }, { "epoch": 0.8566568591393895, "grad_norm": 1.3328784215863267, "learning_rate": 5.292246181177014e-07, "loss": 0.5163, "step": 27951 }, { "epoch": 0.8566875076621306, "grad_norm": 1.3287885044464343, "learning_rate": 5.290024101533952e-07, "loss": 0.5798, "step": 27952 }, { "epoch": 0.8567181561848719, "grad_norm": 1.241516339302524, "learning_rate": 5.287802462429708e-07, "loss": 0.527, "step": 27953 }, { "epoch": 0.856748804707613, "grad_norm": 1.3522788199540574, "learning_rate": 5.285581263886197e-07, "loss": 0.6044, "step": 27954 }, { "epoch": 0.8567794532303543, "grad_norm": 0.4279142272804823, "learning_rate": 5.283360505925283e-07, "loss": 0.3831, "step": 27955 }, { "epoch": 0.8568101017530955, "grad_norm": 1.3415943577539704, "learning_rate": 5.281140188568862e-07, "loss": 0.6157, "step": 27956 }, { "epoch": 0.8568407502758367, "grad_norm": 0.4503579147089039, "learning_rate": 5.27892031183882e-07, "loss": 0.3908, "step": 27957 }, { "epoch": 0.8568713987985779, "grad_norm": 1.3379741896540027, "learning_rate": 5.276700875757002e-07, "loss": 0.5841, "step": 27958 }, { "epoch": 0.8569020473213191, "grad_norm": 1.4449059563097257, "learning_rate": 5.274481880345301e-07, "loss": 0.6022, "step": 27959 }, { "epoch": 0.8569326958440603, "grad_norm": 1.4855200274686373, "learning_rate": 5.272263325625576e-07, "loss": 0.6138, "step": 27960 }, { "epoch": 0.8569633443668015, "grad_norm": 1.4192259223796004, "learning_rate": 5.27004521161969e-07, "loss": 0.6353, "step": 27961 }, { "epoch": 0.8569939928895427, "grad_norm": 1.4542940701538247, "learning_rate": 5.267827538349474e-07, "loss": 0.5732, "step": 27962 }, { "epoch": 0.857024641412284, "grad_norm": 1.29902426379333, "learning_rate": 5.26561030583681e-07, "loss": 0.6721, "step": 27963 }, { "epoch": 0.8570552899350251, "grad_norm": 1.2362246277531905, "learning_rate": 5.263393514103532e-07, "loss": 0.6305, "step": 27964 }, { "epoch": 0.8570859384577664, "grad_norm": 0.45325212018409533, "learning_rate": 5.261177163171494e-07, "loss": 0.394, "step": 27965 }, { "epoch": 0.8571165869805075, "grad_norm": 1.401045557482956, "learning_rate": 5.258961253062512e-07, "loss": 0.5647, "step": 27966 }, { "epoch": 0.8571472355032488, "grad_norm": 1.4309657326830976, "learning_rate": 5.256745783798428e-07, "loss": 0.5895, "step": 27967 }, { "epoch": 0.8571778840259899, "grad_norm": 1.34045736670753, "learning_rate": 5.254530755401094e-07, "loss": 0.6492, "step": 27968 }, { "epoch": 0.8572085325487312, "grad_norm": 1.458346537947174, "learning_rate": 5.252316167892301e-07, "loss": 0.6146, "step": 27969 }, { "epoch": 0.8572391810714723, "grad_norm": 1.23602577705193, "learning_rate": 5.25010202129389e-07, "loss": 0.5466, "step": 27970 }, { "epoch": 0.8572698295942136, "grad_norm": 1.4221679919768524, "learning_rate": 5.24788831562768e-07, "loss": 0.5775, "step": 27971 }, { "epoch": 0.8573004781169548, "grad_norm": 0.44671682778209876, "learning_rate": 5.245675050915467e-07, "loss": 0.3896, "step": 27972 }, { "epoch": 0.857331126639696, "grad_norm": 1.3990093006475017, "learning_rate": 5.243462227179069e-07, "loss": 0.6157, "step": 27973 }, { "epoch": 0.8573617751624372, "grad_norm": 1.1726712354624451, "learning_rate": 5.241249844440299e-07, "loss": 0.5282, "step": 27974 }, { "epoch": 0.8573924236851784, "grad_norm": 1.393168555925319, "learning_rate": 5.239037902720939e-07, "loss": 0.6544, "step": 27975 }, { "epoch": 0.8574230722079196, "grad_norm": 1.4343199019578587, "learning_rate": 5.2368264020428e-07, "loss": 0.584, "step": 27976 }, { "epoch": 0.8574537207306607, "grad_norm": 1.3732776513544651, "learning_rate": 5.234615342427651e-07, "loss": 0.567, "step": 27977 }, { "epoch": 0.857484369253402, "grad_norm": 1.3333656318611251, "learning_rate": 5.232404723897294e-07, "loss": 0.7196, "step": 27978 }, { "epoch": 0.8575150177761431, "grad_norm": 1.3087295413167528, "learning_rate": 5.230194546473516e-07, "loss": 0.6375, "step": 27979 }, { "epoch": 0.8575456662988844, "grad_norm": 1.4451994084433355, "learning_rate": 5.227984810178077e-07, "loss": 0.5887, "step": 27980 }, { "epoch": 0.8575763148216256, "grad_norm": 1.345847486666451, "learning_rate": 5.22577551503276e-07, "loss": 0.5484, "step": 27981 }, { "epoch": 0.8576069633443668, "grad_norm": 1.2552115543473268, "learning_rate": 5.223566661059338e-07, "loss": 0.5757, "step": 27982 }, { "epoch": 0.857637611867108, "grad_norm": 1.3472935532735628, "learning_rate": 5.221358248279568e-07, "loss": 0.6287, "step": 27983 }, { "epoch": 0.8576682603898492, "grad_norm": 1.2135956865008526, "learning_rate": 5.219150276715206e-07, "loss": 0.6323, "step": 27984 }, { "epoch": 0.8576989089125904, "grad_norm": 1.2841757293039777, "learning_rate": 5.216942746388026e-07, "loss": 0.6976, "step": 27985 }, { "epoch": 0.8577295574353316, "grad_norm": 1.2818665423473774, "learning_rate": 5.214735657319758e-07, "loss": 0.5598, "step": 27986 }, { "epoch": 0.8577602059580728, "grad_norm": 0.45107433981250294, "learning_rate": 5.212529009532164e-07, "loss": 0.394, "step": 27987 }, { "epoch": 0.857790854480814, "grad_norm": 1.5006361716406198, "learning_rate": 5.210322803046974e-07, "loss": 0.5941, "step": 27988 }, { "epoch": 0.8578215030035552, "grad_norm": 1.4058100334170682, "learning_rate": 5.208117037885934e-07, "loss": 0.6, "step": 27989 }, { "epoch": 0.8578521515262965, "grad_norm": 1.3553216509302957, "learning_rate": 5.205911714070788e-07, "loss": 0.6343, "step": 27990 }, { "epoch": 0.8578828000490376, "grad_norm": 1.5328287149895443, "learning_rate": 5.203706831623245e-07, "loss": 0.5356, "step": 27991 }, { "epoch": 0.8579134485717789, "grad_norm": 1.4686846057940197, "learning_rate": 5.201502390565039e-07, "loss": 0.5833, "step": 27992 }, { "epoch": 0.85794409709452, "grad_norm": 0.47337346556853427, "learning_rate": 5.1992983909179e-07, "loss": 0.3943, "step": 27993 }, { "epoch": 0.8579747456172613, "grad_norm": 0.44180247426890834, "learning_rate": 5.197094832703531e-07, "loss": 0.3847, "step": 27994 }, { "epoch": 0.8580053941400024, "grad_norm": 0.43034955179928447, "learning_rate": 5.194891715943656e-07, "loss": 0.3884, "step": 27995 }, { "epoch": 0.8580360426627437, "grad_norm": 1.3532497104044539, "learning_rate": 5.19268904065997e-07, "loss": 0.5937, "step": 27996 }, { "epoch": 0.8580666911854848, "grad_norm": 1.285791041229939, "learning_rate": 5.190486806874184e-07, "loss": 0.542, "step": 27997 }, { "epoch": 0.8580973397082261, "grad_norm": 1.363986156330993, "learning_rate": 5.188285014608002e-07, "loss": 0.5406, "step": 27998 }, { "epoch": 0.8581279882309673, "grad_norm": 1.3630368435053324, "learning_rate": 5.186083663883107e-07, "loss": 0.6386, "step": 27999 }, { "epoch": 0.8581586367537085, "grad_norm": 1.5328121569635176, "learning_rate": 5.183882754721198e-07, "loss": 0.5978, "step": 28000 }, { "epoch": 0.8581892852764497, "grad_norm": 1.4257392414574832, "learning_rate": 5.181682287143963e-07, "loss": 0.6791, "step": 28001 }, { "epoch": 0.8582199337991909, "grad_norm": 0.44743548757671175, "learning_rate": 5.179482261173075e-07, "loss": 0.4065, "step": 28002 }, { "epoch": 0.8582505823219321, "grad_norm": 0.43303571436909744, "learning_rate": 5.177282676830214e-07, "loss": 0.3884, "step": 28003 }, { "epoch": 0.8582812308446733, "grad_norm": 0.432259977704068, "learning_rate": 5.175083534137065e-07, "loss": 0.3842, "step": 28004 }, { "epoch": 0.8583118793674145, "grad_norm": 1.4207398035188192, "learning_rate": 5.172884833115277e-07, "loss": 0.6047, "step": 28005 }, { "epoch": 0.8583425278901557, "grad_norm": 1.2590143455150455, "learning_rate": 5.170686573786532e-07, "loss": 0.644, "step": 28006 }, { "epoch": 0.8583731764128969, "grad_norm": 0.4273579850369527, "learning_rate": 5.168488756172463e-07, "loss": 0.3739, "step": 28007 }, { "epoch": 0.858403824935638, "grad_norm": 1.2367860001572188, "learning_rate": 5.166291380294769e-07, "loss": 0.5678, "step": 28008 }, { "epoch": 0.8584344734583793, "grad_norm": 1.1885807297750592, "learning_rate": 5.164094446175072e-07, "loss": 0.6133, "step": 28009 }, { "epoch": 0.8584651219811205, "grad_norm": 1.575778292031253, "learning_rate": 5.161897953835015e-07, "loss": 0.6213, "step": 28010 }, { "epoch": 0.8584957705038617, "grad_norm": 0.45810360533936173, "learning_rate": 5.159701903296255e-07, "loss": 0.3787, "step": 28011 }, { "epoch": 0.8585264190266029, "grad_norm": 1.1958579524149369, "learning_rate": 5.157506294580428e-07, "loss": 0.6212, "step": 28012 }, { "epoch": 0.8585570675493441, "grad_norm": 1.3261023404173962, "learning_rate": 5.155311127709156e-07, "loss": 0.6257, "step": 28013 }, { "epoch": 0.8585877160720853, "grad_norm": 1.2398666549231563, "learning_rate": 5.153116402704083e-07, "loss": 0.6338, "step": 28014 }, { "epoch": 0.8586183645948265, "grad_norm": 1.3130451617826364, "learning_rate": 5.150922119586832e-07, "loss": 0.6327, "step": 28015 }, { "epoch": 0.8586490131175677, "grad_norm": 1.258357406709392, "learning_rate": 5.148728278379018e-07, "loss": 0.6064, "step": 28016 }, { "epoch": 0.858679661640309, "grad_norm": 0.4448289392192089, "learning_rate": 5.146534879102267e-07, "loss": 0.4085, "step": 28017 }, { "epoch": 0.8587103101630501, "grad_norm": 1.3431753314014687, "learning_rate": 5.144341921778162e-07, "loss": 0.5817, "step": 28018 }, { "epoch": 0.8587409586857914, "grad_norm": 1.4714840585031712, "learning_rate": 5.142149406428354e-07, "loss": 0.5993, "step": 28019 }, { "epoch": 0.8587716072085325, "grad_norm": 1.285163859003789, "learning_rate": 5.139957333074424e-07, "loss": 0.6656, "step": 28020 }, { "epoch": 0.8588022557312738, "grad_norm": 1.3948007565914693, "learning_rate": 5.137765701737962e-07, "loss": 0.5364, "step": 28021 }, { "epoch": 0.8588329042540149, "grad_norm": 1.1980230395871414, "learning_rate": 5.135574512440572e-07, "loss": 0.5621, "step": 28022 }, { "epoch": 0.8588635527767562, "grad_norm": 1.2481128039970943, "learning_rate": 5.133383765203859e-07, "loss": 0.591, "step": 28023 }, { "epoch": 0.8588942012994973, "grad_norm": 1.3549342571535703, "learning_rate": 5.131193460049383e-07, "loss": 0.5951, "step": 28024 }, { "epoch": 0.8589248498222386, "grad_norm": 1.1156106697789903, "learning_rate": 5.129003596998738e-07, "loss": 0.4439, "step": 28025 }, { "epoch": 0.8589554983449798, "grad_norm": 1.1532644425633403, "learning_rate": 5.126814176073508e-07, "loss": 0.5578, "step": 28026 }, { "epoch": 0.858986146867721, "grad_norm": 1.4518063138799684, "learning_rate": 5.124625197295263e-07, "loss": 0.624, "step": 28027 }, { "epoch": 0.8590167953904622, "grad_norm": 1.3473394407361132, "learning_rate": 5.122436660685565e-07, "loss": 0.5818, "step": 28028 }, { "epoch": 0.8590474439132034, "grad_norm": 0.4440425346766748, "learning_rate": 5.120248566265967e-07, "loss": 0.3681, "step": 28029 }, { "epoch": 0.8590780924359446, "grad_norm": 1.714011329730477, "learning_rate": 5.11806091405806e-07, "loss": 0.6142, "step": 28030 }, { "epoch": 0.8591087409586858, "grad_norm": 1.366165227685107, "learning_rate": 5.11587370408338e-07, "loss": 0.5582, "step": 28031 }, { "epoch": 0.859139389481427, "grad_norm": 1.2549783798386396, "learning_rate": 5.113686936363477e-07, "loss": 0.5116, "step": 28032 }, { "epoch": 0.8591700380041682, "grad_norm": 1.521227553279815, "learning_rate": 5.111500610919894e-07, "loss": 0.5872, "step": 28033 }, { "epoch": 0.8592006865269094, "grad_norm": 0.4359560450287801, "learning_rate": 5.109314727774184e-07, "loss": 0.3836, "step": 28034 }, { "epoch": 0.8592313350496507, "grad_norm": 0.4513310651322756, "learning_rate": 5.107129286947893e-07, "loss": 0.4003, "step": 28035 }, { "epoch": 0.8592619835723918, "grad_norm": 1.3632038409532063, "learning_rate": 5.104944288462532e-07, "loss": 0.5984, "step": 28036 }, { "epoch": 0.8592926320951331, "grad_norm": 1.395821315830723, "learning_rate": 5.10275973233964e-07, "loss": 0.6352, "step": 28037 }, { "epoch": 0.8593232806178742, "grad_norm": 1.3954640850811022, "learning_rate": 5.100575618600756e-07, "loss": 0.6187, "step": 28038 }, { "epoch": 0.8593539291406154, "grad_norm": 1.4225086579746828, "learning_rate": 5.09839194726739e-07, "loss": 0.5074, "step": 28039 }, { "epoch": 0.8593845776633566, "grad_norm": 1.2787448933158192, "learning_rate": 5.096208718361045e-07, "loss": 0.5873, "step": 28040 }, { "epoch": 0.8594152261860978, "grad_norm": 1.334772800949531, "learning_rate": 5.094025931903246e-07, "loss": 0.5663, "step": 28041 }, { "epoch": 0.859445874708839, "grad_norm": 1.2353353202449913, "learning_rate": 5.091843587915507e-07, "loss": 0.5404, "step": 28042 }, { "epoch": 0.8594765232315802, "grad_norm": 1.2150171863364727, "learning_rate": 5.089661686419318e-07, "loss": 0.5417, "step": 28043 }, { "epoch": 0.8595071717543215, "grad_norm": 0.4645987811573885, "learning_rate": 5.087480227436176e-07, "loss": 0.4247, "step": 28044 }, { "epoch": 0.8595378202770626, "grad_norm": 1.2977450162685915, "learning_rate": 5.085299210987587e-07, "loss": 0.6206, "step": 28045 }, { "epoch": 0.8595684687998039, "grad_norm": 1.249370557246815, "learning_rate": 5.083118637095047e-07, "loss": 0.5848, "step": 28046 }, { "epoch": 0.859599117322545, "grad_norm": 0.44281575969213105, "learning_rate": 5.080938505780031e-07, "loss": 0.4026, "step": 28047 }, { "epoch": 0.8596297658452863, "grad_norm": 1.454723303331268, "learning_rate": 5.078758817064e-07, "loss": 0.6769, "step": 28048 }, { "epoch": 0.8596604143680274, "grad_norm": 1.2477220389525134, "learning_rate": 5.076579570968471e-07, "loss": 0.6336, "step": 28049 }, { "epoch": 0.8596910628907687, "grad_norm": 1.370607086116019, "learning_rate": 5.074400767514898e-07, "loss": 0.6427, "step": 28050 }, { "epoch": 0.8597217114135098, "grad_norm": 1.2492635922623436, "learning_rate": 5.072222406724742e-07, "loss": 0.5416, "step": 28051 }, { "epoch": 0.8597523599362511, "grad_norm": 1.3061924794800568, "learning_rate": 5.070044488619469e-07, "loss": 0.5626, "step": 28052 }, { "epoch": 0.8597830084589922, "grad_norm": 1.5515200241086913, "learning_rate": 5.067867013220551e-07, "loss": 0.6631, "step": 28053 }, { "epoch": 0.8598136569817335, "grad_norm": 1.474016790197075, "learning_rate": 5.065689980549438e-07, "loss": 0.7092, "step": 28054 }, { "epoch": 0.8598443055044747, "grad_norm": 1.3579344108985403, "learning_rate": 5.063513390627572e-07, "loss": 0.5176, "step": 28055 }, { "epoch": 0.8598749540272159, "grad_norm": 1.3566977693204845, "learning_rate": 5.061337243476405e-07, "loss": 0.5666, "step": 28056 }, { "epoch": 0.8599056025499571, "grad_norm": 1.325380736060905, "learning_rate": 5.059161539117391e-07, "loss": 0.6031, "step": 28057 }, { "epoch": 0.8599362510726983, "grad_norm": 1.2487341495372766, "learning_rate": 5.056986277571957e-07, "loss": 0.5527, "step": 28058 }, { "epoch": 0.8599668995954395, "grad_norm": 1.30235743561004, "learning_rate": 5.05481145886152e-07, "loss": 0.6102, "step": 28059 }, { "epoch": 0.8599975481181807, "grad_norm": 1.289293625021852, "learning_rate": 5.052637083007539e-07, "loss": 0.5862, "step": 28060 }, { "epoch": 0.8600281966409219, "grad_norm": 1.4550646786784034, "learning_rate": 5.050463150031414e-07, "loss": 0.6295, "step": 28061 }, { "epoch": 0.8600588451636632, "grad_norm": 1.3168308376012572, "learning_rate": 5.048289659954591e-07, "loss": 0.5534, "step": 28062 }, { "epoch": 0.8600894936864043, "grad_norm": 1.2712903619367348, "learning_rate": 5.046116612798463e-07, "loss": 0.5296, "step": 28063 }, { "epoch": 0.8601201422091456, "grad_norm": 3.917476620773154, "learning_rate": 5.04394400858445e-07, "loss": 0.6176, "step": 28064 }, { "epoch": 0.8601507907318867, "grad_norm": 1.3563538620241036, "learning_rate": 5.041771847333965e-07, "loss": 0.6662, "step": 28065 }, { "epoch": 0.860181439254628, "grad_norm": 1.4856216621381408, "learning_rate": 5.039600129068395e-07, "loss": 0.5281, "step": 28066 }, { "epoch": 0.8602120877773691, "grad_norm": 1.3817249325796008, "learning_rate": 5.037428853809151e-07, "loss": 0.5264, "step": 28067 }, { "epoch": 0.8602427363001104, "grad_norm": 1.381764927876356, "learning_rate": 5.035258021577633e-07, "loss": 0.6043, "step": 28068 }, { "epoch": 0.8602733848228515, "grad_norm": 1.2917290242176944, "learning_rate": 5.033087632395223e-07, "loss": 0.553, "step": 28069 }, { "epoch": 0.8603040333455927, "grad_norm": 1.3766307346714117, "learning_rate": 5.030917686283287e-07, "loss": 0.6359, "step": 28070 }, { "epoch": 0.860334681868334, "grad_norm": 1.4661042489863807, "learning_rate": 5.028748183263243e-07, "loss": 0.6846, "step": 28071 }, { "epoch": 0.8603653303910751, "grad_norm": 1.325750028564218, "learning_rate": 5.02657912335644e-07, "loss": 0.6335, "step": 28072 }, { "epoch": 0.8603959789138164, "grad_norm": 1.4035835495044708, "learning_rate": 5.024410506584271e-07, "loss": 0.5114, "step": 28073 }, { "epoch": 0.8604266274365575, "grad_norm": 1.27420219246833, "learning_rate": 5.022242332968086e-07, "loss": 0.5778, "step": 28074 }, { "epoch": 0.8604572759592988, "grad_norm": 1.134857763655821, "learning_rate": 5.020074602529251e-07, "loss": 0.5734, "step": 28075 }, { "epoch": 0.8604879244820399, "grad_norm": 1.4578207401246945, "learning_rate": 5.017907315289139e-07, "loss": 0.5782, "step": 28076 }, { "epoch": 0.8605185730047812, "grad_norm": 1.2406172442765806, "learning_rate": 5.015740471269087e-07, "loss": 0.5536, "step": 28077 }, { "epoch": 0.8605492215275223, "grad_norm": 1.3852197712846255, "learning_rate": 5.013574070490452e-07, "loss": 0.6163, "step": 28078 }, { "epoch": 0.8605798700502636, "grad_norm": 1.3149587658304815, "learning_rate": 5.011408112974592e-07, "loss": 0.6722, "step": 28079 }, { "epoch": 0.8606105185730047, "grad_norm": 1.2046363255119232, "learning_rate": 5.00924259874283e-07, "loss": 0.5769, "step": 28080 }, { "epoch": 0.860641167095746, "grad_norm": 1.3602998430488344, "learning_rate": 5.007077527816512e-07, "loss": 0.582, "step": 28081 }, { "epoch": 0.8606718156184872, "grad_norm": 1.6496518792122277, "learning_rate": 5.004912900216985e-07, "loss": 0.6599, "step": 28082 }, { "epoch": 0.8607024641412284, "grad_norm": 1.3418224118807351, "learning_rate": 5.002748715965549e-07, "loss": 0.6596, "step": 28083 }, { "epoch": 0.8607331126639696, "grad_norm": 1.4852523084539297, "learning_rate": 5.000584975083556e-07, "loss": 0.7059, "step": 28084 }, { "epoch": 0.8607637611867108, "grad_norm": 1.178750261007546, "learning_rate": 4.998421677592297e-07, "loss": 0.5841, "step": 28085 }, { "epoch": 0.860794409709452, "grad_norm": 1.5548244084556568, "learning_rate": 4.996258823513106e-07, "loss": 0.6333, "step": 28086 }, { "epoch": 0.8608250582321932, "grad_norm": 2.035547134535262, "learning_rate": 4.994096412867306e-07, "loss": 0.6164, "step": 28087 }, { "epoch": 0.8608557067549344, "grad_norm": 1.4832115782015989, "learning_rate": 4.991934445676172e-07, "loss": 0.6849, "step": 28088 }, { "epoch": 0.8608863552776757, "grad_norm": 1.2357930059948778, "learning_rate": 4.989772921961029e-07, "loss": 0.5718, "step": 28089 }, { "epoch": 0.8609170038004168, "grad_norm": 1.1683818434743836, "learning_rate": 4.987611841743178e-07, "loss": 0.6272, "step": 28090 }, { "epoch": 0.8609476523231581, "grad_norm": 1.304922728767946, "learning_rate": 4.985451205043895e-07, "loss": 0.5405, "step": 28091 }, { "epoch": 0.8609783008458992, "grad_norm": 1.1382505493886776, "learning_rate": 4.983291011884489e-07, "loss": 0.5768, "step": 28092 }, { "epoch": 0.8610089493686405, "grad_norm": 1.5245905506160624, "learning_rate": 4.981131262286226e-07, "loss": 0.6868, "step": 28093 }, { "epoch": 0.8610395978913816, "grad_norm": 1.6015028395828044, "learning_rate": 4.978971956270389e-07, "loss": 0.6293, "step": 28094 }, { "epoch": 0.8610702464141229, "grad_norm": 1.5585440314128691, "learning_rate": 4.976813093858279e-07, "loss": 0.6294, "step": 28095 }, { "epoch": 0.861100894936864, "grad_norm": 1.263919513793417, "learning_rate": 4.974654675071133e-07, "loss": 0.4531, "step": 28096 }, { "epoch": 0.8611315434596053, "grad_norm": 1.3237329943811884, "learning_rate": 4.972496699930235e-07, "loss": 0.5985, "step": 28097 }, { "epoch": 0.8611621919823464, "grad_norm": 1.3160257778645619, "learning_rate": 4.970339168456861e-07, "loss": 0.5587, "step": 28098 }, { "epoch": 0.8611928405050877, "grad_norm": 0.4596517668804156, "learning_rate": 4.968182080672246e-07, "loss": 0.3934, "step": 28099 }, { "epoch": 0.8612234890278289, "grad_norm": 0.45947961593596315, "learning_rate": 4.966025436597655e-07, "loss": 0.3916, "step": 28100 }, { "epoch": 0.86125413755057, "grad_norm": 1.5170663292853805, "learning_rate": 4.963869236254343e-07, "loss": 0.6508, "step": 28101 }, { "epoch": 0.8612847860733113, "grad_norm": 1.224966181659329, "learning_rate": 4.961713479663549e-07, "loss": 0.609, "step": 28102 }, { "epoch": 0.8613154345960524, "grad_norm": 1.474673179269238, "learning_rate": 4.959558166846518e-07, "loss": 0.6305, "step": 28103 }, { "epoch": 0.8613460831187937, "grad_norm": 1.33811602132779, "learning_rate": 4.957403297824476e-07, "loss": 0.5351, "step": 28104 }, { "epoch": 0.8613767316415348, "grad_norm": 1.343949595538859, "learning_rate": 4.955248872618667e-07, "loss": 0.5553, "step": 28105 }, { "epoch": 0.8614073801642761, "grad_norm": 1.249842310951793, "learning_rate": 4.953094891250326e-07, "loss": 0.5501, "step": 28106 }, { "epoch": 0.8614380286870172, "grad_norm": 0.44867174381084013, "learning_rate": 4.950941353740651e-07, "loss": 0.4027, "step": 28107 }, { "epoch": 0.8614686772097585, "grad_norm": 1.4733519769013717, "learning_rate": 4.948788260110882e-07, "loss": 0.571, "step": 28108 }, { "epoch": 0.8614993257324997, "grad_norm": 0.45605704896670946, "learning_rate": 4.946635610382239e-07, "loss": 0.3918, "step": 28109 }, { "epoch": 0.8615299742552409, "grad_norm": 0.44328103812009245, "learning_rate": 4.944483404575911e-07, "loss": 0.3932, "step": 28110 }, { "epoch": 0.8615606227779821, "grad_norm": 1.3820523502470177, "learning_rate": 4.942331642713116e-07, "loss": 0.6439, "step": 28111 }, { "epoch": 0.8615912713007233, "grad_norm": 1.3460665374570546, "learning_rate": 4.940180324815069e-07, "loss": 0.5724, "step": 28112 }, { "epoch": 0.8616219198234645, "grad_norm": 1.286850866475477, "learning_rate": 4.938029450902943e-07, "loss": 0.6806, "step": 28113 }, { "epoch": 0.8616525683462057, "grad_norm": 0.45463778447784065, "learning_rate": 4.935879020997953e-07, "loss": 0.392, "step": 28114 }, { "epoch": 0.8616832168689469, "grad_norm": 1.513033058841845, "learning_rate": 4.933729035121266e-07, "loss": 0.5932, "step": 28115 }, { "epoch": 0.8617138653916881, "grad_norm": 0.44477852914652816, "learning_rate": 4.931579493294075e-07, "loss": 0.3819, "step": 28116 }, { "epoch": 0.8617445139144293, "grad_norm": 1.296866835812071, "learning_rate": 4.929430395537577e-07, "loss": 0.5816, "step": 28117 }, { "epoch": 0.8617751624371706, "grad_norm": 1.3572637123267308, "learning_rate": 4.927281741872919e-07, "loss": 0.6481, "step": 28118 }, { "epoch": 0.8618058109599117, "grad_norm": 1.3099393608728616, "learning_rate": 4.925133532321285e-07, "loss": 0.5724, "step": 28119 }, { "epoch": 0.861836459482653, "grad_norm": 1.275942462208388, "learning_rate": 4.922985766903859e-07, "loss": 0.5673, "step": 28120 }, { "epoch": 0.8618671080053941, "grad_norm": 1.3415801708679178, "learning_rate": 4.920838445641774e-07, "loss": 0.6072, "step": 28121 }, { "epoch": 0.8618977565281354, "grad_norm": 1.3306352581800847, "learning_rate": 4.918691568556205e-07, "loss": 0.6605, "step": 28122 }, { "epoch": 0.8619284050508765, "grad_norm": 1.3310900225531532, "learning_rate": 4.91654513566831e-07, "loss": 0.5745, "step": 28123 }, { "epoch": 0.8619590535736178, "grad_norm": 1.6258267307258016, "learning_rate": 4.914399146999222e-07, "loss": 0.6603, "step": 28124 }, { "epoch": 0.861989702096359, "grad_norm": 1.2842044121942173, "learning_rate": 4.912253602570105e-07, "loss": 0.6055, "step": 28125 }, { "epoch": 0.8620203506191002, "grad_norm": 1.4093276360505609, "learning_rate": 4.910108502402067e-07, "loss": 0.6016, "step": 28126 }, { "epoch": 0.8620509991418414, "grad_norm": 1.4862230400195513, "learning_rate": 4.907963846516289e-07, "loss": 0.6786, "step": 28127 }, { "epoch": 0.8620816476645826, "grad_norm": 1.288102650184012, "learning_rate": 4.905819634933878e-07, "loss": 0.5427, "step": 28128 }, { "epoch": 0.8621122961873238, "grad_norm": 1.3835984642950654, "learning_rate": 4.903675867675956e-07, "loss": 0.5905, "step": 28129 }, { "epoch": 0.862142944710065, "grad_norm": 1.473597265232871, "learning_rate": 4.901532544763654e-07, "loss": 0.5824, "step": 28130 }, { "epoch": 0.8621735932328062, "grad_norm": 1.2883843167224893, "learning_rate": 4.899389666218101e-07, "loss": 0.5341, "step": 28131 }, { "epoch": 0.8622042417555473, "grad_norm": 1.251552938847387, "learning_rate": 4.897247232060392e-07, "loss": 0.6173, "step": 28132 }, { "epoch": 0.8622348902782886, "grad_norm": 1.3697596698973284, "learning_rate": 4.895105242311643e-07, "loss": 0.5724, "step": 28133 }, { "epoch": 0.8622655388010297, "grad_norm": 1.253464630053449, "learning_rate": 4.892963696992964e-07, "loss": 0.6208, "step": 28134 }, { "epoch": 0.862296187323771, "grad_norm": 1.4619091812660128, "learning_rate": 4.890822596125466e-07, "loss": 0.5978, "step": 28135 }, { "epoch": 0.8623268358465122, "grad_norm": 1.6056063080493115, "learning_rate": 4.888681939730233e-07, "loss": 0.6385, "step": 28136 }, { "epoch": 0.8623574843692534, "grad_norm": 1.6132866173796416, "learning_rate": 4.886541727828348e-07, "loss": 0.52, "step": 28137 }, { "epoch": 0.8623881328919946, "grad_norm": 1.275996356163725, "learning_rate": 4.884401960440915e-07, "loss": 0.616, "step": 28138 }, { "epoch": 0.8624187814147358, "grad_norm": 1.376869430004795, "learning_rate": 4.882262637589019e-07, "loss": 0.6426, "step": 28139 }, { "epoch": 0.862449429937477, "grad_norm": 1.3559711079131103, "learning_rate": 4.880123759293725e-07, "loss": 0.5738, "step": 28140 }, { "epoch": 0.8624800784602182, "grad_norm": 0.4485501338891038, "learning_rate": 4.877985325576112e-07, "loss": 0.4076, "step": 28141 }, { "epoch": 0.8625107269829594, "grad_norm": 1.3636328518068226, "learning_rate": 4.875847336457268e-07, "loss": 0.5741, "step": 28142 }, { "epoch": 0.8625413755057006, "grad_norm": 1.3399505532766811, "learning_rate": 4.873709791958237e-07, "loss": 0.5585, "step": 28143 }, { "epoch": 0.8625720240284418, "grad_norm": 1.435763154152047, "learning_rate": 4.871572692100096e-07, "loss": 0.5836, "step": 28144 }, { "epoch": 0.8626026725511831, "grad_norm": 1.4460139206680336, "learning_rate": 4.86943603690388e-07, "loss": 0.5725, "step": 28145 }, { "epoch": 0.8626333210739242, "grad_norm": 1.3635134896236076, "learning_rate": 4.867299826390676e-07, "loss": 0.589, "step": 28146 }, { "epoch": 0.8626639695966655, "grad_norm": 1.3723535347148448, "learning_rate": 4.865164060581512e-07, "loss": 0.6049, "step": 28147 }, { "epoch": 0.8626946181194066, "grad_norm": 0.4240154704436869, "learning_rate": 4.863028739497427e-07, "loss": 0.3752, "step": 28148 }, { "epoch": 0.8627252666421479, "grad_norm": 1.3150404153897555, "learning_rate": 4.860893863159471e-07, "loss": 0.6299, "step": 28149 }, { "epoch": 0.862755915164889, "grad_norm": 1.4355225797637843, "learning_rate": 4.858759431588683e-07, "loss": 0.5959, "step": 28150 }, { "epoch": 0.8627865636876303, "grad_norm": 1.4097504146897435, "learning_rate": 4.856625444806079e-07, "loss": 0.6318, "step": 28151 }, { "epoch": 0.8628172122103714, "grad_norm": 0.45362846148987274, "learning_rate": 4.854491902832697e-07, "loss": 0.4223, "step": 28152 }, { "epoch": 0.8628478607331127, "grad_norm": 1.4200962710205443, "learning_rate": 4.852358805689556e-07, "loss": 0.6819, "step": 28153 }, { "epoch": 0.8628785092558539, "grad_norm": 1.2044431830546478, "learning_rate": 4.850226153397686e-07, "loss": 0.5917, "step": 28154 }, { "epoch": 0.8629091577785951, "grad_norm": 0.4471273464903621, "learning_rate": 4.848093945978088e-07, "loss": 0.372, "step": 28155 }, { "epoch": 0.8629398063013363, "grad_norm": 0.43217770718032206, "learning_rate": 4.845962183451753e-07, "loss": 0.4024, "step": 28156 }, { "epoch": 0.8629704548240775, "grad_norm": 1.3334409247296428, "learning_rate": 4.843830865839727e-07, "loss": 0.6018, "step": 28157 }, { "epoch": 0.8630011033468187, "grad_norm": 1.4150561197427158, "learning_rate": 4.841699993162985e-07, "loss": 0.5652, "step": 28158 }, { "epoch": 0.8630317518695599, "grad_norm": 1.1674565700656325, "learning_rate": 4.839569565442525e-07, "loss": 0.5792, "step": 28159 }, { "epoch": 0.8630624003923011, "grad_norm": 1.607201596232662, "learning_rate": 4.837439582699332e-07, "loss": 0.548, "step": 28160 }, { "epoch": 0.8630930489150423, "grad_norm": 1.304667181837925, "learning_rate": 4.835310044954411e-07, "loss": 0.6924, "step": 28161 }, { "epoch": 0.8631236974377835, "grad_norm": 1.3315974938159811, "learning_rate": 4.833180952228738e-07, "loss": 0.5857, "step": 28162 }, { "epoch": 0.8631543459605246, "grad_norm": 1.3854750538389102, "learning_rate": 4.831052304543288e-07, "loss": 0.5843, "step": 28163 }, { "epoch": 0.8631849944832659, "grad_norm": 1.3776228156253512, "learning_rate": 4.82892410191903e-07, "loss": 0.5492, "step": 28164 }, { "epoch": 0.8632156430060071, "grad_norm": 1.4055553239746392, "learning_rate": 4.826796344376955e-07, "loss": 0.6738, "step": 28165 }, { "epoch": 0.8632462915287483, "grad_norm": 0.42398073838594663, "learning_rate": 4.824669031938007e-07, "loss": 0.3725, "step": 28166 }, { "epoch": 0.8632769400514895, "grad_norm": 0.43061729449059, "learning_rate": 4.822542164623139e-07, "loss": 0.3884, "step": 28167 }, { "epoch": 0.8633075885742307, "grad_norm": 1.3092515006058392, "learning_rate": 4.820415742453343e-07, "loss": 0.6008, "step": 28168 }, { "epoch": 0.8633382370969719, "grad_norm": 1.1501797023467342, "learning_rate": 4.818289765449546e-07, "loss": 0.4871, "step": 28169 }, { "epoch": 0.8633688856197131, "grad_norm": 1.3086313820373536, "learning_rate": 4.816164233632692e-07, "loss": 0.6057, "step": 28170 }, { "epoch": 0.8633995341424543, "grad_norm": 0.41906114490011215, "learning_rate": 4.814039147023736e-07, "loss": 0.3802, "step": 28171 }, { "epoch": 0.8634301826651956, "grad_norm": 0.44786086865332453, "learning_rate": 4.811914505643612e-07, "loss": 0.4029, "step": 28172 }, { "epoch": 0.8634608311879367, "grad_norm": 1.2024393208384123, "learning_rate": 4.809790309513263e-07, "loss": 0.5312, "step": 28173 }, { "epoch": 0.863491479710678, "grad_norm": 1.5034939618209127, "learning_rate": 4.807666558653601e-07, "loss": 0.6551, "step": 28174 }, { "epoch": 0.8635221282334191, "grad_norm": 1.416677066869491, "learning_rate": 4.805543253085571e-07, "loss": 0.608, "step": 28175 }, { "epoch": 0.8635527767561604, "grad_norm": 1.4722048283730393, "learning_rate": 4.803420392830089e-07, "loss": 0.616, "step": 28176 }, { "epoch": 0.8635834252789015, "grad_norm": 1.3823907292205584, "learning_rate": 4.801297977908076e-07, "loss": 0.573, "step": 28177 }, { "epoch": 0.8636140738016428, "grad_norm": 1.2861377780823642, "learning_rate": 4.799176008340417e-07, "loss": 0.6293, "step": 28178 }, { "epoch": 0.8636447223243839, "grad_norm": 1.2817040116817326, "learning_rate": 4.797054484148061e-07, "loss": 0.5793, "step": 28179 }, { "epoch": 0.8636753708471252, "grad_norm": 1.2475334731818501, "learning_rate": 4.794933405351881e-07, "loss": 0.5605, "step": 28180 }, { "epoch": 0.8637060193698664, "grad_norm": 1.4652056296513756, "learning_rate": 4.792812771972799e-07, "loss": 0.6385, "step": 28181 }, { "epoch": 0.8637366678926076, "grad_norm": 1.3341233874276586, "learning_rate": 4.790692584031692e-07, "loss": 0.6363, "step": 28182 }, { "epoch": 0.8637673164153488, "grad_norm": 1.3140275233912795, "learning_rate": 4.788572841549461e-07, "loss": 0.6253, "step": 28183 }, { "epoch": 0.86379796493809, "grad_norm": 1.2617849433818322, "learning_rate": 4.786453544546993e-07, "loss": 0.6478, "step": 28184 }, { "epoch": 0.8638286134608312, "grad_norm": 1.370831163149278, "learning_rate": 4.784334693045157e-07, "loss": 0.5936, "step": 28185 }, { "epoch": 0.8638592619835724, "grad_norm": 0.4592414782047187, "learning_rate": 4.782216287064845e-07, "loss": 0.3965, "step": 28186 }, { "epoch": 0.8638899105063136, "grad_norm": 1.5384729429362305, "learning_rate": 4.780098326626931e-07, "loss": 0.6745, "step": 28187 }, { "epoch": 0.8639205590290548, "grad_norm": 1.2618890692287899, "learning_rate": 4.77798081175227e-07, "loss": 0.5254, "step": 28188 }, { "epoch": 0.863951207551796, "grad_norm": 1.317123305887646, "learning_rate": 4.775863742461745e-07, "loss": 0.6225, "step": 28189 }, { "epoch": 0.8639818560745373, "grad_norm": 1.3990700622089323, "learning_rate": 4.773747118776196e-07, "loss": 0.5746, "step": 28190 }, { "epoch": 0.8640125045972784, "grad_norm": 1.2733822174882976, "learning_rate": 4.771630940716487e-07, "loss": 0.5965, "step": 28191 }, { "epoch": 0.8640431531200197, "grad_norm": 1.4150079968036726, "learning_rate": 4.769515208303483e-07, "loss": 0.6143, "step": 28192 }, { "epoch": 0.8640738016427608, "grad_norm": 1.515743279512903, "learning_rate": 4.7673999215580027e-07, "loss": 0.6146, "step": 28193 }, { "epoch": 0.864104450165502, "grad_norm": 1.404292428405102, "learning_rate": 4.7652850805009086e-07, "loss": 0.628, "step": 28194 }, { "epoch": 0.8641350986882432, "grad_norm": 1.2376995195550429, "learning_rate": 4.763170685153046e-07, "loss": 0.6015, "step": 28195 }, { "epoch": 0.8641657472109844, "grad_norm": 1.4436064667771364, "learning_rate": 4.7610567355352356e-07, "loss": 0.5368, "step": 28196 }, { "epoch": 0.8641963957337256, "grad_norm": 1.488093356583141, "learning_rate": 4.758943231668284e-07, "loss": 0.7221, "step": 28197 }, { "epoch": 0.8642270442564668, "grad_norm": 1.304666981686183, "learning_rate": 4.7568301735730626e-07, "loss": 0.573, "step": 28198 }, { "epoch": 0.864257692779208, "grad_norm": 1.3064435945454351, "learning_rate": 4.754717561270361e-07, "loss": 0.5702, "step": 28199 }, { "epoch": 0.8642883413019492, "grad_norm": 1.5148828493884494, "learning_rate": 4.7526053947810127e-07, "loss": 0.6329, "step": 28200 }, { "epoch": 0.8643189898246905, "grad_norm": 0.4328621326389583, "learning_rate": 4.750493674125811e-07, "loss": 0.39, "step": 28201 }, { "epoch": 0.8643496383474316, "grad_norm": 1.5481463209569941, "learning_rate": 4.748382399325574e-07, "loss": 0.5617, "step": 28202 }, { "epoch": 0.8643802868701729, "grad_norm": 1.2175209474027346, "learning_rate": 4.746271570401112e-07, "loss": 0.5936, "step": 28203 }, { "epoch": 0.864410935392914, "grad_norm": 1.5082707917848972, "learning_rate": 4.744161187373203e-07, "loss": 0.6188, "step": 28204 }, { "epoch": 0.8644415839156553, "grad_norm": 1.4557749643069693, "learning_rate": 4.742051250262658e-07, "loss": 0.5943, "step": 28205 }, { "epoch": 0.8644722324383964, "grad_norm": 0.4288839641922555, "learning_rate": 4.7399417590902663e-07, "loss": 0.3697, "step": 28206 }, { "epoch": 0.8645028809611377, "grad_norm": 1.3624611748009583, "learning_rate": 4.737832713876805e-07, "loss": 0.5813, "step": 28207 }, { "epoch": 0.8645335294838788, "grad_norm": 1.156762131988708, "learning_rate": 4.7357241146430533e-07, "loss": 0.46, "step": 28208 }, { "epoch": 0.8645641780066201, "grad_norm": 0.450741678312425, "learning_rate": 4.7336159614098045e-07, "loss": 0.3902, "step": 28209 }, { "epoch": 0.8645948265293613, "grad_norm": 1.4183304168624367, "learning_rate": 4.7315082541978085e-07, "loss": 0.5672, "step": 28210 }, { "epoch": 0.8646254750521025, "grad_norm": 1.3922207258109351, "learning_rate": 4.729400993027855e-07, "loss": 0.5266, "step": 28211 }, { "epoch": 0.8646561235748437, "grad_norm": 1.5083943379716713, "learning_rate": 4.7272941779206885e-07, "loss": 0.5974, "step": 28212 }, { "epoch": 0.8646867720975849, "grad_norm": 1.3509584037417457, "learning_rate": 4.725187808897075e-07, "loss": 0.6368, "step": 28213 }, { "epoch": 0.8647174206203261, "grad_norm": 1.4923240018709376, "learning_rate": 4.723081885977776e-07, "loss": 0.6465, "step": 28214 }, { "epoch": 0.8647480691430673, "grad_norm": 1.2383571999022815, "learning_rate": 4.720976409183531e-07, "loss": 0.585, "step": 28215 }, { "epoch": 0.8647787176658085, "grad_norm": 1.3412047004808245, "learning_rate": 4.718871378535089e-07, "loss": 0.6416, "step": 28216 }, { "epoch": 0.8648093661885498, "grad_norm": 0.4636899978460507, "learning_rate": 4.716766794053201e-07, "loss": 0.3998, "step": 28217 }, { "epoch": 0.8648400147112909, "grad_norm": 1.4119715246781708, "learning_rate": 4.714662655758589e-07, "loss": 0.641, "step": 28218 }, { "epoch": 0.8648706632340322, "grad_norm": 1.4496760223942258, "learning_rate": 4.7125589636719925e-07, "loss": 0.6909, "step": 28219 }, { "epoch": 0.8649013117567733, "grad_norm": 1.4741556123343298, "learning_rate": 4.7104557178141495e-07, "loss": 0.622, "step": 28220 }, { "epoch": 0.8649319602795146, "grad_norm": 1.327374979194384, "learning_rate": 4.708352918205761e-07, "loss": 0.5483, "step": 28221 }, { "epoch": 0.8649626088022557, "grad_norm": 1.5154990556978651, "learning_rate": 4.706250564867576e-07, "loss": 0.6395, "step": 28222 }, { "epoch": 0.864993257324997, "grad_norm": 1.5027629851509943, "learning_rate": 4.704148657820279e-07, "loss": 0.6126, "step": 28223 }, { "epoch": 0.8650239058477381, "grad_norm": 1.3260072222974764, "learning_rate": 4.7020471970845913e-07, "loss": 0.566, "step": 28224 }, { "epoch": 0.8650545543704793, "grad_norm": 1.2143890063718625, "learning_rate": 4.6999461826812363e-07, "loss": 0.6003, "step": 28225 }, { "epoch": 0.8650852028932206, "grad_norm": 1.2528545389637602, "learning_rate": 4.6978456146308915e-07, "loss": 0.6027, "step": 28226 }, { "epoch": 0.8651158514159617, "grad_norm": 1.5202631900774397, "learning_rate": 4.695745492954268e-07, "loss": 0.5534, "step": 28227 }, { "epoch": 0.865146499938703, "grad_norm": 1.2589612429624961, "learning_rate": 4.6936458176720603e-07, "loss": 0.5343, "step": 28228 }, { "epoch": 0.8651771484614441, "grad_norm": 1.3398778259569981, "learning_rate": 4.691546588804946e-07, "loss": 0.6563, "step": 28229 }, { "epoch": 0.8652077969841854, "grad_norm": 1.412212039897971, "learning_rate": 4.6894478063736147e-07, "loss": 0.6146, "step": 28230 }, { "epoch": 0.8652384455069265, "grad_norm": 1.2051580455927855, "learning_rate": 4.6873494703987555e-07, "loss": 0.537, "step": 28231 }, { "epoch": 0.8652690940296678, "grad_norm": 1.3766149479006802, "learning_rate": 4.685251580901029e-07, "loss": 0.5207, "step": 28232 }, { "epoch": 0.8652997425524089, "grad_norm": 1.274508551330643, "learning_rate": 4.683154137901125e-07, "loss": 0.6153, "step": 28233 }, { "epoch": 0.8653303910751502, "grad_norm": 1.2272082806651814, "learning_rate": 4.6810571414196817e-07, "loss": 0.596, "step": 28234 }, { "epoch": 0.8653610395978913, "grad_norm": 1.3906661225306716, "learning_rate": 4.6789605914773827e-07, "loss": 0.6111, "step": 28235 }, { "epoch": 0.8653916881206326, "grad_norm": 1.394312078010575, "learning_rate": 4.67686448809489e-07, "loss": 0.5968, "step": 28236 }, { "epoch": 0.8654223366433738, "grad_norm": 1.3931664338568086, "learning_rate": 4.674768831292836e-07, "loss": 0.6376, "step": 28237 }, { "epoch": 0.865452985166115, "grad_norm": 1.2635451555466226, "learning_rate": 4.672673621091883e-07, "loss": 0.597, "step": 28238 }, { "epoch": 0.8654836336888562, "grad_norm": 1.4552993554465683, "learning_rate": 4.670578857512681e-07, "loss": 0.6702, "step": 28239 }, { "epoch": 0.8655142822115974, "grad_norm": 1.299778151062312, "learning_rate": 4.668484540575857e-07, "loss": 0.6465, "step": 28240 }, { "epoch": 0.8655449307343386, "grad_norm": 1.446379148719337, "learning_rate": 4.666390670302062e-07, "loss": 0.5476, "step": 28241 }, { "epoch": 0.8655755792570798, "grad_norm": 1.3542306658952143, "learning_rate": 4.664297246711902e-07, "loss": 0.5629, "step": 28242 }, { "epoch": 0.865606227779821, "grad_norm": 1.5379781081858572, "learning_rate": 4.662204269826037e-07, "loss": 0.6557, "step": 28243 }, { "epoch": 0.8656368763025623, "grad_norm": 0.48238370282254955, "learning_rate": 4.660111739665074e-07, "loss": 0.3958, "step": 28244 }, { "epoch": 0.8656675248253034, "grad_norm": 1.3361098352411227, "learning_rate": 4.658019656249624e-07, "loss": 0.5922, "step": 28245 }, { "epoch": 0.8656981733480447, "grad_norm": 1.5374664974496717, "learning_rate": 4.6559280196003087e-07, "loss": 0.5806, "step": 28246 }, { "epoch": 0.8657288218707858, "grad_norm": 1.3784733722715885, "learning_rate": 4.6538368297377403e-07, "loss": 0.6082, "step": 28247 }, { "epoch": 0.8657594703935271, "grad_norm": 1.219808331054682, "learning_rate": 4.6517460866825125e-07, "loss": 0.5867, "step": 28248 }, { "epoch": 0.8657901189162682, "grad_norm": 1.2611677405315898, "learning_rate": 4.649655790455232e-07, "loss": 0.6747, "step": 28249 }, { "epoch": 0.8658207674390095, "grad_norm": 1.3732260095307744, "learning_rate": 4.6475659410765097e-07, "loss": 0.6113, "step": 28250 }, { "epoch": 0.8658514159617506, "grad_norm": 1.258675948469954, "learning_rate": 4.645476538566912e-07, "loss": 0.5661, "step": 28251 }, { "epoch": 0.8658820644844919, "grad_norm": 0.5222664416584666, "learning_rate": 4.643387582947051e-07, "loss": 0.4153, "step": 28252 }, { "epoch": 0.865912713007233, "grad_norm": 1.5093655514838638, "learning_rate": 4.6412990742374766e-07, "loss": 0.6122, "step": 28253 }, { "epoch": 0.8659433615299743, "grad_norm": 1.3043890586527023, "learning_rate": 4.6392110124588055e-07, "loss": 0.5562, "step": 28254 }, { "epoch": 0.8659740100527155, "grad_norm": 1.2255939211940787, "learning_rate": 4.6371233976315935e-07, "loss": 0.6575, "step": 28255 }, { "epoch": 0.8660046585754566, "grad_norm": 1.2869919574428486, "learning_rate": 4.635036229776402e-07, "loss": 0.5697, "step": 28256 }, { "epoch": 0.8660353070981979, "grad_norm": 1.3993213565719715, "learning_rate": 4.6329495089138086e-07, "loss": 0.5828, "step": 28257 }, { "epoch": 0.866065955620939, "grad_norm": 1.3974555477382165, "learning_rate": 4.6308632350643756e-07, "loss": 0.6447, "step": 28258 }, { "epoch": 0.8660966041436803, "grad_norm": 1.4831748892515073, "learning_rate": 4.6287774082486523e-07, "loss": 0.5629, "step": 28259 }, { "epoch": 0.8661272526664214, "grad_norm": 1.190585559502896, "learning_rate": 4.62669202848719e-07, "loss": 0.5549, "step": 28260 }, { "epoch": 0.8661579011891627, "grad_norm": 1.3814579048245301, "learning_rate": 4.624607095800543e-07, "loss": 0.6603, "step": 28261 }, { "epoch": 0.8661885497119038, "grad_norm": 1.2612818912886492, "learning_rate": 4.622522610209257e-07, "loss": 0.5772, "step": 28262 }, { "epoch": 0.8662191982346451, "grad_norm": 0.4314038408107908, "learning_rate": 4.6204385717338705e-07, "loss": 0.4003, "step": 28263 }, { "epoch": 0.8662498467573863, "grad_norm": 1.4874381504425218, "learning_rate": 4.6183549803948903e-07, "loss": 0.5285, "step": 28264 }, { "epoch": 0.8662804952801275, "grad_norm": 1.315089143091194, "learning_rate": 4.6162718362128933e-07, "loss": 0.5896, "step": 28265 }, { "epoch": 0.8663111438028687, "grad_norm": 1.4475720687191178, "learning_rate": 4.6141891392083804e-07, "loss": 0.6952, "step": 28266 }, { "epoch": 0.8663417923256099, "grad_norm": 1.4076614864165395, "learning_rate": 4.612106889401863e-07, "loss": 0.6262, "step": 28267 }, { "epoch": 0.8663724408483511, "grad_norm": 1.479744992373177, "learning_rate": 4.610025086813874e-07, "loss": 0.573, "step": 28268 }, { "epoch": 0.8664030893710923, "grad_norm": 1.2190547330325776, "learning_rate": 4.6079437314649257e-07, "loss": 0.5222, "step": 28269 }, { "epoch": 0.8664337378938335, "grad_norm": 1.3708947048334246, "learning_rate": 4.605862823375512e-07, "loss": 0.6, "step": 28270 }, { "epoch": 0.8664643864165747, "grad_norm": 1.2837035107179562, "learning_rate": 4.6037823625661504e-07, "loss": 0.5515, "step": 28271 }, { "epoch": 0.8664950349393159, "grad_norm": 1.265663875282904, "learning_rate": 4.601702349057335e-07, "loss": 0.664, "step": 28272 }, { "epoch": 0.8665256834620572, "grad_norm": 1.5573476532056585, "learning_rate": 4.599622782869573e-07, "loss": 0.6565, "step": 28273 }, { "epoch": 0.8665563319847983, "grad_norm": 1.4407002742480408, "learning_rate": 4.5975436640233407e-07, "loss": 0.5575, "step": 28274 }, { "epoch": 0.8665869805075396, "grad_norm": 1.2578519703764819, "learning_rate": 4.5954649925391116e-07, "loss": 0.5767, "step": 28275 }, { "epoch": 0.8666176290302807, "grad_norm": 1.2140228127414618, "learning_rate": 4.593386768437402e-07, "loss": 0.5288, "step": 28276 }, { "epoch": 0.866648277553022, "grad_norm": 0.4366621124091801, "learning_rate": 4.591308991738669e-07, "loss": 0.3975, "step": 28277 }, { "epoch": 0.8666789260757631, "grad_norm": 1.2989826554864814, "learning_rate": 4.589231662463373e-07, "loss": 0.613, "step": 28278 }, { "epoch": 0.8667095745985044, "grad_norm": 1.529717903625665, "learning_rate": 4.587154780632003e-07, "loss": 0.6363, "step": 28279 }, { "epoch": 0.8667402231212455, "grad_norm": 1.2836151156303244, "learning_rate": 4.585078346265015e-07, "loss": 0.6322, "step": 28280 }, { "epoch": 0.8667708716439868, "grad_norm": 1.2537952230598128, "learning_rate": 4.5830023593828764e-07, "loss": 0.5008, "step": 28281 }, { "epoch": 0.866801520166728, "grad_norm": 1.309224864375413, "learning_rate": 4.5809268200060265e-07, "loss": 0.5859, "step": 28282 }, { "epoch": 0.8668321686894692, "grad_norm": 1.6377633252822368, "learning_rate": 4.578851728154932e-07, "loss": 0.6809, "step": 28283 }, { "epoch": 0.8668628172122104, "grad_norm": 1.2997780947347217, "learning_rate": 4.576777083850037e-07, "loss": 0.6861, "step": 28284 }, { "epoch": 0.8668934657349516, "grad_norm": 1.3823463152131388, "learning_rate": 4.5747028871117815e-07, "loss": 0.6336, "step": 28285 }, { "epoch": 0.8669241142576928, "grad_norm": 0.45470811965679525, "learning_rate": 4.572629137960588e-07, "loss": 0.4162, "step": 28286 }, { "epoch": 0.8669547627804339, "grad_norm": 1.3800395541431967, "learning_rate": 4.570555836416907e-07, "loss": 0.5404, "step": 28287 }, { "epoch": 0.8669854113031752, "grad_norm": 1.4089546474809447, "learning_rate": 4.56848298250116e-07, "loss": 0.6457, "step": 28288 }, { "epoch": 0.8670160598259163, "grad_norm": 1.3700748468978132, "learning_rate": 4.566410576233782e-07, "loss": 0.562, "step": 28289 }, { "epoch": 0.8670467083486576, "grad_norm": 1.4491824950951917, "learning_rate": 4.5643386176351777e-07, "loss": 0.6119, "step": 28290 }, { "epoch": 0.8670773568713988, "grad_norm": 1.405094834356091, "learning_rate": 4.562267106725776e-07, "loss": 0.6911, "step": 28291 }, { "epoch": 0.86710800539414, "grad_norm": 0.4249527042969269, "learning_rate": 4.560196043525983e-07, "loss": 0.3947, "step": 28292 }, { "epoch": 0.8671386539168812, "grad_norm": 1.3694825009652165, "learning_rate": 4.5581254280562094e-07, "loss": 0.7293, "step": 28293 }, { "epoch": 0.8671693024396224, "grad_norm": 1.456955672093772, "learning_rate": 4.5560552603368334e-07, "loss": 0.6306, "step": 28294 }, { "epoch": 0.8671999509623636, "grad_norm": 1.3535013311116397, "learning_rate": 4.5539855403882895e-07, "loss": 0.5707, "step": 28295 }, { "epoch": 0.8672305994851048, "grad_norm": 1.3823939710920492, "learning_rate": 4.551916268230955e-07, "loss": 0.7121, "step": 28296 }, { "epoch": 0.867261248007846, "grad_norm": 1.3557142789512684, "learning_rate": 4.549847443885208e-07, "loss": 0.668, "step": 28297 }, { "epoch": 0.8672918965305872, "grad_norm": 1.3696773447960289, "learning_rate": 4.5477790673714437e-07, "loss": 0.664, "step": 28298 }, { "epoch": 0.8673225450533284, "grad_norm": 1.3986595943525229, "learning_rate": 4.545711138710046e-07, "loss": 0.5281, "step": 28299 }, { "epoch": 0.8673531935760697, "grad_norm": 0.4346672109570701, "learning_rate": 4.543643657921387e-07, "loss": 0.3908, "step": 28300 }, { "epoch": 0.8673838420988108, "grad_norm": 1.4322321946928498, "learning_rate": 4.5415766250258343e-07, "loss": 0.6424, "step": 28301 }, { "epoch": 0.8674144906215521, "grad_norm": 1.301437831897525, "learning_rate": 4.53951004004376e-07, "loss": 0.5483, "step": 28302 }, { "epoch": 0.8674451391442932, "grad_norm": 1.4844709575007384, "learning_rate": 4.5374439029955307e-07, "loss": 0.7257, "step": 28303 }, { "epoch": 0.8674757876670345, "grad_norm": 0.43850918886169554, "learning_rate": 4.535378213901498e-07, "loss": 0.3706, "step": 28304 }, { "epoch": 0.8675064361897756, "grad_norm": 1.4867314149899218, "learning_rate": 4.533312972781995e-07, "loss": 0.5847, "step": 28305 }, { "epoch": 0.8675370847125169, "grad_norm": 0.4266088653606534, "learning_rate": 4.5312481796574157e-07, "loss": 0.378, "step": 28306 }, { "epoch": 0.867567733235258, "grad_norm": 0.43504997530713035, "learning_rate": 4.529183834548073e-07, "loss": 0.4012, "step": 28307 }, { "epoch": 0.8675983817579993, "grad_norm": 1.3528090222045368, "learning_rate": 4.5271199374743226e-07, "loss": 0.5329, "step": 28308 }, { "epoch": 0.8676290302807405, "grad_norm": 1.3710775301689286, "learning_rate": 4.5250564884564864e-07, "loss": 0.5633, "step": 28309 }, { "epoch": 0.8676596788034817, "grad_norm": 1.2896758108831619, "learning_rate": 4.522993487514904e-07, "loss": 0.5985, "step": 28310 }, { "epoch": 0.8676903273262229, "grad_norm": 1.3579521378787804, "learning_rate": 4.5209309346699093e-07, "loss": 0.5963, "step": 28311 }, { "epoch": 0.8677209758489641, "grad_norm": 1.4525451667869196, "learning_rate": 4.518868829941814e-07, "loss": 0.5951, "step": 28312 }, { "epoch": 0.8677516243717053, "grad_norm": 1.2198111730150922, "learning_rate": 4.516807173350934e-07, "loss": 0.617, "step": 28313 }, { "epoch": 0.8677822728944465, "grad_norm": 1.1397459899902482, "learning_rate": 4.514745964917605e-07, "loss": 0.6129, "step": 28314 }, { "epoch": 0.8678129214171877, "grad_norm": 1.3969897692475988, "learning_rate": 4.512685204662115e-07, "loss": 0.6814, "step": 28315 }, { "epoch": 0.867843569939929, "grad_norm": 1.3047150675074994, "learning_rate": 4.51062489260477e-07, "loss": 0.6382, "step": 28316 }, { "epoch": 0.8678742184626701, "grad_norm": 1.5265563780315377, "learning_rate": 4.5085650287658875e-07, "loss": 0.6492, "step": 28317 }, { "epoch": 0.8679048669854112, "grad_norm": 1.390581006821973, "learning_rate": 4.506505613165746e-07, "loss": 0.6351, "step": 28318 }, { "epoch": 0.8679355155081525, "grad_norm": 1.4212700752381162, "learning_rate": 4.5044466458246563e-07, "loss": 0.7006, "step": 28319 }, { "epoch": 0.8679661640308937, "grad_norm": 1.1912461182799114, "learning_rate": 4.50238812676288e-07, "loss": 0.6143, "step": 28320 }, { "epoch": 0.8679968125536349, "grad_norm": 1.21251066212152, "learning_rate": 4.500330056000718e-07, "loss": 0.4371, "step": 28321 }, { "epoch": 0.8680274610763761, "grad_norm": 1.5455464802943513, "learning_rate": 4.498272433558454e-07, "loss": 0.6697, "step": 28322 }, { "epoch": 0.8680581095991173, "grad_norm": 1.4638841689162851, "learning_rate": 4.4962152594563436e-07, "loss": 0.5801, "step": 28323 }, { "epoch": 0.8680887581218585, "grad_norm": 1.4446042555208316, "learning_rate": 4.494158533714665e-07, "loss": 0.6192, "step": 28324 }, { "epoch": 0.8681194066445997, "grad_norm": 1.547041257984769, "learning_rate": 4.4921022563536974e-07, "loss": 0.5508, "step": 28325 }, { "epoch": 0.8681500551673409, "grad_norm": 1.3960296829995427, "learning_rate": 4.4900464273936793e-07, "loss": 0.653, "step": 28326 }, { "epoch": 0.8681807036900822, "grad_norm": 1.6681692193809836, "learning_rate": 4.487991046854878e-07, "loss": 0.6074, "step": 28327 }, { "epoch": 0.8682113522128233, "grad_norm": 1.234646339951323, "learning_rate": 4.4859361147575553e-07, "loss": 0.4573, "step": 28328 }, { "epoch": 0.8682420007355646, "grad_norm": 1.3220457166248338, "learning_rate": 4.4838816311219445e-07, "loss": 0.6364, "step": 28329 }, { "epoch": 0.8682726492583057, "grad_norm": 0.44314580503052725, "learning_rate": 4.4818275959682967e-07, "loss": 0.3916, "step": 28330 }, { "epoch": 0.868303297781047, "grad_norm": 1.5031791183518077, "learning_rate": 4.4797740093168395e-07, "loss": 0.5669, "step": 28331 }, { "epoch": 0.8683339463037881, "grad_norm": 0.43028357099019127, "learning_rate": 4.4777208711878186e-07, "loss": 0.3884, "step": 28332 }, { "epoch": 0.8683645948265294, "grad_norm": 1.2493416386081484, "learning_rate": 4.475668181601472e-07, "loss": 0.6177, "step": 28333 }, { "epoch": 0.8683952433492705, "grad_norm": 0.4384462267792251, "learning_rate": 4.473615940578002e-07, "loss": 0.3758, "step": 28334 }, { "epoch": 0.8684258918720118, "grad_norm": 1.4098013067393975, "learning_rate": 4.4715641481376414e-07, "loss": 0.591, "step": 28335 }, { "epoch": 0.868456540394753, "grad_norm": 1.2511749662285416, "learning_rate": 4.4695128043006187e-07, "loss": 0.534, "step": 28336 }, { "epoch": 0.8684871889174942, "grad_norm": 1.3504588977647882, "learning_rate": 4.467461909087129e-07, "loss": 0.6742, "step": 28337 }, { "epoch": 0.8685178374402354, "grad_norm": 1.341521703197195, "learning_rate": 4.465411462517394e-07, "loss": 0.583, "step": 28338 }, { "epoch": 0.8685484859629766, "grad_norm": 1.6387169901021092, "learning_rate": 4.463361464611604e-07, "loss": 0.6541, "step": 28339 }, { "epoch": 0.8685791344857178, "grad_norm": 0.43593767815586, "learning_rate": 4.46131191538996e-07, "loss": 0.371, "step": 28340 }, { "epoch": 0.868609783008459, "grad_norm": 1.2516122166593227, "learning_rate": 4.459262814872672e-07, "loss": 0.5907, "step": 28341 }, { "epoch": 0.8686404315312002, "grad_norm": 0.42876132041745035, "learning_rate": 4.457214163079915e-07, "loss": 0.3833, "step": 28342 }, { "epoch": 0.8686710800539414, "grad_norm": 1.2436169740745908, "learning_rate": 4.455165960031876e-07, "loss": 0.5892, "step": 28343 }, { "epoch": 0.8687017285766826, "grad_norm": 1.352234740793393, "learning_rate": 4.4531182057487464e-07, "loss": 0.6137, "step": 28344 }, { "epoch": 0.8687323770994239, "grad_norm": 1.8743078652646759, "learning_rate": 4.4510709002506924e-07, "loss": 0.5974, "step": 28345 }, { "epoch": 0.868763025622165, "grad_norm": 1.3722652929960617, "learning_rate": 4.449024043557887e-07, "loss": 0.6527, "step": 28346 }, { "epoch": 0.8687936741449063, "grad_norm": 1.303485005380175, "learning_rate": 4.446977635690514e-07, "loss": 0.5796, "step": 28347 }, { "epoch": 0.8688243226676474, "grad_norm": 1.3302597512789813, "learning_rate": 4.4449316766687177e-07, "loss": 0.5658, "step": 28348 }, { "epoch": 0.8688549711903886, "grad_norm": 1.4059331270996192, "learning_rate": 4.442886166512672e-07, "loss": 0.5519, "step": 28349 }, { "epoch": 0.8688856197131298, "grad_norm": 1.3955182364440462, "learning_rate": 4.440841105242516e-07, "loss": 0.594, "step": 28350 }, { "epoch": 0.868916268235871, "grad_norm": 1.2088369293340016, "learning_rate": 4.438796492878411e-07, "loss": 0.6005, "step": 28351 }, { "epoch": 0.8689469167586122, "grad_norm": 1.3729520265299717, "learning_rate": 4.436752329440508e-07, "loss": 0.641, "step": 28352 }, { "epoch": 0.8689775652813534, "grad_norm": 0.4534142540185119, "learning_rate": 4.434708614948935e-07, "loss": 0.3918, "step": 28353 }, { "epoch": 0.8690082138040947, "grad_norm": 1.36567306784281, "learning_rate": 4.432665349423837e-07, "loss": 0.5898, "step": 28354 }, { "epoch": 0.8690388623268358, "grad_norm": 1.4235824842443852, "learning_rate": 4.430622532885354e-07, "loss": 0.6655, "step": 28355 }, { "epoch": 0.8690695108495771, "grad_norm": 1.2398141617185574, "learning_rate": 4.4285801653535964e-07, "loss": 0.6431, "step": 28356 }, { "epoch": 0.8691001593723182, "grad_norm": 1.299558981784121, "learning_rate": 4.4265382468486993e-07, "loss": 0.6907, "step": 28357 }, { "epoch": 0.8691308078950595, "grad_norm": 1.349484788778034, "learning_rate": 4.424496777390791e-07, "loss": 0.6517, "step": 28358 }, { "epoch": 0.8691614564178006, "grad_norm": 1.4616519286621925, "learning_rate": 4.4224557569999715e-07, "loss": 0.5462, "step": 28359 }, { "epoch": 0.8691921049405419, "grad_norm": 1.29281086008904, "learning_rate": 4.4204151856963586e-07, "loss": 0.6068, "step": 28360 }, { "epoch": 0.869222753463283, "grad_norm": 1.3275502686513556, "learning_rate": 4.418375063500041e-07, "loss": 0.6329, "step": 28361 }, { "epoch": 0.8692534019860243, "grad_norm": 1.2414505368292932, "learning_rate": 4.416335390431159e-07, "loss": 0.6022, "step": 28362 }, { "epoch": 0.8692840505087654, "grad_norm": 1.332499661270873, "learning_rate": 4.414296166509785e-07, "loss": 0.5612, "step": 28363 }, { "epoch": 0.8693146990315067, "grad_norm": 1.2054769756433723, "learning_rate": 4.412257391756003e-07, "loss": 0.5676, "step": 28364 }, { "epoch": 0.8693453475542479, "grad_norm": 1.560952881284519, "learning_rate": 4.410219066189919e-07, "loss": 0.6176, "step": 28365 }, { "epoch": 0.8693759960769891, "grad_norm": 1.2776813139101033, "learning_rate": 4.408181189831612e-07, "loss": 0.5835, "step": 28366 }, { "epoch": 0.8694066445997303, "grad_norm": 1.5990624919473977, "learning_rate": 4.4061437627011597e-07, "loss": 0.6337, "step": 28367 }, { "epoch": 0.8694372931224715, "grad_norm": 1.3444564884276518, "learning_rate": 4.4041067848186347e-07, "loss": 0.6528, "step": 28368 }, { "epoch": 0.8694679416452127, "grad_norm": 1.4182760400740217, "learning_rate": 4.402070256204111e-07, "loss": 0.5703, "step": 28369 }, { "epoch": 0.8694985901679539, "grad_norm": 0.4341362218718813, "learning_rate": 4.4000341768776654e-07, "loss": 0.3981, "step": 28370 }, { "epoch": 0.8695292386906951, "grad_norm": 1.313840393601813, "learning_rate": 4.39799854685935e-07, "loss": 0.5782, "step": 28371 }, { "epoch": 0.8695598872134364, "grad_norm": 1.6168317002381238, "learning_rate": 4.3959633661692145e-07, "loss": 0.5909, "step": 28372 }, { "epoch": 0.8695905357361775, "grad_norm": 0.46858925519054034, "learning_rate": 4.3939286348273215e-07, "loss": 0.4116, "step": 28373 }, { "epoch": 0.8696211842589188, "grad_norm": 1.6993070588622525, "learning_rate": 4.391894352853726e-07, "loss": 0.6769, "step": 28374 }, { "epoch": 0.8696518327816599, "grad_norm": 0.4333598685122371, "learning_rate": 4.389860520268457e-07, "loss": 0.3587, "step": 28375 }, { "epoch": 0.8696824813044012, "grad_norm": 1.4411569138431979, "learning_rate": 4.3878271370915606e-07, "loss": 0.6272, "step": 28376 }, { "epoch": 0.8697131298271423, "grad_norm": 1.342223779362985, "learning_rate": 4.3857942033430857e-07, "loss": 0.4982, "step": 28377 }, { "epoch": 0.8697437783498836, "grad_norm": 1.2693257111747982, "learning_rate": 4.3837617190430393e-07, "loss": 0.5448, "step": 28378 }, { "epoch": 0.8697744268726247, "grad_norm": 1.3307311895079814, "learning_rate": 4.3817296842114667e-07, "loss": 0.6332, "step": 28379 }, { "epoch": 0.8698050753953659, "grad_norm": 0.421846221237105, "learning_rate": 4.379698098868368e-07, "loss": 0.3892, "step": 28380 }, { "epoch": 0.8698357239181072, "grad_norm": 1.319997138167096, "learning_rate": 4.3776669630338e-07, "loss": 0.6392, "step": 28381 }, { "epoch": 0.8698663724408483, "grad_norm": 1.2740218731921686, "learning_rate": 4.375636276727746e-07, "loss": 0.5634, "step": 28382 }, { "epoch": 0.8698970209635896, "grad_norm": 1.5037620302993098, "learning_rate": 4.373606039970213e-07, "loss": 0.6389, "step": 28383 }, { "epoch": 0.8699276694863307, "grad_norm": 1.2616278438875976, "learning_rate": 4.3715762527812125e-07, "loss": 0.6065, "step": 28384 }, { "epoch": 0.869958318009072, "grad_norm": 1.2777259665500094, "learning_rate": 4.3695469151807555e-07, "loss": 0.6154, "step": 28385 }, { "epoch": 0.8699889665318131, "grad_norm": 1.3270384663114212, "learning_rate": 4.3675180271888217e-07, "loss": 0.6141, "step": 28386 }, { "epoch": 0.8700196150545544, "grad_norm": 1.4804896583708533, "learning_rate": 4.365489588825406e-07, "loss": 0.5312, "step": 28387 }, { "epoch": 0.8700502635772955, "grad_norm": 1.4056827425295673, "learning_rate": 4.3634616001105024e-07, "loss": 0.6631, "step": 28388 }, { "epoch": 0.8700809121000368, "grad_norm": 0.44176257114528783, "learning_rate": 4.3614340610640905e-07, "loss": 0.3931, "step": 28389 }, { "epoch": 0.870111560622778, "grad_norm": 1.3644039048978693, "learning_rate": 4.3594069717061484e-07, "loss": 0.5607, "step": 28390 }, { "epoch": 0.8701422091455192, "grad_norm": 1.5371481420883228, "learning_rate": 4.3573803320566264e-07, "loss": 0.5704, "step": 28391 }, { "epoch": 0.8701728576682604, "grad_norm": 1.2967713813187665, "learning_rate": 4.355354142135537e-07, "loss": 0.5927, "step": 28392 }, { "epoch": 0.8702035061910016, "grad_norm": 1.4557341379478133, "learning_rate": 4.35332840196282e-07, "loss": 0.6019, "step": 28393 }, { "epoch": 0.8702341547137428, "grad_norm": 1.168821402773431, "learning_rate": 4.35130311155843e-07, "loss": 0.5579, "step": 28394 }, { "epoch": 0.870264803236484, "grad_norm": 1.3184694059832127, "learning_rate": 4.349278270942325e-07, "loss": 0.5226, "step": 28395 }, { "epoch": 0.8702954517592252, "grad_norm": 1.268884137733656, "learning_rate": 4.347253880134467e-07, "loss": 0.5862, "step": 28396 }, { "epoch": 0.8703261002819664, "grad_norm": 1.464764285244289, "learning_rate": 4.3452299391548047e-07, "loss": 0.5852, "step": 28397 }, { "epoch": 0.8703567488047076, "grad_norm": 1.3700119331998526, "learning_rate": 4.343206448023263e-07, "loss": 0.6054, "step": 28398 }, { "epoch": 0.8703873973274489, "grad_norm": 1.4252807010741986, "learning_rate": 4.3411834067597913e-07, "loss": 0.6417, "step": 28399 }, { "epoch": 0.87041804585019, "grad_norm": 1.405767233878121, "learning_rate": 4.33916081538433e-07, "loss": 0.5416, "step": 28400 }, { "epoch": 0.8704486943729313, "grad_norm": 1.3454649950944135, "learning_rate": 4.3371386739167966e-07, "loss": 0.674, "step": 28401 }, { "epoch": 0.8704793428956724, "grad_norm": 1.4499571712203883, "learning_rate": 4.335116982377108e-07, "loss": 0.6608, "step": 28402 }, { "epoch": 0.8705099914184137, "grad_norm": 1.478435031509581, "learning_rate": 4.333095740785209e-07, "loss": 0.6281, "step": 28403 }, { "epoch": 0.8705406399411548, "grad_norm": 1.2491973759016857, "learning_rate": 4.331074949161002e-07, "loss": 0.5793, "step": 28404 }, { "epoch": 0.8705712884638961, "grad_norm": 1.4099172002799363, "learning_rate": 4.329054607524391e-07, "loss": 0.5836, "step": 28405 }, { "epoch": 0.8706019369866372, "grad_norm": 1.2465079386872133, "learning_rate": 4.3270347158952894e-07, "loss": 0.6572, "step": 28406 }, { "epoch": 0.8706325855093785, "grad_norm": 1.5061012109561296, "learning_rate": 4.325015274293598e-07, "loss": 0.6445, "step": 28407 }, { "epoch": 0.8706632340321196, "grad_norm": 1.3364269258965087, "learning_rate": 4.3229962827392336e-07, "loss": 0.5712, "step": 28408 }, { "epoch": 0.8706938825548609, "grad_norm": 1.328926838734296, "learning_rate": 4.320977741252058e-07, "loss": 0.6212, "step": 28409 }, { "epoch": 0.8707245310776021, "grad_norm": 1.3702783032246664, "learning_rate": 4.318959649851978e-07, "loss": 0.6586, "step": 28410 }, { "epoch": 0.8707551796003432, "grad_norm": 1.4621395425804986, "learning_rate": 4.3169420085588885e-07, "loss": 0.569, "step": 28411 }, { "epoch": 0.8707858281230845, "grad_norm": 1.4230571253125075, "learning_rate": 4.3149248173926575e-07, "loss": 0.6669, "step": 28412 }, { "epoch": 0.8708164766458256, "grad_norm": 1.0462818086649868, "learning_rate": 4.31290807637314e-07, "loss": 0.6027, "step": 28413 }, { "epoch": 0.8708471251685669, "grad_norm": 1.2821551080112406, "learning_rate": 4.3108917855202494e-07, "loss": 0.6416, "step": 28414 }, { "epoch": 0.870877773691308, "grad_norm": 1.6343066305599219, "learning_rate": 4.308875944853824e-07, "loss": 0.5083, "step": 28415 }, { "epoch": 0.8709084222140493, "grad_norm": 1.257185774614795, "learning_rate": 4.3068605543937434e-07, "loss": 0.5785, "step": 28416 }, { "epoch": 0.8709390707367904, "grad_norm": 1.5002808685113898, "learning_rate": 4.304845614159842e-07, "loss": 0.6273, "step": 28417 }, { "epoch": 0.8709697192595317, "grad_norm": 0.4431844826668115, "learning_rate": 4.3028311241719964e-07, "loss": 0.364, "step": 28418 }, { "epoch": 0.8710003677822729, "grad_norm": 1.30146032715492, "learning_rate": 4.3008170844500543e-07, "loss": 0.6351, "step": 28419 }, { "epoch": 0.8710310163050141, "grad_norm": 1.494486328883653, "learning_rate": 4.2988034950138424e-07, "loss": 0.683, "step": 28420 }, { "epoch": 0.8710616648277553, "grad_norm": 0.44646516603567177, "learning_rate": 4.2967903558832125e-07, "loss": 0.4056, "step": 28421 }, { "epoch": 0.8710923133504965, "grad_norm": 1.229378874431054, "learning_rate": 4.294777667078015e-07, "loss": 0.5832, "step": 28422 }, { "epoch": 0.8711229618732377, "grad_norm": 1.3591376762892338, "learning_rate": 4.292765428618051e-07, "loss": 0.6003, "step": 28423 }, { "epoch": 0.8711536103959789, "grad_norm": 1.2810043502661173, "learning_rate": 4.2907536405231767e-07, "loss": 0.596, "step": 28424 }, { "epoch": 0.8711842589187201, "grad_norm": 1.5123134773775315, "learning_rate": 4.288742302813192e-07, "loss": 0.6523, "step": 28425 }, { "epoch": 0.8712149074414614, "grad_norm": 1.3027892953675122, "learning_rate": 4.2867314155079275e-07, "loss": 0.6474, "step": 28426 }, { "epoch": 0.8712455559642025, "grad_norm": 1.2269052648518204, "learning_rate": 4.284720978627205e-07, "loss": 0.5604, "step": 28427 }, { "epoch": 0.8712762044869438, "grad_norm": 1.2928160760588006, "learning_rate": 4.282710992190814e-07, "loss": 0.6309, "step": 28428 }, { "epoch": 0.8713068530096849, "grad_norm": 1.353627775707135, "learning_rate": 4.280701456218567e-07, "loss": 0.6245, "step": 28429 }, { "epoch": 0.8713375015324262, "grad_norm": 1.4435198459982483, "learning_rate": 4.2786923707302755e-07, "loss": 0.6517, "step": 28430 }, { "epoch": 0.8713681500551673, "grad_norm": 1.2660519073215466, "learning_rate": 4.2766837357457235e-07, "loss": 0.6478, "step": 28431 }, { "epoch": 0.8713987985779086, "grad_norm": 0.4451912496129159, "learning_rate": 4.2746755512846904e-07, "loss": 0.3814, "step": 28432 }, { "epoch": 0.8714294471006497, "grad_norm": 1.1461008702408808, "learning_rate": 4.2726678173669935e-07, "loss": 0.5667, "step": 28433 }, { "epoch": 0.871460095623391, "grad_norm": 1.379649461850902, "learning_rate": 4.270660534012394e-07, "loss": 0.5666, "step": 28434 }, { "epoch": 0.8714907441461321, "grad_norm": 1.3413399813035698, "learning_rate": 4.2686537012406883e-07, "loss": 0.6132, "step": 28435 }, { "epoch": 0.8715213926688734, "grad_norm": 1.2977377893540438, "learning_rate": 4.2666473190716264e-07, "loss": 0.5858, "step": 28436 }, { "epoch": 0.8715520411916146, "grad_norm": 0.4511266373194046, "learning_rate": 4.2646413875249925e-07, "loss": 0.4087, "step": 28437 }, { "epoch": 0.8715826897143558, "grad_norm": 1.4160084644866966, "learning_rate": 4.2626359066205546e-07, "loss": 0.5679, "step": 28438 }, { "epoch": 0.871613338237097, "grad_norm": 1.330613341642755, "learning_rate": 4.2606308763780577e-07, "loss": 0.657, "step": 28439 }, { "epoch": 0.8716439867598382, "grad_norm": 1.293134650995534, "learning_rate": 4.25862629681727e-07, "loss": 0.5962, "step": 28440 }, { "epoch": 0.8716746352825794, "grad_norm": 1.2266450367202995, "learning_rate": 4.2566221679579524e-07, "loss": 0.545, "step": 28441 }, { "epoch": 0.8717052838053205, "grad_norm": 1.4164305599647085, "learning_rate": 4.2546184898198285e-07, "loss": 0.5913, "step": 28442 }, { "epoch": 0.8717359323280618, "grad_norm": 0.453438472770768, "learning_rate": 4.2526152624226494e-07, "loss": 0.4006, "step": 28443 }, { "epoch": 0.8717665808508029, "grad_norm": 0.4667869654479291, "learning_rate": 4.250612485786171e-07, "loss": 0.3824, "step": 28444 }, { "epoch": 0.8717972293735442, "grad_norm": 1.420880823541137, "learning_rate": 4.2486101599301054e-07, "loss": 0.5802, "step": 28445 }, { "epoch": 0.8718278778962854, "grad_norm": 0.438741908955677, "learning_rate": 4.246608284874193e-07, "loss": 0.3919, "step": 28446 }, { "epoch": 0.8718585264190266, "grad_norm": 1.418845081615964, "learning_rate": 4.2446068606381507e-07, "loss": 0.5712, "step": 28447 }, { "epoch": 0.8718891749417678, "grad_norm": 1.1688249324152156, "learning_rate": 4.2426058872417074e-07, "loss": 0.5695, "step": 28448 }, { "epoch": 0.871919823464509, "grad_norm": 1.3612984723623642, "learning_rate": 4.2406053647045807e-07, "loss": 0.7069, "step": 28449 }, { "epoch": 0.8719504719872502, "grad_norm": 1.2543272573985866, "learning_rate": 4.238605293046466e-07, "loss": 0.6438, "step": 28450 }, { "epoch": 0.8719811205099914, "grad_norm": 1.5181884763379463, "learning_rate": 4.2366056722870865e-07, "loss": 0.6273, "step": 28451 }, { "epoch": 0.8720117690327326, "grad_norm": 1.3795217636272326, "learning_rate": 4.234606502446148e-07, "loss": 0.584, "step": 28452 }, { "epoch": 0.8720424175554738, "grad_norm": 1.3909597741560917, "learning_rate": 4.23260778354333e-07, "loss": 0.6289, "step": 28453 }, { "epoch": 0.872073066078215, "grad_norm": 1.3622728492374117, "learning_rate": 4.2306095155983387e-07, "loss": 0.6241, "step": 28454 }, { "epoch": 0.8721037146009563, "grad_norm": 0.4529410928026915, "learning_rate": 4.2286116986308747e-07, "loss": 0.3917, "step": 28455 }, { "epoch": 0.8721343631236974, "grad_norm": 1.2545179123124997, "learning_rate": 4.2266143326605947e-07, "loss": 0.6576, "step": 28456 }, { "epoch": 0.8721650116464387, "grad_norm": 0.456938290199455, "learning_rate": 4.224617417707211e-07, "loss": 0.3976, "step": 28457 }, { "epoch": 0.8721956601691798, "grad_norm": 1.4896896869974061, "learning_rate": 4.222620953790374e-07, "loss": 0.6231, "step": 28458 }, { "epoch": 0.8722263086919211, "grad_norm": 1.3060578437762853, "learning_rate": 4.2206249409297627e-07, "loss": 0.5056, "step": 28459 }, { "epoch": 0.8722569572146622, "grad_norm": 1.271047120468787, "learning_rate": 4.218629379145056e-07, "loss": 0.581, "step": 28460 }, { "epoch": 0.8722876057374035, "grad_norm": 1.3671073841786878, "learning_rate": 4.2166342684558994e-07, "loss": 0.6299, "step": 28461 }, { "epoch": 0.8723182542601446, "grad_norm": 1.199757469343897, "learning_rate": 4.214639608881965e-07, "loss": 0.5135, "step": 28462 }, { "epoch": 0.8723489027828859, "grad_norm": 1.2474720570697861, "learning_rate": 4.212645400442905e-07, "loss": 0.6506, "step": 28463 }, { "epoch": 0.872379551305627, "grad_norm": 1.3904445577367563, "learning_rate": 4.210651643158353e-07, "loss": 0.6576, "step": 28464 }, { "epoch": 0.8724101998283683, "grad_norm": 1.3504237882596328, "learning_rate": 4.2086583370479717e-07, "loss": 0.648, "step": 28465 }, { "epoch": 0.8724408483511095, "grad_norm": 1.2888322473385028, "learning_rate": 4.2066654821314e-07, "loss": 0.6568, "step": 28466 }, { "epoch": 0.8724714968738507, "grad_norm": 1.278560093420054, "learning_rate": 4.204673078428267e-07, "loss": 0.5923, "step": 28467 }, { "epoch": 0.8725021453965919, "grad_norm": 1.4700614283563491, "learning_rate": 4.202681125958213e-07, "loss": 0.6661, "step": 28468 }, { "epoch": 0.8725327939193331, "grad_norm": 1.3735219624397497, "learning_rate": 4.20068962474085e-07, "loss": 0.6185, "step": 28469 }, { "epoch": 0.8725634424420743, "grad_norm": 1.144168082072771, "learning_rate": 4.198698574795812e-07, "loss": 0.5479, "step": 28470 }, { "epoch": 0.8725940909648155, "grad_norm": 1.2706353201111766, "learning_rate": 4.196707976142722e-07, "loss": 0.6419, "step": 28471 }, { "epoch": 0.8726247394875567, "grad_norm": 1.3210453248571679, "learning_rate": 4.1947178288011815e-07, "loss": 0.5999, "step": 28472 }, { "epoch": 0.8726553880102978, "grad_norm": 1.3485028078537367, "learning_rate": 4.1927281327908074e-07, "loss": 0.59, "step": 28473 }, { "epoch": 0.8726860365330391, "grad_norm": 1.3745283794977876, "learning_rate": 4.1907388881312074e-07, "loss": 0.6004, "step": 28474 }, { "epoch": 0.8727166850557803, "grad_norm": 1.3535096928073498, "learning_rate": 4.1887500948419755e-07, "loss": 0.6567, "step": 28475 }, { "epoch": 0.8727473335785215, "grad_norm": 1.6624081203176875, "learning_rate": 4.186761752942714e-07, "loss": 0.5948, "step": 28476 }, { "epoch": 0.8727779821012627, "grad_norm": 1.2220816335555826, "learning_rate": 4.1847738624530007e-07, "loss": 0.6096, "step": 28477 }, { "epoch": 0.8728086306240039, "grad_norm": 1.2073402152747938, "learning_rate": 4.1827864233924374e-07, "loss": 0.5529, "step": 28478 }, { "epoch": 0.8728392791467451, "grad_norm": 1.3210741059821718, "learning_rate": 4.180799435780608e-07, "loss": 0.6035, "step": 28479 }, { "epoch": 0.8728699276694863, "grad_norm": 1.3265340158646854, "learning_rate": 4.1788128996370803e-07, "loss": 0.563, "step": 28480 }, { "epoch": 0.8729005761922275, "grad_norm": 1.5220442815243698, "learning_rate": 4.176826814981427e-07, "loss": 0.6109, "step": 28481 }, { "epoch": 0.8729312247149688, "grad_norm": 1.4892580678926286, "learning_rate": 4.174841181833239e-07, "loss": 0.6387, "step": 28482 }, { "epoch": 0.8729618732377099, "grad_norm": 1.2831307552387714, "learning_rate": 4.17285600021205e-07, "loss": 0.5705, "step": 28483 }, { "epoch": 0.8729925217604512, "grad_norm": 1.411736969114306, "learning_rate": 4.170871270137439e-07, "loss": 0.6113, "step": 28484 }, { "epoch": 0.8730231702831923, "grad_norm": 0.44484020184428885, "learning_rate": 4.168886991628968e-07, "loss": 0.3942, "step": 28485 }, { "epoch": 0.8730538188059336, "grad_norm": 1.2982450812045716, "learning_rate": 4.166903164706171e-07, "loss": 0.5271, "step": 28486 }, { "epoch": 0.8730844673286747, "grad_norm": 1.3854504583718052, "learning_rate": 4.164919789388616e-07, "loss": 0.6761, "step": 28487 }, { "epoch": 0.873115115851416, "grad_norm": 1.2390240195726039, "learning_rate": 4.16293686569581e-07, "loss": 0.541, "step": 28488 }, { "epoch": 0.8731457643741571, "grad_norm": 1.4802310603632984, "learning_rate": 4.160954393647337e-07, "loss": 0.5843, "step": 28489 }, { "epoch": 0.8731764128968984, "grad_norm": 1.1652817839971554, "learning_rate": 4.1589723732627094e-07, "loss": 0.5731, "step": 28490 }, { "epoch": 0.8732070614196396, "grad_norm": 0.43301400169481913, "learning_rate": 4.156990804561445e-07, "loss": 0.3857, "step": 28491 }, { "epoch": 0.8732377099423808, "grad_norm": 1.4380975345596565, "learning_rate": 4.155009687563083e-07, "loss": 0.578, "step": 28492 }, { "epoch": 0.873268358465122, "grad_norm": 1.1849324634437701, "learning_rate": 4.1530290222871474e-07, "loss": 0.5173, "step": 28493 }, { "epoch": 0.8732990069878632, "grad_norm": 1.2327388797795873, "learning_rate": 4.151048808753133e-07, "loss": 0.5876, "step": 28494 }, { "epoch": 0.8733296555106044, "grad_norm": 1.288201333390872, "learning_rate": 4.1490690469805694e-07, "loss": 0.5573, "step": 28495 }, { "epoch": 0.8733603040333456, "grad_norm": 1.2904398376881372, "learning_rate": 4.147089736988963e-07, "loss": 0.4789, "step": 28496 }, { "epoch": 0.8733909525560868, "grad_norm": 1.223793128922861, "learning_rate": 4.14511087879782e-07, "loss": 0.6002, "step": 28497 }, { "epoch": 0.873421601078828, "grad_norm": 1.3610239606167347, "learning_rate": 4.1431324724266306e-07, "loss": 0.6547, "step": 28498 }, { "epoch": 0.8734522496015692, "grad_norm": 1.2711902300705384, "learning_rate": 4.141154517894874e-07, "loss": 0.5465, "step": 28499 }, { "epoch": 0.8734828981243105, "grad_norm": 1.3625941153555783, "learning_rate": 4.139177015222073e-07, "loss": 0.535, "step": 28500 }, { "epoch": 0.8735135466470516, "grad_norm": 1.3005802867933633, "learning_rate": 4.137199964427696e-07, "loss": 0.5923, "step": 28501 }, { "epoch": 0.8735441951697929, "grad_norm": 1.4513885162135274, "learning_rate": 4.13522336553121e-07, "loss": 0.5955, "step": 28502 }, { "epoch": 0.873574843692534, "grad_norm": 1.3315772778119603, "learning_rate": 4.1332472185521054e-07, "loss": 0.6106, "step": 28503 }, { "epoch": 0.8736054922152752, "grad_norm": 0.4433840369648642, "learning_rate": 4.131271523509861e-07, "loss": 0.3893, "step": 28504 }, { "epoch": 0.8736361407380164, "grad_norm": 1.2394168094685298, "learning_rate": 4.129296280423928e-07, "loss": 0.5272, "step": 28505 }, { "epoch": 0.8736667892607576, "grad_norm": 1.4587098608021294, "learning_rate": 4.127321489313768e-07, "loss": 0.5731, "step": 28506 }, { "epoch": 0.8736974377834988, "grad_norm": 1.375986988656071, "learning_rate": 4.1253471501988495e-07, "loss": 0.6001, "step": 28507 }, { "epoch": 0.87372808630624, "grad_norm": 1.184159892277027, "learning_rate": 4.1233732630986343e-07, "loss": 0.5276, "step": 28508 }, { "epoch": 0.8737587348289813, "grad_norm": 1.3513854846606088, "learning_rate": 4.121399828032557e-07, "loss": 0.7191, "step": 28509 }, { "epoch": 0.8737893833517224, "grad_norm": 1.5060462687804794, "learning_rate": 4.1194268450200526e-07, "loss": 0.6375, "step": 28510 }, { "epoch": 0.8738200318744637, "grad_norm": 1.4069350952159867, "learning_rate": 4.1174543140805877e-07, "loss": 0.5596, "step": 28511 }, { "epoch": 0.8738506803972048, "grad_norm": 1.4856251255962856, "learning_rate": 4.1154822352335864e-07, "loss": 0.6503, "step": 28512 }, { "epoch": 0.8738813289199461, "grad_norm": 1.4088940370821588, "learning_rate": 4.1135106084984724e-07, "loss": 0.6366, "step": 28513 }, { "epoch": 0.8739119774426872, "grad_norm": 1.315527997145167, "learning_rate": 4.111539433894679e-07, "loss": 0.6003, "step": 28514 }, { "epoch": 0.8739426259654285, "grad_norm": 1.4193819240544865, "learning_rate": 4.109568711441625e-07, "loss": 0.531, "step": 28515 }, { "epoch": 0.8739732744881696, "grad_norm": 1.4410415187614636, "learning_rate": 4.1075984411587387e-07, "loss": 0.6272, "step": 28516 }, { "epoch": 0.8740039230109109, "grad_norm": 1.5994262515738709, "learning_rate": 4.105628623065422e-07, "loss": 0.5592, "step": 28517 }, { "epoch": 0.874034571533652, "grad_norm": 1.4296750628168977, "learning_rate": 4.1036592571810916e-07, "loss": 0.589, "step": 28518 }, { "epoch": 0.8740652200563933, "grad_norm": 1.3620541501626535, "learning_rate": 4.1016903435251554e-07, "loss": 0.5724, "step": 28519 }, { "epoch": 0.8740958685791345, "grad_norm": 1.892089456705478, "learning_rate": 4.099721882117008e-07, "loss": 0.6002, "step": 28520 }, { "epoch": 0.8741265171018757, "grad_norm": 1.3039752532772984, "learning_rate": 4.0977538729760344e-07, "loss": 0.6124, "step": 28521 }, { "epoch": 0.8741571656246169, "grad_norm": 1.3374854077594451, "learning_rate": 4.0957863161216416e-07, "loss": 0.4526, "step": 28522 }, { "epoch": 0.8741878141473581, "grad_norm": 1.2750391830476697, "learning_rate": 4.0938192115732076e-07, "loss": 0.5003, "step": 28523 }, { "epoch": 0.8742184626700993, "grad_norm": 1.2609215298079262, "learning_rate": 4.091852559350129e-07, "loss": 0.5093, "step": 28524 }, { "epoch": 0.8742491111928405, "grad_norm": 1.1657579717650668, "learning_rate": 4.089886359471762e-07, "loss": 0.5027, "step": 28525 }, { "epoch": 0.8742797597155817, "grad_norm": 0.46903338901315506, "learning_rate": 4.087920611957491e-07, "loss": 0.3845, "step": 28526 }, { "epoch": 0.874310408238323, "grad_norm": 1.2087954651656545, "learning_rate": 4.085955316826695e-07, "loss": 0.5429, "step": 28527 }, { "epoch": 0.8743410567610641, "grad_norm": 1.3299739399526194, "learning_rate": 4.083990474098731e-07, "loss": 0.6082, "step": 28528 }, { "epoch": 0.8743717052838054, "grad_norm": 0.4513120491494386, "learning_rate": 4.0820260837929394e-07, "loss": 0.4056, "step": 28529 }, { "epoch": 0.8744023538065465, "grad_norm": 1.1388389249627997, "learning_rate": 4.080062145928709e-07, "loss": 0.6065, "step": 28530 }, { "epoch": 0.8744330023292878, "grad_norm": 1.2428067573345014, "learning_rate": 4.078098660525376e-07, "loss": 0.5357, "step": 28531 }, { "epoch": 0.8744636508520289, "grad_norm": 1.4234559009252377, "learning_rate": 4.0761356276022736e-07, "loss": 0.6425, "step": 28532 }, { "epoch": 0.8744942993747702, "grad_norm": 1.249924599571087, "learning_rate": 4.0741730471787646e-07, "loss": 0.5948, "step": 28533 }, { "epoch": 0.8745249478975113, "grad_norm": 1.5241548429461094, "learning_rate": 4.072210919274172e-07, "loss": 0.6876, "step": 28534 }, { "epoch": 0.8745555964202525, "grad_norm": 0.45721733380590424, "learning_rate": 4.0702492439078534e-07, "loss": 0.3805, "step": 28535 }, { "epoch": 0.8745862449429938, "grad_norm": 0.4695811115923615, "learning_rate": 4.068288021099104e-07, "loss": 0.3937, "step": 28536 }, { "epoch": 0.8746168934657349, "grad_norm": 1.290510873376777, "learning_rate": 4.0663272508672693e-07, "loss": 0.6019, "step": 28537 }, { "epoch": 0.8746475419884762, "grad_norm": 0.44193222447902286, "learning_rate": 4.0643669332316726e-07, "loss": 0.3947, "step": 28538 }, { "epoch": 0.8746781905112173, "grad_norm": 0.4517408042718877, "learning_rate": 4.062407068211621e-07, "loss": 0.3674, "step": 28539 }, { "epoch": 0.8747088390339586, "grad_norm": 1.648032201561264, "learning_rate": 4.0604476558264106e-07, "loss": 0.6615, "step": 28540 }, { "epoch": 0.8747394875566997, "grad_norm": 1.2795700834645405, "learning_rate": 4.0584886960953806e-07, "loss": 0.5254, "step": 28541 }, { "epoch": 0.874770136079441, "grad_norm": 1.228238818477954, "learning_rate": 4.0565301890378053e-07, "loss": 0.5486, "step": 28542 }, { "epoch": 0.8748007846021821, "grad_norm": 1.3993579328315457, "learning_rate": 4.0545721346730017e-07, "loss": 0.5949, "step": 28543 }, { "epoch": 0.8748314331249234, "grad_norm": 1.2668670647074876, "learning_rate": 4.0526145330202494e-07, "loss": 0.5305, "step": 28544 }, { "epoch": 0.8748620816476645, "grad_norm": 1.3499869874213382, "learning_rate": 4.0506573840988386e-07, "loss": 0.5366, "step": 28545 }, { "epoch": 0.8748927301704058, "grad_norm": 1.3323081328097102, "learning_rate": 4.0487006879280646e-07, "loss": 0.4927, "step": 28546 }, { "epoch": 0.874923378693147, "grad_norm": 1.685971010688016, "learning_rate": 4.0467444445271953e-07, "loss": 0.7534, "step": 28547 }, { "epoch": 0.8749540272158882, "grad_norm": 0.428640510213418, "learning_rate": 4.0447886539155103e-07, "loss": 0.3826, "step": 28548 }, { "epoch": 0.8749846757386294, "grad_norm": 1.5456559889883297, "learning_rate": 4.042833316112288e-07, "loss": 0.6263, "step": 28549 }, { "epoch": 0.8750153242613706, "grad_norm": 1.4553708751776266, "learning_rate": 4.04087843113678e-07, "loss": 0.6404, "step": 28550 }, { "epoch": 0.8750459727841118, "grad_norm": 1.4298464494694465, "learning_rate": 4.038923999008254e-07, "loss": 0.6295, "step": 28551 }, { "epoch": 0.875076621306853, "grad_norm": 1.3077059246316258, "learning_rate": 4.036970019745978e-07, "loss": 0.5401, "step": 28552 }, { "epoch": 0.8751072698295942, "grad_norm": 1.332634306057636, "learning_rate": 4.0350164933691925e-07, "loss": 0.6373, "step": 28553 }, { "epoch": 0.8751379183523355, "grad_norm": 1.3094593510036183, "learning_rate": 4.0330634198971543e-07, "loss": 0.529, "step": 28554 }, { "epoch": 0.8751685668750766, "grad_norm": 1.3801560393330303, "learning_rate": 4.031110799349097e-07, "loss": 0.5981, "step": 28555 }, { "epoch": 0.8751992153978179, "grad_norm": 1.2803206669801115, "learning_rate": 4.029158631744262e-07, "loss": 0.5826, "step": 28556 }, { "epoch": 0.875229863920559, "grad_norm": 1.1294527972889619, "learning_rate": 4.0272069171019055e-07, "loss": 0.5816, "step": 28557 }, { "epoch": 0.8752605124433003, "grad_norm": 1.4019954466413405, "learning_rate": 4.025255655441229e-07, "loss": 0.6042, "step": 28558 }, { "epoch": 0.8752911609660414, "grad_norm": 1.3816842381409478, "learning_rate": 4.0233048467814727e-07, "loss": 0.5876, "step": 28559 }, { "epoch": 0.8753218094887827, "grad_norm": 1.28477980191284, "learning_rate": 4.0213544911418653e-07, "loss": 0.5342, "step": 28560 }, { "epoch": 0.8753524580115238, "grad_norm": 1.4559484813536803, "learning_rate": 4.0194045885416034e-07, "loss": 0.612, "step": 28561 }, { "epoch": 0.8753831065342651, "grad_norm": 0.4578546883448, "learning_rate": 4.017455138999921e-07, "loss": 0.3894, "step": 28562 }, { "epoch": 0.8754137550570062, "grad_norm": 0.4507122060116672, "learning_rate": 4.0155061425360187e-07, "loss": 0.38, "step": 28563 }, { "epoch": 0.8754444035797475, "grad_norm": 1.289244245922514, "learning_rate": 4.013557599169099e-07, "loss": 0.5385, "step": 28564 }, { "epoch": 0.8754750521024887, "grad_norm": 0.44695293048161394, "learning_rate": 4.0116095089183684e-07, "loss": 0.3988, "step": 28565 }, { "epoch": 0.8755057006252298, "grad_norm": 1.3531022612612682, "learning_rate": 4.0096618718030055e-07, "loss": 0.6246, "step": 28566 }, { "epoch": 0.8755363491479711, "grad_norm": 1.2589012726218898, "learning_rate": 4.0077146878422126e-07, "loss": 0.5462, "step": 28567 }, { "epoch": 0.8755669976707122, "grad_norm": 1.2329276771217916, "learning_rate": 4.005767957055179e-07, "loss": 0.5562, "step": 28568 }, { "epoch": 0.8755976461934535, "grad_norm": 1.4171659214571533, "learning_rate": 4.0038216794610786e-07, "loss": 0.6344, "step": 28569 }, { "epoch": 0.8756282947161946, "grad_norm": 1.2784016564903198, "learning_rate": 4.00187585507909e-07, "loss": 0.5497, "step": 28570 }, { "epoch": 0.8756589432389359, "grad_norm": 1.587312848005133, "learning_rate": 3.999930483928399e-07, "loss": 0.5665, "step": 28571 }, { "epoch": 0.875689591761677, "grad_norm": 0.4320892808345618, "learning_rate": 3.9979855660281505e-07, "loss": 0.3772, "step": 28572 }, { "epoch": 0.8757202402844183, "grad_norm": 1.356123828735175, "learning_rate": 3.9960411013975296e-07, "loss": 0.6638, "step": 28573 }, { "epoch": 0.8757508888071595, "grad_norm": 1.7021061286945462, "learning_rate": 3.9940970900556766e-07, "loss": 0.5398, "step": 28574 }, { "epoch": 0.8757815373299007, "grad_norm": 0.4525989638300353, "learning_rate": 3.9921535320217583e-07, "loss": 0.4028, "step": 28575 }, { "epoch": 0.8758121858526419, "grad_norm": 0.450642140420875, "learning_rate": 3.990210427314933e-07, "loss": 0.3947, "step": 28576 }, { "epoch": 0.8758428343753831, "grad_norm": 0.43548784625605175, "learning_rate": 3.9882677759543244e-07, "loss": 0.3821, "step": 28577 }, { "epoch": 0.8758734828981243, "grad_norm": 1.4500228930534327, "learning_rate": 3.986325577959088e-07, "loss": 0.596, "step": 28578 }, { "epoch": 0.8759041314208655, "grad_norm": 1.3832281442127548, "learning_rate": 3.9843838333483654e-07, "loss": 0.6236, "step": 28579 }, { "epoch": 0.8759347799436067, "grad_norm": 1.3985998961740536, "learning_rate": 3.9824425421412736e-07, "loss": 0.5685, "step": 28580 }, { "epoch": 0.875965428466348, "grad_norm": 1.4585521358824296, "learning_rate": 3.980501704356954e-07, "loss": 0.567, "step": 28581 }, { "epoch": 0.8759960769890891, "grad_norm": 1.3177449434275783, "learning_rate": 3.978561320014529e-07, "loss": 0.5995, "step": 28582 }, { "epoch": 0.8760267255118304, "grad_norm": 1.2460916623256697, "learning_rate": 3.9766213891331116e-07, "loss": 0.6075, "step": 28583 }, { "epoch": 0.8760573740345715, "grad_norm": 1.4577326155710004, "learning_rate": 3.97468191173182e-07, "loss": 0.7149, "step": 28584 }, { "epoch": 0.8760880225573128, "grad_norm": 1.2762559033498408, "learning_rate": 3.9727428878297613e-07, "loss": 0.5286, "step": 28585 }, { "epoch": 0.8761186710800539, "grad_norm": 1.3737625500620039, "learning_rate": 3.970804317446042e-07, "loss": 0.581, "step": 28586 }, { "epoch": 0.8761493196027952, "grad_norm": 1.3544061426707503, "learning_rate": 3.9688662005997747e-07, "loss": 0.6392, "step": 28587 }, { "epoch": 0.8761799681255363, "grad_norm": 1.1441264839908778, "learning_rate": 3.966928537310033e-07, "loss": 0.5428, "step": 28588 }, { "epoch": 0.8762106166482776, "grad_norm": 1.4479178318697776, "learning_rate": 3.9649913275959295e-07, "loss": 0.5594, "step": 28589 }, { "epoch": 0.8762412651710187, "grad_norm": 1.4239754682801653, "learning_rate": 3.963054571476549e-07, "loss": 0.7168, "step": 28590 }, { "epoch": 0.87627191369376, "grad_norm": 1.2506199697376572, "learning_rate": 3.9611182689709595e-07, "loss": 0.5638, "step": 28591 }, { "epoch": 0.8763025622165012, "grad_norm": 1.5010362155410444, "learning_rate": 3.959182420098256e-07, "loss": 0.6785, "step": 28592 }, { "epoch": 0.8763332107392424, "grad_norm": 0.4570616154899617, "learning_rate": 3.957247024877514e-07, "loss": 0.3968, "step": 28593 }, { "epoch": 0.8763638592619836, "grad_norm": 1.3113351115849528, "learning_rate": 3.955312083327795e-07, "loss": 0.606, "step": 28594 }, { "epoch": 0.8763945077847248, "grad_norm": 1.4537209365800474, "learning_rate": 3.9533775954681664e-07, "loss": 0.6822, "step": 28595 }, { "epoch": 0.876425156307466, "grad_norm": 1.2416664649505382, "learning_rate": 3.9514435613176805e-07, "loss": 0.5266, "step": 28596 }, { "epoch": 0.8764558048302071, "grad_norm": 1.2670555504981766, "learning_rate": 3.9495099808954165e-07, "loss": 0.5841, "step": 28597 }, { "epoch": 0.8764864533529484, "grad_norm": 1.5927935362045773, "learning_rate": 3.947576854220414e-07, "loss": 0.7208, "step": 28598 }, { "epoch": 0.8765171018756895, "grad_norm": 1.4371614520235407, "learning_rate": 3.945644181311709e-07, "loss": 0.6413, "step": 28599 }, { "epoch": 0.8765477503984308, "grad_norm": 1.3432332764946728, "learning_rate": 3.9437119621883626e-07, "loss": 0.7184, "step": 28600 }, { "epoch": 0.876578398921172, "grad_norm": 1.4374738070818378, "learning_rate": 3.9417801968694045e-07, "loss": 0.6298, "step": 28601 }, { "epoch": 0.8766090474439132, "grad_norm": 1.3338582249846447, "learning_rate": 3.93984888537387e-07, "loss": 0.594, "step": 28602 }, { "epoch": 0.8766396959666544, "grad_norm": 1.3102111860893386, "learning_rate": 3.9379180277207885e-07, "loss": 0.6033, "step": 28603 }, { "epoch": 0.8766703444893956, "grad_norm": 1.2770656529908677, "learning_rate": 3.935987623929183e-07, "loss": 0.5829, "step": 28604 }, { "epoch": 0.8767009930121368, "grad_norm": 1.1869312201729023, "learning_rate": 3.934057674018088e-07, "loss": 0.5949, "step": 28605 }, { "epoch": 0.876731641534878, "grad_norm": 1.2036823300209434, "learning_rate": 3.9321281780065055e-07, "loss": 0.4865, "step": 28606 }, { "epoch": 0.8767622900576192, "grad_norm": 1.2870052069549454, "learning_rate": 3.9301991359134373e-07, "loss": 0.5791, "step": 28607 }, { "epoch": 0.8767929385803604, "grad_norm": 1.2714118748231071, "learning_rate": 3.928270547757923e-07, "loss": 0.5143, "step": 28608 }, { "epoch": 0.8768235871031016, "grad_norm": 1.2841264205403902, "learning_rate": 3.926342413558948e-07, "loss": 0.5568, "step": 28609 }, { "epoch": 0.8768542356258429, "grad_norm": 0.47128105430608247, "learning_rate": 3.9244147333354965e-07, "loss": 0.399, "step": 28610 }, { "epoch": 0.876884884148584, "grad_norm": 0.45134064988913847, "learning_rate": 3.922487507106576e-07, "loss": 0.3919, "step": 28611 }, { "epoch": 0.8769155326713253, "grad_norm": 1.3945668787199321, "learning_rate": 3.920560734891188e-07, "loss": 0.6104, "step": 28612 }, { "epoch": 0.8769461811940664, "grad_norm": 1.1889388008750323, "learning_rate": 3.9186344167082945e-07, "loss": 0.5782, "step": 28613 }, { "epoch": 0.8769768297168077, "grad_norm": 1.3586538853359633, "learning_rate": 3.9167085525768864e-07, "loss": 0.577, "step": 28614 }, { "epoch": 0.8770074782395488, "grad_norm": 1.2488432758493697, "learning_rate": 3.9147831425159375e-07, "loss": 0.597, "step": 28615 }, { "epoch": 0.8770381267622901, "grad_norm": 1.3137986537901665, "learning_rate": 3.9128581865444325e-07, "loss": 0.6299, "step": 28616 }, { "epoch": 0.8770687752850312, "grad_norm": 1.1810056254418755, "learning_rate": 3.9109336846813285e-07, "loss": 0.586, "step": 28617 }, { "epoch": 0.8770994238077725, "grad_norm": 1.3092662008065028, "learning_rate": 3.9090096369455763e-07, "loss": 0.6545, "step": 28618 }, { "epoch": 0.8771300723305137, "grad_norm": 0.4218947749733105, "learning_rate": 3.907086043356145e-07, "loss": 0.3728, "step": 28619 }, { "epoch": 0.8771607208532549, "grad_norm": 1.255570832278606, "learning_rate": 3.905162903932003e-07, "loss": 0.5057, "step": 28620 }, { "epoch": 0.8771913693759961, "grad_norm": 1.3774905153207062, "learning_rate": 3.903240218692067e-07, "loss": 0.5434, "step": 28621 }, { "epoch": 0.8772220178987373, "grad_norm": 1.5059328281790052, "learning_rate": 3.9013179876553067e-07, "loss": 0.6127, "step": 28622 }, { "epoch": 0.8772526664214785, "grad_norm": 1.2603068607767984, "learning_rate": 3.89939621084065e-07, "loss": 0.5346, "step": 28623 }, { "epoch": 0.8772833149442197, "grad_norm": 0.45032045357911127, "learning_rate": 3.89747488826705e-07, "loss": 0.3893, "step": 28624 }, { "epoch": 0.8773139634669609, "grad_norm": 1.261973953495244, "learning_rate": 3.895554019953424e-07, "loss": 0.5684, "step": 28625 }, { "epoch": 0.8773446119897022, "grad_norm": 1.3370706711456566, "learning_rate": 3.893633605918684e-07, "loss": 0.6431, "step": 28626 }, { "epoch": 0.8773752605124433, "grad_norm": 1.4356542362306748, "learning_rate": 3.8917136461817884e-07, "loss": 0.6397, "step": 28627 }, { "epoch": 0.8774059090351845, "grad_norm": 0.47564865239962384, "learning_rate": 3.889794140761632e-07, "loss": 0.3967, "step": 28628 }, { "epoch": 0.8774365575579257, "grad_norm": 1.383172349389179, "learning_rate": 3.887875089677123e-07, "loss": 0.7202, "step": 28629 }, { "epoch": 0.8774672060806669, "grad_norm": 1.2458201133498463, "learning_rate": 3.8859564929471793e-07, "loss": 0.5218, "step": 28630 }, { "epoch": 0.8774978546034081, "grad_norm": 1.4540840818789862, "learning_rate": 3.8840383505907186e-07, "loss": 0.6801, "step": 28631 }, { "epoch": 0.8775285031261493, "grad_norm": 1.2928067273503607, "learning_rate": 3.882120662626615e-07, "loss": 0.5992, "step": 28632 }, { "epoch": 0.8775591516488905, "grad_norm": 1.3382268184059734, "learning_rate": 3.8802034290737756e-07, "loss": 0.6431, "step": 28633 }, { "epoch": 0.8775898001716317, "grad_norm": 1.4075397905016556, "learning_rate": 3.8782866499510905e-07, "loss": 0.5839, "step": 28634 }, { "epoch": 0.877620448694373, "grad_norm": 1.5423714096710435, "learning_rate": 3.876370325277462e-07, "loss": 0.565, "step": 28635 }, { "epoch": 0.8776510972171141, "grad_norm": 1.3787515604617893, "learning_rate": 3.874454455071752e-07, "loss": 0.5412, "step": 28636 }, { "epoch": 0.8776817457398554, "grad_norm": 1.3933543827267623, "learning_rate": 3.8725390393528293e-07, "loss": 0.57, "step": 28637 }, { "epoch": 0.8777123942625965, "grad_norm": 1.3676872640046918, "learning_rate": 3.870624078139601e-07, "loss": 0.5429, "step": 28638 }, { "epoch": 0.8777430427853378, "grad_norm": 0.4682568510113702, "learning_rate": 3.8687095714509124e-07, "loss": 0.3915, "step": 28639 }, { "epoch": 0.8777736913080789, "grad_norm": 1.2419115432032461, "learning_rate": 3.866795519305622e-07, "loss": 0.6233, "step": 28640 }, { "epoch": 0.8778043398308202, "grad_norm": 1.428756276231858, "learning_rate": 3.864881921722602e-07, "loss": 0.6676, "step": 28641 }, { "epoch": 0.8778349883535613, "grad_norm": 1.2332556282254772, "learning_rate": 3.862968778720705e-07, "loss": 0.6033, "step": 28642 }, { "epoch": 0.8778656368763026, "grad_norm": 1.5006837123892531, "learning_rate": 3.861056090318788e-07, "loss": 0.6139, "step": 28643 }, { "epoch": 0.8778962853990437, "grad_norm": 0.45923912431681935, "learning_rate": 3.859143856535685e-07, "loss": 0.3919, "step": 28644 }, { "epoch": 0.877926933921785, "grad_norm": 1.3927393073097314, "learning_rate": 3.8572320773902436e-07, "loss": 0.615, "step": 28645 }, { "epoch": 0.8779575824445262, "grad_norm": 1.3326453911152483, "learning_rate": 3.855320752901304e-07, "loss": 0.5189, "step": 28646 }, { "epoch": 0.8779882309672674, "grad_norm": 1.2573373501971363, "learning_rate": 3.8534098830877e-07, "loss": 0.5323, "step": 28647 }, { "epoch": 0.8780188794900086, "grad_norm": 1.3648317503375178, "learning_rate": 3.8514994679682395e-07, "loss": 0.586, "step": 28648 }, { "epoch": 0.8780495280127498, "grad_norm": 1.3974240107747264, "learning_rate": 3.849589507561774e-07, "loss": 0.5859, "step": 28649 }, { "epoch": 0.878080176535491, "grad_norm": 1.2735596249571741, "learning_rate": 3.8476800018871054e-07, "loss": 0.5964, "step": 28650 }, { "epoch": 0.8781108250582322, "grad_norm": 1.209253607047139, "learning_rate": 3.8457709509630623e-07, "loss": 0.5366, "step": 28651 }, { "epoch": 0.8781414735809734, "grad_norm": 1.4460200319230665, "learning_rate": 3.843862354808442e-07, "loss": 0.6588, "step": 28652 }, { "epoch": 0.8781721221037146, "grad_norm": 1.2322895715934195, "learning_rate": 3.8419542134420505e-07, "loss": 0.6156, "step": 28653 }, { "epoch": 0.8782027706264558, "grad_norm": 1.4017557898405602, "learning_rate": 3.840046526882707e-07, "loss": 0.5858, "step": 28654 }, { "epoch": 0.8782334191491971, "grad_norm": 1.3617850693046254, "learning_rate": 3.838139295149185e-07, "loss": 0.5761, "step": 28655 }, { "epoch": 0.8782640676719382, "grad_norm": 1.3277215233132467, "learning_rate": 3.8362325182602857e-07, "loss": 0.5399, "step": 28656 }, { "epoch": 0.8782947161946795, "grad_norm": 0.4552665643692307, "learning_rate": 3.834326196234811e-07, "loss": 0.3718, "step": 28657 }, { "epoch": 0.8783253647174206, "grad_norm": 0.4713394019105197, "learning_rate": 3.8324203290915296e-07, "loss": 0.4102, "step": 28658 }, { "epoch": 0.8783560132401618, "grad_norm": 1.4392040205390881, "learning_rate": 3.8305149168492094e-07, "loss": 0.6304, "step": 28659 }, { "epoch": 0.878386661762903, "grad_norm": 1.7258646281863075, "learning_rate": 3.8286099595266525e-07, "loss": 0.6325, "step": 28660 }, { "epoch": 0.8784173102856442, "grad_norm": 1.323332894424522, "learning_rate": 3.82670545714261e-07, "loss": 0.6062, "step": 28661 }, { "epoch": 0.8784479588083854, "grad_norm": 1.4767539498443316, "learning_rate": 3.824801409715856e-07, "loss": 0.57, "step": 28662 }, { "epoch": 0.8784786073311266, "grad_norm": 0.45699198017489545, "learning_rate": 3.822897817265142e-07, "loss": 0.376, "step": 28663 }, { "epoch": 0.8785092558538679, "grad_norm": 1.2579593522469505, "learning_rate": 3.820994679809231e-07, "loss": 0.5548, "step": 28664 }, { "epoch": 0.878539904376609, "grad_norm": 1.5293150335713217, "learning_rate": 3.819091997366886e-07, "loss": 0.5888, "step": 28665 }, { "epoch": 0.8785705528993503, "grad_norm": 1.3083048392327907, "learning_rate": 3.8171897699568304e-07, "loss": 0.5415, "step": 28666 }, { "epoch": 0.8786012014220914, "grad_norm": 1.6308550181925872, "learning_rate": 3.815287997597822e-07, "loss": 0.6259, "step": 28667 }, { "epoch": 0.8786318499448327, "grad_norm": 1.2893339636236205, "learning_rate": 3.813386680308606e-07, "loss": 0.5915, "step": 28668 }, { "epoch": 0.8786624984675738, "grad_norm": 0.4412923809912441, "learning_rate": 3.811485818107902e-07, "loss": 0.3844, "step": 28669 }, { "epoch": 0.8786931469903151, "grad_norm": 0.4521435352165402, "learning_rate": 3.809585411014455e-07, "loss": 0.3948, "step": 28670 }, { "epoch": 0.8787237955130562, "grad_norm": 1.4934372485308993, "learning_rate": 3.807685459046967e-07, "loss": 0.6655, "step": 28671 }, { "epoch": 0.8787544440357975, "grad_norm": 1.382172868749914, "learning_rate": 3.805785962224179e-07, "loss": 0.6466, "step": 28672 }, { "epoch": 0.8787850925585386, "grad_norm": 1.349766084891531, "learning_rate": 3.803886920564809e-07, "loss": 0.5379, "step": 28673 }, { "epoch": 0.8788157410812799, "grad_norm": 1.3492337756612198, "learning_rate": 3.8019883340875473e-07, "loss": 0.5749, "step": 28674 }, { "epoch": 0.8788463896040211, "grad_norm": 1.429785049269854, "learning_rate": 3.800090202811119e-07, "loss": 0.5731, "step": 28675 }, { "epoch": 0.8788770381267623, "grad_norm": 1.5476804561177162, "learning_rate": 3.798192526754235e-07, "loss": 0.5843, "step": 28676 }, { "epoch": 0.8789076866495035, "grad_norm": 0.4563919458011295, "learning_rate": 3.7962953059355655e-07, "loss": 0.3975, "step": 28677 }, { "epoch": 0.8789383351722447, "grad_norm": 0.4623426358464643, "learning_rate": 3.794398540373823e-07, "loss": 0.3989, "step": 28678 }, { "epoch": 0.8789689836949859, "grad_norm": 1.3634839843887832, "learning_rate": 3.7925022300877026e-07, "loss": 0.5667, "step": 28679 }, { "epoch": 0.8789996322177271, "grad_norm": 0.445906254289544, "learning_rate": 3.7906063750958734e-07, "loss": 0.3865, "step": 28680 }, { "epoch": 0.8790302807404683, "grad_norm": 1.5322625584150136, "learning_rate": 3.7887109754170315e-07, "loss": 0.6564, "step": 28681 }, { "epoch": 0.8790609292632096, "grad_norm": 1.6185140070404456, "learning_rate": 3.786816031069829e-07, "loss": 0.623, "step": 28682 }, { "epoch": 0.8790915777859507, "grad_norm": 0.4340141900252004, "learning_rate": 3.7849215420729615e-07, "loss": 0.3886, "step": 28683 }, { "epoch": 0.879122226308692, "grad_norm": 1.2966847527244472, "learning_rate": 3.7830275084450865e-07, "loss": 0.5899, "step": 28684 }, { "epoch": 0.8791528748314331, "grad_norm": 1.9524007749328238, "learning_rate": 3.7811339302048667e-07, "loss": 0.64, "step": 28685 }, { "epoch": 0.8791835233541744, "grad_norm": 1.343591871608128, "learning_rate": 3.779240807370954e-07, "loss": 0.4796, "step": 28686 }, { "epoch": 0.8792141718769155, "grad_norm": 1.3671837401638998, "learning_rate": 3.777348139962017e-07, "loss": 0.6223, "step": 28687 }, { "epoch": 0.8792448203996568, "grad_norm": 1.3170181905443206, "learning_rate": 3.775455927996685e-07, "loss": 0.5693, "step": 28688 }, { "epoch": 0.8792754689223979, "grad_norm": 1.3432861120502533, "learning_rate": 3.7735641714936157e-07, "loss": 0.6116, "step": 28689 }, { "epoch": 0.8793061174451391, "grad_norm": 0.45786473038601405, "learning_rate": 3.7716728704714547e-07, "loss": 0.385, "step": 28690 }, { "epoch": 0.8793367659678804, "grad_norm": 1.3200748074640825, "learning_rate": 3.7697820249488204e-07, "loss": 0.6365, "step": 28691 }, { "epoch": 0.8793674144906215, "grad_norm": 1.2794361368043548, "learning_rate": 3.7678916349443596e-07, "loss": 0.6091, "step": 28692 }, { "epoch": 0.8793980630133628, "grad_norm": 1.2339352371553567, "learning_rate": 3.766001700476685e-07, "loss": 0.508, "step": 28693 }, { "epoch": 0.8794287115361039, "grad_norm": 0.47404685574649985, "learning_rate": 3.7641122215644254e-07, "loss": 0.4154, "step": 28694 }, { "epoch": 0.8794593600588452, "grad_norm": 1.2537656711382392, "learning_rate": 3.7622231982262057e-07, "loss": 0.5615, "step": 28695 }, { "epoch": 0.8794900085815863, "grad_norm": 1.439810358387995, "learning_rate": 3.760334630480622e-07, "loss": 0.609, "step": 28696 }, { "epoch": 0.8795206571043276, "grad_norm": 1.2198612243904092, "learning_rate": 3.7584465183462925e-07, "loss": 0.6824, "step": 28697 }, { "epoch": 0.8795513056270687, "grad_norm": 1.2017017576176052, "learning_rate": 3.7565588618418305e-07, "loss": 0.5818, "step": 28698 }, { "epoch": 0.87958195414981, "grad_norm": 1.3856377213601638, "learning_rate": 3.7546716609858146e-07, "loss": 0.5076, "step": 28699 }, { "epoch": 0.8796126026725511, "grad_norm": 1.4971502331231237, "learning_rate": 3.752784915796853e-07, "loss": 0.6228, "step": 28700 }, { "epoch": 0.8796432511952924, "grad_norm": 1.368528439239154, "learning_rate": 3.750898626293542e-07, "loss": 0.5671, "step": 28701 }, { "epoch": 0.8796738997180336, "grad_norm": 1.3485859504207656, "learning_rate": 3.749012792494455e-07, "loss": 0.6257, "step": 28702 }, { "epoch": 0.8797045482407748, "grad_norm": 1.3432031071769301, "learning_rate": 3.7471274144181836e-07, "loss": 0.5629, "step": 28703 }, { "epoch": 0.879735196763516, "grad_norm": 0.46046473723415593, "learning_rate": 3.745242492083284e-07, "loss": 0.4017, "step": 28704 }, { "epoch": 0.8797658452862572, "grad_norm": 1.2475340599625504, "learning_rate": 3.743358025508359e-07, "loss": 0.5232, "step": 28705 }, { "epoch": 0.8797964938089984, "grad_norm": 1.3413147502131741, "learning_rate": 3.7414740147119653e-07, "loss": 0.5543, "step": 28706 }, { "epoch": 0.8798271423317396, "grad_norm": 1.385309780701896, "learning_rate": 3.739590459712661e-07, "loss": 0.7358, "step": 28707 }, { "epoch": 0.8798577908544808, "grad_norm": 1.406915305939255, "learning_rate": 3.7377073605290024e-07, "loss": 0.5901, "step": 28708 }, { "epoch": 0.879888439377222, "grad_norm": 0.4307153828469869, "learning_rate": 3.7358247171795593e-07, "loss": 0.3709, "step": 28709 }, { "epoch": 0.8799190878999632, "grad_norm": 1.2459048189894466, "learning_rate": 3.7339425296828603e-07, "loss": 0.5344, "step": 28710 }, { "epoch": 0.8799497364227045, "grad_norm": 0.4608321080906011, "learning_rate": 3.732060798057469e-07, "loss": 0.4007, "step": 28711 }, { "epoch": 0.8799803849454456, "grad_norm": 1.402647538767995, "learning_rate": 3.730179522321925e-07, "loss": 0.6588, "step": 28712 }, { "epoch": 0.8800110334681869, "grad_norm": 1.3363663682370412, "learning_rate": 3.728298702494754e-07, "loss": 0.6212, "step": 28713 }, { "epoch": 0.880041681990928, "grad_norm": 1.3209466047293252, "learning_rate": 3.726418338594506e-07, "loss": 0.6604, "step": 28714 }, { "epoch": 0.8800723305136693, "grad_norm": 1.4685781481546838, "learning_rate": 3.724538430639685e-07, "loss": 0.6415, "step": 28715 }, { "epoch": 0.8801029790364104, "grad_norm": 1.2375720428617074, "learning_rate": 3.72265897864883e-07, "loss": 0.5127, "step": 28716 }, { "epoch": 0.8801336275591517, "grad_norm": 1.5001242782774835, "learning_rate": 3.7207799826404603e-07, "loss": 0.69, "step": 28717 }, { "epoch": 0.8801642760818928, "grad_norm": 1.442862777305148, "learning_rate": 3.7189014426330826e-07, "loss": 0.555, "step": 28718 }, { "epoch": 0.8801949246046341, "grad_norm": 1.1639633092873292, "learning_rate": 3.717023358645211e-07, "loss": 0.5882, "step": 28719 }, { "epoch": 0.8802255731273753, "grad_norm": 1.450699641469456, "learning_rate": 3.715145730695358e-07, "loss": 0.6851, "step": 28720 }, { "epoch": 0.8802562216501164, "grad_norm": 1.429694384207706, "learning_rate": 3.713268558802008e-07, "loss": 0.6089, "step": 28721 }, { "epoch": 0.8802868701728577, "grad_norm": 1.3768373751961018, "learning_rate": 3.711391842983675e-07, "loss": 0.6142, "step": 28722 }, { "epoch": 0.8803175186955988, "grad_norm": 1.4577745144318337, "learning_rate": 3.709515583258821e-07, "loss": 0.6328, "step": 28723 }, { "epoch": 0.8803481672183401, "grad_norm": 1.2954110875957043, "learning_rate": 3.707639779645972e-07, "loss": 0.5947, "step": 28724 }, { "epoch": 0.8803788157410812, "grad_norm": 1.419686497157109, "learning_rate": 3.705764432163594e-07, "loss": 0.6822, "step": 28725 }, { "epoch": 0.8804094642638225, "grad_norm": 2.0876483341028758, "learning_rate": 3.703889540830158e-07, "loss": 0.6844, "step": 28726 }, { "epoch": 0.8804401127865636, "grad_norm": 1.3685735291729104, "learning_rate": 3.702015105664142e-07, "loss": 0.6011, "step": 28727 }, { "epoch": 0.8804707613093049, "grad_norm": 0.4547719040110778, "learning_rate": 3.70014112668402e-07, "loss": 0.4016, "step": 28728 }, { "epoch": 0.8805014098320461, "grad_norm": 1.4083550563558935, "learning_rate": 3.698267603908251e-07, "loss": 0.5919, "step": 28729 }, { "epoch": 0.8805320583547873, "grad_norm": 1.226024829214193, "learning_rate": 3.696394537355297e-07, "loss": 0.6319, "step": 28730 }, { "epoch": 0.8805627068775285, "grad_norm": 1.3929070020286247, "learning_rate": 3.694521927043615e-07, "loss": 0.5568, "step": 28731 }, { "epoch": 0.8805933554002697, "grad_norm": 0.48291325603211277, "learning_rate": 3.6926497729916633e-07, "loss": 0.3968, "step": 28732 }, { "epoch": 0.8806240039230109, "grad_norm": 1.2501030483710227, "learning_rate": 3.6907780752178877e-07, "loss": 0.6067, "step": 28733 }, { "epoch": 0.8806546524457521, "grad_norm": 1.3621684079909118, "learning_rate": 3.688906833740702e-07, "loss": 0.6202, "step": 28734 }, { "epoch": 0.8806853009684933, "grad_norm": 1.4565546640733784, "learning_rate": 3.687036048578585e-07, "loss": 0.5696, "step": 28735 }, { "epoch": 0.8807159494912346, "grad_norm": 1.3959488820789996, "learning_rate": 3.6851657197499503e-07, "loss": 0.5801, "step": 28736 }, { "epoch": 0.8807465980139757, "grad_norm": 1.2935164883972583, "learning_rate": 3.683295847273216e-07, "loss": 0.5929, "step": 28737 }, { "epoch": 0.880777246536717, "grad_norm": 1.3668645411004372, "learning_rate": 3.6814264311668235e-07, "loss": 0.6147, "step": 28738 }, { "epoch": 0.8808078950594581, "grad_norm": 1.3352402242484571, "learning_rate": 3.6795574714491966e-07, "loss": 0.6138, "step": 28739 }, { "epoch": 0.8808385435821994, "grad_norm": 1.4120317479460938, "learning_rate": 3.677688968138732e-07, "loss": 0.5938, "step": 28740 }, { "epoch": 0.8808691921049405, "grad_norm": 1.3035759421372872, "learning_rate": 3.675820921253848e-07, "loss": 0.6392, "step": 28741 }, { "epoch": 0.8808998406276818, "grad_norm": 1.4121183824960188, "learning_rate": 3.673953330812952e-07, "loss": 0.6098, "step": 28742 }, { "epoch": 0.8809304891504229, "grad_norm": 1.3539061500945921, "learning_rate": 3.6720861968344567e-07, "loss": 0.5795, "step": 28743 }, { "epoch": 0.8809611376731642, "grad_norm": 1.4311308485465384, "learning_rate": 3.670219519336754e-07, "loss": 0.657, "step": 28744 }, { "epoch": 0.8809917861959053, "grad_norm": 1.2399343220292889, "learning_rate": 3.668353298338212e-07, "loss": 0.4686, "step": 28745 }, { "epoch": 0.8810224347186466, "grad_norm": 1.2143425142745716, "learning_rate": 3.6664875338572546e-07, "loss": 0.6087, "step": 28746 }, { "epoch": 0.8810530832413878, "grad_norm": 0.43701297480970924, "learning_rate": 3.664622225912251e-07, "loss": 0.3726, "step": 28747 }, { "epoch": 0.881083731764129, "grad_norm": 1.1946098696215546, "learning_rate": 3.662757374521575e-07, "loss": 0.4702, "step": 28748 }, { "epoch": 0.8811143802868702, "grad_norm": 1.4240434867754992, "learning_rate": 3.660892979703601e-07, "loss": 0.6337, "step": 28749 }, { "epoch": 0.8811450288096114, "grad_norm": 1.6085813225168764, "learning_rate": 3.6590290414767084e-07, "loss": 0.6452, "step": 28750 }, { "epoch": 0.8811756773323526, "grad_norm": 1.2818489592829938, "learning_rate": 3.6571655598592715e-07, "loss": 0.5624, "step": 28751 }, { "epoch": 0.8812063258550937, "grad_norm": 1.2851947600741829, "learning_rate": 3.6553025348696256e-07, "loss": 0.5632, "step": 28752 }, { "epoch": 0.881236974377835, "grad_norm": 1.3765148717797124, "learning_rate": 3.6534399665261454e-07, "loss": 0.5604, "step": 28753 }, { "epoch": 0.8812676229005761, "grad_norm": 1.5403909248245535, "learning_rate": 3.6515778548471824e-07, "loss": 0.706, "step": 28754 }, { "epoch": 0.8812982714233174, "grad_norm": 1.2304354241266557, "learning_rate": 3.6497161998510833e-07, "loss": 0.5881, "step": 28755 }, { "epoch": 0.8813289199460586, "grad_norm": 0.4466716658193121, "learning_rate": 3.6478550015561775e-07, "loss": 0.3835, "step": 28756 }, { "epoch": 0.8813595684687998, "grad_norm": 1.352794243212733, "learning_rate": 3.6459942599808285e-07, "loss": 0.5829, "step": 28757 }, { "epoch": 0.881390216991541, "grad_norm": 1.4959891406825492, "learning_rate": 3.6441339751433546e-07, "loss": 0.685, "step": 28758 }, { "epoch": 0.8814208655142822, "grad_norm": 0.4670514955724507, "learning_rate": 3.6422741470620913e-07, "loss": 0.3958, "step": 28759 }, { "epoch": 0.8814515140370234, "grad_norm": 1.5268690418110922, "learning_rate": 3.640414775755358e-07, "loss": 0.6563, "step": 28760 }, { "epoch": 0.8814821625597646, "grad_norm": 1.3825816416012764, "learning_rate": 3.638555861241477e-07, "loss": 0.6114, "step": 28761 }, { "epoch": 0.8815128110825058, "grad_norm": 1.4152084542271017, "learning_rate": 3.636697403538775e-07, "loss": 0.6888, "step": 28762 }, { "epoch": 0.881543459605247, "grad_norm": 1.5063284002907704, "learning_rate": 3.634839402665552e-07, "loss": 0.6969, "step": 28763 }, { "epoch": 0.8815741081279882, "grad_norm": 1.3567395155239228, "learning_rate": 3.632981858640117e-07, "loss": 0.6401, "step": 28764 }, { "epoch": 0.8816047566507295, "grad_norm": 1.3002609939667347, "learning_rate": 3.6311247714807815e-07, "loss": 0.5305, "step": 28765 }, { "epoch": 0.8816354051734706, "grad_norm": 1.3560920527333367, "learning_rate": 3.6292681412058384e-07, "loss": 0.582, "step": 28766 }, { "epoch": 0.8816660536962119, "grad_norm": 1.354084590736668, "learning_rate": 3.6274119678335775e-07, "loss": 0.5281, "step": 28767 }, { "epoch": 0.881696702218953, "grad_norm": 1.1797277785391256, "learning_rate": 3.625556251382284e-07, "loss": 0.5896, "step": 28768 }, { "epoch": 0.8817273507416943, "grad_norm": 1.2842904236869126, "learning_rate": 3.623700991870255e-07, "loss": 0.5921, "step": 28769 }, { "epoch": 0.8817579992644354, "grad_norm": 1.199642832746155, "learning_rate": 3.6218461893157753e-07, "loss": 0.5585, "step": 28770 }, { "epoch": 0.8817886477871767, "grad_norm": 1.4413585788688226, "learning_rate": 3.619991843737097e-07, "loss": 0.6377, "step": 28771 }, { "epoch": 0.8818192963099178, "grad_norm": 1.3379513500734361, "learning_rate": 3.618137955152512e-07, "loss": 0.5765, "step": 28772 }, { "epoch": 0.8818499448326591, "grad_norm": 1.3880439417621733, "learning_rate": 3.616284523580288e-07, "loss": 0.5121, "step": 28773 }, { "epoch": 0.8818805933554003, "grad_norm": 1.423211342938422, "learning_rate": 3.6144315490386774e-07, "loss": 0.5954, "step": 28774 }, { "epoch": 0.8819112418781415, "grad_norm": 1.459038829878922, "learning_rate": 3.6125790315459263e-07, "loss": 0.5355, "step": 28775 }, { "epoch": 0.8819418904008827, "grad_norm": 1.5593645947225732, "learning_rate": 3.610726971120321e-07, "loss": 0.5672, "step": 28776 }, { "epoch": 0.8819725389236239, "grad_norm": 1.2621237905334137, "learning_rate": 3.608875367780079e-07, "loss": 0.6095, "step": 28777 }, { "epoch": 0.8820031874463651, "grad_norm": 0.44228409296940613, "learning_rate": 3.60702422154347e-07, "loss": 0.3897, "step": 28778 }, { "epoch": 0.8820338359691063, "grad_norm": 1.2729268831546663, "learning_rate": 3.605173532428713e-07, "loss": 0.5525, "step": 28779 }, { "epoch": 0.8820644844918475, "grad_norm": 0.43327365781734306, "learning_rate": 3.6033233004540534e-07, "loss": 0.362, "step": 28780 }, { "epoch": 0.8820951330145888, "grad_norm": 1.2134568325312454, "learning_rate": 3.601473525637728e-07, "loss": 0.4852, "step": 28781 }, { "epoch": 0.8821257815373299, "grad_norm": 1.3541860897315978, "learning_rate": 3.599624207997943e-07, "loss": 0.6225, "step": 28782 }, { "epoch": 0.882156430060071, "grad_norm": 1.1676681112465397, "learning_rate": 3.597775347552934e-07, "loss": 0.5146, "step": 28783 }, { "epoch": 0.8821870785828123, "grad_norm": 1.3588941774515613, "learning_rate": 3.5959269443209267e-07, "loss": 0.7073, "step": 28784 }, { "epoch": 0.8822177271055535, "grad_norm": 1.3581814201785853, "learning_rate": 3.5940789983201274e-07, "loss": 0.5686, "step": 28785 }, { "epoch": 0.8822483756282947, "grad_norm": 1.4266882424682767, "learning_rate": 3.592231509568722e-07, "loss": 0.6506, "step": 28786 }, { "epoch": 0.8822790241510359, "grad_norm": 1.3295075983051052, "learning_rate": 3.5903844780849464e-07, "loss": 0.5723, "step": 28787 }, { "epoch": 0.8823096726737771, "grad_norm": 1.457013436271413, "learning_rate": 3.588537903886985e-07, "loss": 0.5721, "step": 28788 }, { "epoch": 0.8823403211965183, "grad_norm": 1.3359532607541855, "learning_rate": 3.5866917869930405e-07, "loss": 0.6189, "step": 28789 }, { "epoch": 0.8823709697192595, "grad_norm": 1.3413812880618838, "learning_rate": 3.584846127421288e-07, "loss": 0.5924, "step": 28790 }, { "epoch": 0.8824016182420007, "grad_norm": 1.3198828766764328, "learning_rate": 3.583000925189922e-07, "loss": 0.6054, "step": 28791 }, { "epoch": 0.882432266764742, "grad_norm": 1.1627686778540571, "learning_rate": 3.5811561803171304e-07, "loss": 0.5384, "step": 28792 }, { "epoch": 0.8824629152874831, "grad_norm": 1.2244260837022172, "learning_rate": 3.5793118928210803e-07, "loss": 0.6407, "step": 28793 }, { "epoch": 0.8824935638102244, "grad_norm": 1.2514482504652091, "learning_rate": 3.577468062719941e-07, "loss": 0.5281, "step": 28794 }, { "epoch": 0.8825242123329655, "grad_norm": 1.3728077897924165, "learning_rate": 3.5756246900319034e-07, "loss": 0.614, "step": 28795 }, { "epoch": 0.8825548608557068, "grad_norm": 1.3793614134281489, "learning_rate": 3.573781774775098e-07, "loss": 0.5878, "step": 28796 }, { "epoch": 0.8825855093784479, "grad_norm": 1.2642005231136193, "learning_rate": 3.571939316967704e-07, "loss": 0.5346, "step": 28797 }, { "epoch": 0.8826161579011892, "grad_norm": 1.3297687761281323, "learning_rate": 3.5700973166278795e-07, "loss": 0.4901, "step": 28798 }, { "epoch": 0.8826468064239303, "grad_norm": 1.2914543372184941, "learning_rate": 3.5682557737737546e-07, "loss": 0.5855, "step": 28799 }, { "epoch": 0.8826774549466716, "grad_norm": 1.6520098732349495, "learning_rate": 3.566414688423492e-07, "loss": 0.6376, "step": 28800 }, { "epoch": 0.8827081034694128, "grad_norm": 1.3214405398543456, "learning_rate": 3.564574060595222e-07, "loss": 0.6623, "step": 28801 }, { "epoch": 0.882738751992154, "grad_norm": 1.5046065681026013, "learning_rate": 3.562733890307085e-07, "loss": 0.6469, "step": 28802 }, { "epoch": 0.8827694005148952, "grad_norm": 1.237999156444247, "learning_rate": 3.5608941775772175e-07, "loss": 0.5773, "step": 28803 }, { "epoch": 0.8828000490376364, "grad_norm": 1.2985113269771917, "learning_rate": 3.559054922423738e-07, "loss": 0.5559, "step": 28804 }, { "epoch": 0.8828306975603776, "grad_norm": 1.306928212269488, "learning_rate": 3.5572161248647705e-07, "loss": 0.5868, "step": 28805 }, { "epoch": 0.8828613460831188, "grad_norm": 1.3114990995610303, "learning_rate": 3.5553777849184403e-07, "loss": 0.5626, "step": 28806 }, { "epoch": 0.88289199460586, "grad_norm": 0.4707770953917505, "learning_rate": 3.5535399026028537e-07, "loss": 0.4022, "step": 28807 }, { "epoch": 0.8829226431286012, "grad_norm": 1.4080314070893523, "learning_rate": 3.55170247793612e-07, "loss": 0.6573, "step": 28808 }, { "epoch": 0.8829532916513424, "grad_norm": 1.351197174999915, "learning_rate": 3.549865510936351e-07, "loss": 0.537, "step": 28809 }, { "epoch": 0.8829839401740837, "grad_norm": 1.3289321360862318, "learning_rate": 3.548029001621639e-07, "loss": 0.5882, "step": 28810 }, { "epoch": 0.8830145886968248, "grad_norm": 1.1910807507228052, "learning_rate": 3.5461929500100857e-07, "loss": 0.642, "step": 28811 }, { "epoch": 0.8830452372195661, "grad_norm": 1.3102278566197665, "learning_rate": 3.5443573561197763e-07, "loss": 0.5716, "step": 28812 }, { "epoch": 0.8830758857423072, "grad_norm": 1.388929804459608, "learning_rate": 3.542522219968797e-07, "loss": 0.5882, "step": 28813 }, { "epoch": 0.8831065342650484, "grad_norm": 1.4083323042919988, "learning_rate": 3.5406875415752386e-07, "loss": 0.693, "step": 28814 }, { "epoch": 0.8831371827877896, "grad_norm": 0.4479811257126884, "learning_rate": 3.5388533209571696e-07, "loss": 0.4067, "step": 28815 }, { "epoch": 0.8831678313105308, "grad_norm": 1.2465505005148756, "learning_rate": 3.537019558132665e-07, "loss": 0.6708, "step": 28816 }, { "epoch": 0.883198479833272, "grad_norm": 0.44636007809834344, "learning_rate": 3.535186253119799e-07, "loss": 0.3836, "step": 28817 }, { "epoch": 0.8832291283560132, "grad_norm": 1.3722319129077896, "learning_rate": 3.5333534059366294e-07, "loss": 0.5887, "step": 28818 }, { "epoch": 0.8832597768787545, "grad_norm": 1.2586296859816426, "learning_rate": 3.5315210166012195e-07, "loss": 0.4841, "step": 28819 }, { "epoch": 0.8832904254014956, "grad_norm": 1.4177580118596318, "learning_rate": 3.5296890851316154e-07, "loss": 0.6249, "step": 28820 }, { "epoch": 0.8833210739242369, "grad_norm": 1.2306138999547485, "learning_rate": 3.5278576115458817e-07, "loss": 0.5978, "step": 28821 }, { "epoch": 0.883351722446978, "grad_norm": 1.3318226469555428, "learning_rate": 3.5260265958620586e-07, "loss": 0.6043, "step": 28822 }, { "epoch": 0.8833823709697193, "grad_norm": 1.5176325955305818, "learning_rate": 3.524196038098182e-07, "loss": 0.6907, "step": 28823 }, { "epoch": 0.8834130194924604, "grad_norm": 0.44252363304963566, "learning_rate": 3.5223659382722875e-07, "loss": 0.3665, "step": 28824 }, { "epoch": 0.8834436680152017, "grad_norm": 0.45242894050539306, "learning_rate": 3.520536296402427e-07, "loss": 0.4005, "step": 28825 }, { "epoch": 0.8834743165379428, "grad_norm": 1.383069818330756, "learning_rate": 3.518707112506603e-07, "loss": 0.5837, "step": 28826 }, { "epoch": 0.8835049650606841, "grad_norm": 0.45419352659862106, "learning_rate": 3.516878386602857e-07, "loss": 0.3815, "step": 28827 }, { "epoch": 0.8835356135834253, "grad_norm": 1.348514238827356, "learning_rate": 3.5150501187092013e-07, "loss": 0.6136, "step": 28828 }, { "epoch": 0.8835662621061665, "grad_norm": 1.5198087568725533, "learning_rate": 3.51322230884365e-07, "loss": 0.6704, "step": 28829 }, { "epoch": 0.8835969106289077, "grad_norm": 1.1696837519924048, "learning_rate": 3.511394957024217e-07, "loss": 0.6076, "step": 28830 }, { "epoch": 0.8836275591516489, "grad_norm": 1.347245531831518, "learning_rate": 3.5095680632688867e-07, "loss": 0.5642, "step": 28831 }, { "epoch": 0.8836582076743901, "grad_norm": 1.284689729872567, "learning_rate": 3.5077416275956956e-07, "loss": 0.6092, "step": 28832 }, { "epoch": 0.8836888561971313, "grad_norm": 1.3268920187092499, "learning_rate": 3.5059156500226235e-07, "loss": 0.6856, "step": 28833 }, { "epoch": 0.8837195047198725, "grad_norm": 1.3799556808722921, "learning_rate": 3.50409013056765e-07, "loss": 0.6169, "step": 28834 }, { "epoch": 0.8837501532426137, "grad_norm": 1.4654760836465295, "learning_rate": 3.5022650692487725e-07, "loss": 0.6972, "step": 28835 }, { "epoch": 0.8837808017653549, "grad_norm": 1.390302099372588, "learning_rate": 3.500440466083982e-07, "loss": 0.6545, "step": 28836 }, { "epoch": 0.8838114502880962, "grad_norm": 2.046082261699581, "learning_rate": 3.498616321091242e-07, "loss": 0.5853, "step": 28837 }, { "epoch": 0.8838420988108373, "grad_norm": 0.4290293833759216, "learning_rate": 3.4967926342885317e-07, "loss": 0.3915, "step": 28838 }, { "epoch": 0.8838727473335786, "grad_norm": 1.39264086105387, "learning_rate": 3.4949694056938324e-07, "loss": 0.5691, "step": 28839 }, { "epoch": 0.8839033958563197, "grad_norm": 1.3483756932298694, "learning_rate": 3.49314663532509e-07, "loss": 0.6017, "step": 28840 }, { "epoch": 0.883934044379061, "grad_norm": 0.4394636660706482, "learning_rate": 3.4913243232002846e-07, "loss": 0.388, "step": 28841 }, { "epoch": 0.8839646929018021, "grad_norm": 1.3704096801022576, "learning_rate": 3.489502469337336e-07, "loss": 0.5403, "step": 28842 }, { "epoch": 0.8839953414245434, "grad_norm": 1.3459846853975908, "learning_rate": 3.48768107375424e-07, "loss": 0.6794, "step": 28843 }, { "epoch": 0.8840259899472845, "grad_norm": 1.162192643162646, "learning_rate": 3.485860136468927e-07, "loss": 0.5912, "step": 28844 }, { "epoch": 0.8840566384700257, "grad_norm": 1.2877124323869418, "learning_rate": 3.4840396574993217e-07, "loss": 0.5602, "step": 28845 }, { "epoch": 0.884087286992767, "grad_norm": 0.4249686523052875, "learning_rate": 3.4822196368633767e-07, "loss": 0.3843, "step": 28846 }, { "epoch": 0.8841179355155081, "grad_norm": 1.460540975584031, "learning_rate": 3.480400074579032e-07, "loss": 0.5836, "step": 28847 }, { "epoch": 0.8841485840382494, "grad_norm": 1.1440639940037112, "learning_rate": 3.4785809706642027e-07, "loss": 0.555, "step": 28848 }, { "epoch": 0.8841792325609905, "grad_norm": 0.43836199966959294, "learning_rate": 3.476762325136812e-07, "loss": 0.3897, "step": 28849 }, { "epoch": 0.8842098810837318, "grad_norm": 1.5693809453415306, "learning_rate": 3.4749441380147906e-07, "loss": 0.578, "step": 28850 }, { "epoch": 0.8842405296064729, "grad_norm": 0.4479807793397852, "learning_rate": 3.4731264093160574e-07, "loss": 0.396, "step": 28851 }, { "epoch": 0.8842711781292142, "grad_norm": 1.3446990309730595, "learning_rate": 3.4713091390585096e-07, "loss": 0.6004, "step": 28852 }, { "epoch": 0.8843018266519553, "grad_norm": 1.427649610405494, "learning_rate": 3.469492327260043e-07, "loss": 0.6822, "step": 28853 }, { "epoch": 0.8843324751746966, "grad_norm": 1.3061063265005681, "learning_rate": 3.4676759739385946e-07, "loss": 0.6433, "step": 28854 }, { "epoch": 0.8843631236974377, "grad_norm": 0.4517160348779728, "learning_rate": 3.465860079112032e-07, "loss": 0.3951, "step": 28855 }, { "epoch": 0.884393772220179, "grad_norm": 1.27607394358637, "learning_rate": 3.464044642798259e-07, "loss": 0.5234, "step": 28856 }, { "epoch": 0.8844244207429202, "grad_norm": 1.3893528160188082, "learning_rate": 3.4622296650151545e-07, "loss": 0.6025, "step": 28857 }, { "epoch": 0.8844550692656614, "grad_norm": 1.3357494990115433, "learning_rate": 3.460415145780605e-07, "loss": 0.5758, "step": 28858 }, { "epoch": 0.8844857177884026, "grad_norm": 1.4175769480766103, "learning_rate": 3.4586010851125063e-07, "loss": 0.6604, "step": 28859 }, { "epoch": 0.8845163663111438, "grad_norm": 1.7194603845044159, "learning_rate": 3.4567874830287116e-07, "loss": 0.6643, "step": 28860 }, { "epoch": 0.884547014833885, "grad_norm": 1.4054389507116487, "learning_rate": 3.454974339547096e-07, "loss": 0.604, "step": 28861 }, { "epoch": 0.8845776633566262, "grad_norm": 1.4613539288525563, "learning_rate": 3.453161654685533e-07, "loss": 0.5684, "step": 28862 }, { "epoch": 0.8846083118793674, "grad_norm": 1.4008022581638695, "learning_rate": 3.451349428461881e-07, "loss": 0.688, "step": 28863 }, { "epoch": 0.8846389604021087, "grad_norm": 1.3081170231210322, "learning_rate": 3.449537660893987e-07, "loss": 0.6432, "step": 28864 }, { "epoch": 0.8846696089248498, "grad_norm": 1.716509247223222, "learning_rate": 3.447726351999703e-07, "loss": 0.6179, "step": 28865 }, { "epoch": 0.8847002574475911, "grad_norm": 1.4290720745676428, "learning_rate": 3.4459155017968925e-07, "loss": 0.6308, "step": 28866 }, { "epoch": 0.8847309059703322, "grad_norm": 1.3806936661487899, "learning_rate": 3.4441051103033807e-07, "loss": 0.5766, "step": 28867 }, { "epoch": 0.8847615544930735, "grad_norm": 1.2654612088793213, "learning_rate": 3.442295177537014e-07, "loss": 0.6073, "step": 28868 }, { "epoch": 0.8847922030158146, "grad_norm": 1.4634988473801398, "learning_rate": 3.4404857035156226e-07, "loss": 0.7097, "step": 28869 }, { "epoch": 0.8848228515385559, "grad_norm": 1.2331646064917152, "learning_rate": 3.438676688257053e-07, "loss": 0.5905, "step": 28870 }, { "epoch": 0.884853500061297, "grad_norm": 1.254799309748049, "learning_rate": 3.4368681317791086e-07, "loss": 0.627, "step": 28871 }, { "epoch": 0.8848841485840383, "grad_norm": 1.528146994739042, "learning_rate": 3.4350600340996023e-07, "loss": 0.6395, "step": 28872 }, { "epoch": 0.8849147971067794, "grad_norm": 1.4749187296966149, "learning_rate": 3.433252395236381e-07, "loss": 0.6565, "step": 28873 }, { "epoch": 0.8849454456295207, "grad_norm": 1.410072035045765, "learning_rate": 3.4314452152072354e-07, "loss": 0.5387, "step": 28874 }, { "epoch": 0.8849760941522619, "grad_norm": 0.43723165646250284, "learning_rate": 3.4296384940299687e-07, "loss": 0.3809, "step": 28875 }, { "epoch": 0.885006742675003, "grad_norm": 1.273287627949079, "learning_rate": 3.427832231722389e-07, "loss": 0.4902, "step": 28876 }, { "epoch": 0.8850373911977443, "grad_norm": 0.4501322243276845, "learning_rate": 3.4260264283022926e-07, "loss": 0.3884, "step": 28877 }, { "epoch": 0.8850680397204854, "grad_norm": 1.55074109436945, "learning_rate": 3.4242210837874876e-07, "loss": 0.5608, "step": 28878 }, { "epoch": 0.8850986882432267, "grad_norm": 1.7015584222164095, "learning_rate": 3.422416198195738e-07, "loss": 0.6139, "step": 28879 }, { "epoch": 0.8851293367659678, "grad_norm": 1.2849872908402888, "learning_rate": 3.420611771544835e-07, "loss": 0.6144, "step": 28880 }, { "epoch": 0.8851599852887091, "grad_norm": 1.3891410146180379, "learning_rate": 3.418807803852575e-07, "loss": 0.5643, "step": 28881 }, { "epoch": 0.8851906338114502, "grad_norm": 1.2619557327371551, "learning_rate": 3.4170042951367224e-07, "loss": 0.6183, "step": 28882 }, { "epoch": 0.8852212823341915, "grad_norm": 0.47668736673122164, "learning_rate": 3.415201245415023e-07, "loss": 0.3937, "step": 28883 }, { "epoch": 0.8852519308569327, "grad_norm": 0.4460485605957854, "learning_rate": 3.4133986547052855e-07, "loss": 0.3989, "step": 28884 }, { "epoch": 0.8852825793796739, "grad_norm": 1.197567226855666, "learning_rate": 3.4115965230252404e-07, "loss": 0.4835, "step": 28885 }, { "epoch": 0.8853132279024151, "grad_norm": 0.4433308252160093, "learning_rate": 3.4097948503926613e-07, "loss": 0.3793, "step": 28886 }, { "epoch": 0.8853438764251563, "grad_norm": 1.3904102536483662, "learning_rate": 3.407993636825291e-07, "loss": 0.5892, "step": 28887 }, { "epoch": 0.8853745249478975, "grad_norm": 2.551866622377069, "learning_rate": 3.406192882340875e-07, "loss": 0.606, "step": 28888 }, { "epoch": 0.8854051734706387, "grad_norm": 1.4538285065078322, "learning_rate": 3.4043925869571724e-07, "loss": 0.6725, "step": 28889 }, { "epoch": 0.8854358219933799, "grad_norm": 1.1910604660743054, "learning_rate": 3.4025927506919075e-07, "loss": 0.5855, "step": 28890 }, { "epoch": 0.8854664705161212, "grad_norm": 1.4629012815309346, "learning_rate": 3.4007933735628163e-07, "loss": 0.6445, "step": 28891 }, { "epoch": 0.8854971190388623, "grad_norm": 0.4374198416271927, "learning_rate": 3.398994455587634e-07, "loss": 0.3789, "step": 28892 }, { "epoch": 0.8855277675616036, "grad_norm": 1.3105141996698635, "learning_rate": 3.397195996784092e-07, "loss": 0.6285, "step": 28893 }, { "epoch": 0.8855584160843447, "grad_norm": 1.4458058714775575, "learning_rate": 3.39539799716988e-07, "loss": 0.5717, "step": 28894 }, { "epoch": 0.885589064607086, "grad_norm": 1.344045781963876, "learning_rate": 3.3936004567627523e-07, "loss": 0.5222, "step": 28895 }, { "epoch": 0.8856197131298271, "grad_norm": 0.434395024856949, "learning_rate": 3.391803375580394e-07, "loss": 0.3932, "step": 28896 }, { "epoch": 0.8856503616525684, "grad_norm": 1.3743166557741464, "learning_rate": 3.3900067536405346e-07, "loss": 0.5486, "step": 28897 }, { "epoch": 0.8856810101753095, "grad_norm": 1.3449016785721473, "learning_rate": 3.3882105909608497e-07, "loss": 0.5928, "step": 28898 }, { "epoch": 0.8857116586980508, "grad_norm": 1.36436606345915, "learning_rate": 3.386414887559059e-07, "loss": 0.6172, "step": 28899 }, { "epoch": 0.885742307220792, "grad_norm": 0.4378069746324427, "learning_rate": 3.384619643452852e-07, "loss": 0.3921, "step": 28900 }, { "epoch": 0.8857729557435332, "grad_norm": 0.4595845370895016, "learning_rate": 3.3828248586599113e-07, "loss": 0.3902, "step": 28901 }, { "epoch": 0.8858036042662744, "grad_norm": 1.2759043627619193, "learning_rate": 3.381030533197921e-07, "loss": 0.6158, "step": 28902 }, { "epoch": 0.8858342527890156, "grad_norm": 1.17142507405238, "learning_rate": 3.379236667084573e-07, "loss": 0.5845, "step": 28903 }, { "epoch": 0.8858649013117568, "grad_norm": 1.385005912957353, "learning_rate": 3.377443260337532e-07, "loss": 0.596, "step": 28904 }, { "epoch": 0.885895549834498, "grad_norm": 1.3707726814209018, "learning_rate": 3.375650312974466e-07, "loss": 0.5716, "step": 28905 }, { "epoch": 0.8859261983572392, "grad_norm": 0.42588298809687114, "learning_rate": 3.3738578250130547e-07, "loss": 0.3933, "step": 28906 }, { "epoch": 0.8859568468799803, "grad_norm": 1.4443392185731732, "learning_rate": 3.372065796470947e-07, "loss": 0.4958, "step": 28907 }, { "epoch": 0.8859874954027216, "grad_norm": 1.3193416073427633, "learning_rate": 3.370274227365811e-07, "loss": 0.5515, "step": 28908 }, { "epoch": 0.8860181439254627, "grad_norm": 0.4800566466508028, "learning_rate": 3.3684831177152876e-07, "loss": 0.395, "step": 28909 }, { "epoch": 0.886048792448204, "grad_norm": 1.3399211114875287, "learning_rate": 3.3666924675370307e-07, "loss": 0.5817, "step": 28910 }, { "epoch": 0.8860794409709452, "grad_norm": 1.266776713432722, "learning_rate": 3.3649022768486917e-07, "loss": 0.5106, "step": 28911 }, { "epoch": 0.8861100894936864, "grad_norm": 1.451371791729598, "learning_rate": 3.363112545667896e-07, "loss": 0.6174, "step": 28912 }, { "epoch": 0.8861407380164276, "grad_norm": 1.2866415156133424, "learning_rate": 3.361323274012279e-07, "loss": 0.5615, "step": 28913 }, { "epoch": 0.8861713865391688, "grad_norm": 1.3013414926031242, "learning_rate": 3.359534461899494e-07, "loss": 0.6247, "step": 28914 }, { "epoch": 0.88620203506191, "grad_norm": 1.8149119438766117, "learning_rate": 3.3577461093471376e-07, "loss": 0.753, "step": 28915 }, { "epoch": 0.8862326835846512, "grad_norm": 1.3615173329911183, "learning_rate": 3.3559582163728456e-07, "loss": 0.6256, "step": 28916 }, { "epoch": 0.8862633321073924, "grad_norm": 1.3770992976413297, "learning_rate": 3.3541707829942314e-07, "loss": 0.5435, "step": 28917 }, { "epoch": 0.8862939806301336, "grad_norm": 1.4720579970738783, "learning_rate": 3.352383809228904e-07, "loss": 0.576, "step": 28918 }, { "epoch": 0.8863246291528748, "grad_norm": 1.4973589841467572, "learning_rate": 3.350597295094482e-07, "loss": 0.5623, "step": 28919 }, { "epoch": 0.8863552776756161, "grad_norm": 1.247354394095301, "learning_rate": 3.348811240608552e-07, "loss": 0.5505, "step": 28920 }, { "epoch": 0.8863859261983572, "grad_norm": 1.3520929523107423, "learning_rate": 3.347025645788726e-07, "loss": 0.6247, "step": 28921 }, { "epoch": 0.8864165747210985, "grad_norm": 1.3252968391032334, "learning_rate": 3.345240510652592e-07, "loss": 0.6611, "step": 28922 }, { "epoch": 0.8864472232438396, "grad_norm": 1.255659805725923, "learning_rate": 3.3434558352177403e-07, "loss": 0.5982, "step": 28923 }, { "epoch": 0.8864778717665809, "grad_norm": 1.4376505175946672, "learning_rate": 3.341671619501752e-07, "loss": 0.5505, "step": 28924 }, { "epoch": 0.886508520289322, "grad_norm": 1.4242566372960226, "learning_rate": 3.339887863522223e-07, "loss": 0.6672, "step": 28925 }, { "epoch": 0.8865391688120633, "grad_norm": 1.4066643438989461, "learning_rate": 3.338104567296707e-07, "loss": 0.6637, "step": 28926 }, { "epoch": 0.8865698173348044, "grad_norm": 1.329161697616492, "learning_rate": 3.33632173084279e-07, "loss": 0.6266, "step": 28927 }, { "epoch": 0.8866004658575457, "grad_norm": 1.3594174989239314, "learning_rate": 3.334539354178029e-07, "loss": 0.6066, "step": 28928 }, { "epoch": 0.8866311143802869, "grad_norm": 1.2357539628343905, "learning_rate": 3.3327574373199946e-07, "loss": 0.4875, "step": 28929 }, { "epoch": 0.8866617629030281, "grad_norm": 0.4619914350000905, "learning_rate": 3.3309759802862496e-07, "loss": 0.3885, "step": 28930 }, { "epoch": 0.8866924114257693, "grad_norm": 1.3626609662575728, "learning_rate": 3.329194983094325e-07, "loss": 0.6568, "step": 28931 }, { "epoch": 0.8867230599485105, "grad_norm": 1.412132399225043, "learning_rate": 3.3274144457617897e-07, "loss": 0.6429, "step": 28932 }, { "epoch": 0.8867537084712517, "grad_norm": 1.229724102694108, "learning_rate": 3.3256343683061854e-07, "loss": 0.5564, "step": 28933 }, { "epoch": 0.8867843569939929, "grad_norm": 1.542763971990283, "learning_rate": 3.3238547507450425e-07, "loss": 0.551, "step": 28934 }, { "epoch": 0.8868150055167341, "grad_norm": 1.4179414499419325, "learning_rate": 3.3220755930959025e-07, "loss": 0.5373, "step": 28935 }, { "epoch": 0.8868456540394754, "grad_norm": 0.4797761988061275, "learning_rate": 3.3202968953763015e-07, "loss": 0.422, "step": 28936 }, { "epoch": 0.8868763025622165, "grad_norm": 0.47467811125921117, "learning_rate": 3.3185186576037474e-07, "loss": 0.4035, "step": 28937 }, { "epoch": 0.8869069510849577, "grad_norm": 1.1425863959241793, "learning_rate": 3.316740879795782e-07, "loss": 0.5138, "step": 28938 }, { "epoch": 0.8869375996076989, "grad_norm": 1.232949496320869, "learning_rate": 3.3149635619699026e-07, "loss": 0.6431, "step": 28939 }, { "epoch": 0.8869682481304401, "grad_norm": 1.3320396941589188, "learning_rate": 3.3131867041436394e-07, "loss": 0.6956, "step": 28940 }, { "epoch": 0.8869988966531813, "grad_norm": 1.6214710107391812, "learning_rate": 3.3114103063345006e-07, "loss": 0.6436, "step": 28941 }, { "epoch": 0.8870295451759225, "grad_norm": 1.422243720372171, "learning_rate": 3.3096343685599717e-07, "loss": 0.5919, "step": 28942 }, { "epoch": 0.8870601936986637, "grad_norm": 1.4513156574168253, "learning_rate": 3.3078588908375565e-07, "loss": 0.561, "step": 28943 }, { "epoch": 0.8870908422214049, "grad_norm": 1.4278943582618575, "learning_rate": 3.3060838731847676e-07, "loss": 0.6264, "step": 28944 }, { "epoch": 0.8871214907441461, "grad_norm": 1.3786125217640923, "learning_rate": 3.3043093156190754e-07, "loss": 0.6566, "step": 28945 }, { "epoch": 0.8871521392668873, "grad_norm": 1.3348091158604947, "learning_rate": 3.302535218157965e-07, "loss": 0.5298, "step": 28946 }, { "epoch": 0.8871827877896286, "grad_norm": 1.5546368031572795, "learning_rate": 3.300761580818934e-07, "loss": 0.6482, "step": 28947 }, { "epoch": 0.8872134363123697, "grad_norm": 1.41909152242225, "learning_rate": 3.298988403619441e-07, "loss": 0.6093, "step": 28948 }, { "epoch": 0.887244084835111, "grad_norm": 1.4482794340742005, "learning_rate": 3.297215686576971e-07, "loss": 0.6426, "step": 28949 }, { "epoch": 0.8872747333578521, "grad_norm": 1.291596735637904, "learning_rate": 3.2954434297089775e-07, "loss": 0.5877, "step": 28950 }, { "epoch": 0.8873053818805934, "grad_norm": 0.46988621649033513, "learning_rate": 3.29367163303293e-07, "loss": 0.3847, "step": 28951 }, { "epoch": 0.8873360304033345, "grad_norm": 1.366525848014388, "learning_rate": 3.2919002965662915e-07, "loss": 0.5607, "step": 28952 }, { "epoch": 0.8873666789260758, "grad_norm": 1.5379456462816137, "learning_rate": 3.2901294203265046e-07, "loss": 0.5544, "step": 28953 }, { "epoch": 0.8873973274488169, "grad_norm": 1.3108592186623795, "learning_rate": 3.288359004331021e-07, "loss": 0.5243, "step": 28954 }, { "epoch": 0.8874279759715582, "grad_norm": 1.1908058965107478, "learning_rate": 3.2865890485972995e-07, "loss": 0.5012, "step": 28955 }, { "epoch": 0.8874586244942994, "grad_norm": 1.4567888961018682, "learning_rate": 3.2848195531427594e-07, "loss": 0.6136, "step": 28956 }, { "epoch": 0.8874892730170406, "grad_norm": 0.47502908843942504, "learning_rate": 3.2830505179848425e-07, "loss": 0.4138, "step": 28957 }, { "epoch": 0.8875199215397818, "grad_norm": 1.4459015573586884, "learning_rate": 3.281281943140985e-07, "loss": 0.5917, "step": 28958 }, { "epoch": 0.887550570062523, "grad_norm": 1.4890768655252078, "learning_rate": 3.279513828628611e-07, "loss": 0.5558, "step": 28959 }, { "epoch": 0.8875812185852642, "grad_norm": 1.3813720384281438, "learning_rate": 3.2777461744651516e-07, "loss": 0.4558, "step": 28960 }, { "epoch": 0.8876118671080054, "grad_norm": 1.22206257823126, "learning_rate": 3.2759789806679987e-07, "loss": 0.4061, "step": 28961 }, { "epoch": 0.8876425156307466, "grad_norm": 1.4179847835871078, "learning_rate": 3.2742122472545825e-07, "loss": 0.5226, "step": 28962 }, { "epoch": 0.8876731641534878, "grad_norm": 1.2861050346176872, "learning_rate": 3.272445974242311e-07, "loss": 0.4893, "step": 28963 }, { "epoch": 0.887703812676229, "grad_norm": 1.506985774902551, "learning_rate": 3.2706801616485816e-07, "loss": 0.643, "step": 28964 }, { "epoch": 0.8877344611989703, "grad_norm": 1.1961648956908275, "learning_rate": 3.268914809490797e-07, "loss": 0.6051, "step": 28965 }, { "epoch": 0.8877651097217114, "grad_norm": 1.465296044521024, "learning_rate": 3.267149917786361e-07, "loss": 0.67, "step": 28966 }, { "epoch": 0.8877957582444527, "grad_norm": 1.4894360323568139, "learning_rate": 3.2653854865526414e-07, "loss": 0.5914, "step": 28967 }, { "epoch": 0.8878264067671938, "grad_norm": 0.44325084113970536, "learning_rate": 3.263621515807047e-07, "loss": 0.3868, "step": 28968 }, { "epoch": 0.887857055289935, "grad_norm": 1.2546119127400297, "learning_rate": 3.2618580055669313e-07, "loss": 0.5315, "step": 28969 }, { "epoch": 0.8878877038126762, "grad_norm": 1.5181810774297027, "learning_rate": 3.2600949558497076e-07, "loss": 0.6004, "step": 28970 }, { "epoch": 0.8879183523354174, "grad_norm": 1.2930310895634443, "learning_rate": 3.2583323666727174e-07, "loss": 0.5694, "step": 28971 }, { "epoch": 0.8879490008581586, "grad_norm": 1.2878351140113768, "learning_rate": 3.256570238053336e-07, "loss": 0.5537, "step": 28972 }, { "epoch": 0.8879796493808998, "grad_norm": 1.2911299844642288, "learning_rate": 3.2548085700089273e-07, "loss": 0.6203, "step": 28973 }, { "epoch": 0.888010297903641, "grad_norm": 1.3259950796461382, "learning_rate": 3.2530473625568606e-07, "loss": 0.5516, "step": 28974 }, { "epoch": 0.8880409464263822, "grad_norm": 1.2347931738910425, "learning_rate": 3.251286615714466e-07, "loss": 0.5552, "step": 28975 }, { "epoch": 0.8880715949491235, "grad_norm": 1.400871257354922, "learning_rate": 3.2495263294991084e-07, "loss": 0.4934, "step": 28976 }, { "epoch": 0.8881022434718646, "grad_norm": 1.6461933122394463, "learning_rate": 3.247766503928129e-07, "loss": 0.6989, "step": 28977 }, { "epoch": 0.8881328919946059, "grad_norm": 1.3655973321295753, "learning_rate": 3.246007139018875e-07, "loss": 0.5256, "step": 28978 }, { "epoch": 0.888163540517347, "grad_norm": 1.3860841415846668, "learning_rate": 3.244248234788677e-07, "loss": 0.5729, "step": 28979 }, { "epoch": 0.8881941890400883, "grad_norm": 1.2591955010305786, "learning_rate": 3.242489791254849e-07, "loss": 0.5753, "step": 28980 }, { "epoch": 0.8882248375628294, "grad_norm": 1.40675691594001, "learning_rate": 3.2407318084347494e-07, "loss": 0.5716, "step": 28981 }, { "epoch": 0.8882554860855707, "grad_norm": 1.2379506298344338, "learning_rate": 3.238974286345681e-07, "loss": 0.6138, "step": 28982 }, { "epoch": 0.8882861346083119, "grad_norm": 1.3743509016714957, "learning_rate": 3.2372172250049513e-07, "loss": 0.5338, "step": 28983 }, { "epoch": 0.8883167831310531, "grad_norm": 1.5999971288859183, "learning_rate": 3.2354606244298925e-07, "loss": 0.6091, "step": 28984 }, { "epoch": 0.8883474316537943, "grad_norm": 1.4260495923900565, "learning_rate": 3.233704484637801e-07, "loss": 0.6191, "step": 28985 }, { "epoch": 0.8883780801765355, "grad_norm": 1.5968718803504018, "learning_rate": 3.23194880564599e-07, "loss": 0.65, "step": 28986 }, { "epoch": 0.8884087286992767, "grad_norm": 1.3017684599607422, "learning_rate": 3.2301935874717527e-07, "loss": 0.6007, "step": 28987 }, { "epoch": 0.8884393772220179, "grad_norm": 1.4161810793727403, "learning_rate": 3.22843883013238e-07, "loss": 0.6174, "step": 28988 }, { "epoch": 0.8884700257447591, "grad_norm": 0.46356710163771386, "learning_rate": 3.2266845336451747e-07, "loss": 0.3986, "step": 28989 }, { "epoch": 0.8885006742675003, "grad_norm": 1.3208737424553103, "learning_rate": 3.224930698027412e-07, "loss": 0.4673, "step": 28990 }, { "epoch": 0.8885313227902415, "grad_norm": 1.2705072175263443, "learning_rate": 3.223177323296367e-07, "loss": 0.6075, "step": 28991 }, { "epoch": 0.8885619713129828, "grad_norm": 1.4522184724794163, "learning_rate": 3.2214244094693313e-07, "loss": 0.6073, "step": 28992 }, { "epoch": 0.8885926198357239, "grad_norm": 1.4015410094228755, "learning_rate": 3.2196719565635747e-07, "loss": 0.57, "step": 28993 }, { "epoch": 0.8886232683584652, "grad_norm": 1.4522850186450873, "learning_rate": 3.21791996459635e-07, "loss": 0.6613, "step": 28994 }, { "epoch": 0.8886539168812063, "grad_norm": 1.3670702828888872, "learning_rate": 3.2161684335849317e-07, "loss": 0.5555, "step": 28995 }, { "epoch": 0.8886845654039476, "grad_norm": 1.3563408407458342, "learning_rate": 3.2144173635465735e-07, "loss": 0.6236, "step": 28996 }, { "epoch": 0.8887152139266887, "grad_norm": 1.296493626635086, "learning_rate": 3.2126667544985393e-07, "loss": 0.547, "step": 28997 }, { "epoch": 0.88874586244943, "grad_norm": 1.3341048813606515, "learning_rate": 3.210916606458064e-07, "loss": 0.5553, "step": 28998 }, { "epoch": 0.8887765109721711, "grad_norm": 1.2628633725023612, "learning_rate": 3.2091669194424025e-07, "loss": 0.6189, "step": 28999 }, { "epoch": 0.8888071594949123, "grad_norm": 1.4138520007907995, "learning_rate": 3.207417693468795e-07, "loss": 0.5868, "step": 29000 }, { "epoch": 0.8888378080176536, "grad_norm": 1.4578869576452596, "learning_rate": 3.205668928554473e-07, "loss": 0.6335, "step": 29001 }, { "epoch": 0.8888684565403947, "grad_norm": 1.3858191771527777, "learning_rate": 3.203920624716661e-07, "loss": 0.555, "step": 29002 }, { "epoch": 0.888899105063136, "grad_norm": 1.3016924332618451, "learning_rate": 3.20217278197259e-07, "loss": 0.5958, "step": 29003 }, { "epoch": 0.8889297535858771, "grad_norm": 1.5470899492428876, "learning_rate": 3.200425400339485e-07, "loss": 0.6514, "step": 29004 }, { "epoch": 0.8889604021086184, "grad_norm": 1.3882935847256848, "learning_rate": 3.198678479834572e-07, "loss": 0.5593, "step": 29005 }, { "epoch": 0.8889910506313595, "grad_norm": 1.2610065584005026, "learning_rate": 3.1969320204750467e-07, "loss": 0.5333, "step": 29006 }, { "epoch": 0.8890216991541008, "grad_norm": 0.4612514445361171, "learning_rate": 3.1951860222781296e-07, "loss": 0.39, "step": 29007 }, { "epoch": 0.8890523476768419, "grad_norm": 1.2329945642927131, "learning_rate": 3.1934404852610235e-07, "loss": 0.5314, "step": 29008 }, { "epoch": 0.8890829961995832, "grad_norm": 1.3302644814801272, "learning_rate": 3.191695409440915e-07, "loss": 0.6468, "step": 29009 }, { "epoch": 0.8891136447223243, "grad_norm": 1.53496801431706, "learning_rate": 3.1899507948350115e-07, "loss": 0.6333, "step": 29010 }, { "epoch": 0.8891442932450656, "grad_norm": 1.438871938305732, "learning_rate": 3.1882066414605063e-07, "loss": 0.6576, "step": 29011 }, { "epoch": 0.8891749417678068, "grad_norm": 1.5048327867359026, "learning_rate": 3.186462949334568e-07, "loss": 0.6531, "step": 29012 }, { "epoch": 0.889205590290548, "grad_norm": 1.3584078213650574, "learning_rate": 3.1847197184743997e-07, "loss": 0.5882, "step": 29013 }, { "epoch": 0.8892362388132892, "grad_norm": 1.2979744464034733, "learning_rate": 3.182976948897154e-07, "loss": 0.6491, "step": 29014 }, { "epoch": 0.8892668873360304, "grad_norm": 0.4365300417475798, "learning_rate": 3.1812346406200176e-07, "loss": 0.3787, "step": 29015 }, { "epoch": 0.8892975358587716, "grad_norm": 0.45024583607021396, "learning_rate": 3.179492793660166e-07, "loss": 0.3864, "step": 29016 }, { "epoch": 0.8893281843815128, "grad_norm": 1.3423841614124024, "learning_rate": 3.1777514080347404e-07, "loss": 0.6299, "step": 29017 }, { "epoch": 0.889358832904254, "grad_norm": 1.379110883403265, "learning_rate": 3.176010483760911e-07, "loss": 0.5777, "step": 29018 }, { "epoch": 0.8893894814269953, "grad_norm": 1.3025030831107534, "learning_rate": 3.174270020855835e-07, "loss": 0.648, "step": 29019 }, { "epoch": 0.8894201299497364, "grad_norm": 1.337675717587274, "learning_rate": 3.1725300193366615e-07, "loss": 0.5933, "step": 29020 }, { "epoch": 0.8894507784724777, "grad_norm": 1.649892273428425, "learning_rate": 3.1707904792205144e-07, "loss": 0.6054, "step": 29021 }, { "epoch": 0.8894814269952188, "grad_norm": 1.4208704320709107, "learning_rate": 3.1690514005245643e-07, "loss": 0.526, "step": 29022 }, { "epoch": 0.8895120755179601, "grad_norm": 0.474976964671932, "learning_rate": 3.16731278326593e-07, "loss": 0.4126, "step": 29023 }, { "epoch": 0.8895427240407012, "grad_norm": 1.387167476583917, "learning_rate": 3.165574627461748e-07, "loss": 0.6347, "step": 29024 }, { "epoch": 0.8895733725634425, "grad_norm": 1.6812559730337016, "learning_rate": 3.1638369331291386e-07, "loss": 0.6393, "step": 29025 }, { "epoch": 0.8896040210861836, "grad_norm": 1.3610282761163417, "learning_rate": 3.162099700285226e-07, "loss": 0.5479, "step": 29026 }, { "epoch": 0.8896346696089249, "grad_norm": 0.4627577445067918, "learning_rate": 3.160362928947136e-07, "loss": 0.3901, "step": 29027 }, { "epoch": 0.889665318131666, "grad_norm": 1.3913712447511246, "learning_rate": 3.158626619131966e-07, "loss": 0.5849, "step": 29028 }, { "epoch": 0.8896959666544073, "grad_norm": 1.499704956309271, "learning_rate": 3.156890770856835e-07, "loss": 0.6641, "step": 29029 }, { "epoch": 0.8897266151771485, "grad_norm": 1.4534667738072904, "learning_rate": 3.1551553841388526e-07, "loss": 0.6446, "step": 29030 }, { "epoch": 0.8897572636998896, "grad_norm": 1.458719063894563, "learning_rate": 3.153420458995099e-07, "loss": 0.592, "step": 29031 }, { "epoch": 0.8897879122226309, "grad_norm": 1.434745854508761, "learning_rate": 3.1516859954426826e-07, "loss": 0.6062, "step": 29032 }, { "epoch": 0.889818560745372, "grad_norm": 1.3785596888654812, "learning_rate": 3.1499519934986956e-07, "loss": 0.6193, "step": 29033 }, { "epoch": 0.8898492092681133, "grad_norm": 1.2414704213331458, "learning_rate": 3.148218453180213e-07, "loss": 0.4984, "step": 29034 }, { "epoch": 0.8898798577908544, "grad_norm": 1.3899577359713753, "learning_rate": 3.1464853745043324e-07, "loss": 0.6583, "step": 29035 }, { "epoch": 0.8899105063135957, "grad_norm": 1.3960477065666312, "learning_rate": 3.1447527574881064e-07, "loss": 0.5543, "step": 29036 }, { "epoch": 0.8899411548363368, "grad_norm": 1.2319284146106153, "learning_rate": 3.143020602148622e-07, "loss": 0.4846, "step": 29037 }, { "epoch": 0.8899718033590781, "grad_norm": 1.4023735075759876, "learning_rate": 3.141288908502954e-07, "loss": 0.6176, "step": 29038 }, { "epoch": 0.8900024518818193, "grad_norm": 1.1419956240049032, "learning_rate": 3.139557676568145e-07, "loss": 0.539, "step": 29039 }, { "epoch": 0.8900331004045605, "grad_norm": 1.3795557596595394, "learning_rate": 3.137826906361263e-07, "loss": 0.6626, "step": 29040 }, { "epoch": 0.8900637489273017, "grad_norm": 1.4872046691606005, "learning_rate": 3.136096597899374e-07, "loss": 0.6806, "step": 29041 }, { "epoch": 0.8900943974500429, "grad_norm": 1.272466737871599, "learning_rate": 3.134366751199508e-07, "loss": 0.5316, "step": 29042 }, { "epoch": 0.8901250459727841, "grad_norm": 1.3750105144544629, "learning_rate": 3.132637366278718e-07, "loss": 0.6525, "step": 29043 }, { "epoch": 0.8901556944955253, "grad_norm": 1.5090257005421712, "learning_rate": 3.130908443154046e-07, "loss": 0.6391, "step": 29044 }, { "epoch": 0.8901863430182665, "grad_norm": 1.5303979932130256, "learning_rate": 3.129179981842523e-07, "loss": 0.6318, "step": 29045 }, { "epoch": 0.8902169915410078, "grad_norm": 1.309651412855223, "learning_rate": 3.1274519823611847e-07, "loss": 0.555, "step": 29046 }, { "epoch": 0.8902476400637489, "grad_norm": 1.3244225541886183, "learning_rate": 3.125724444727052e-07, "loss": 0.5049, "step": 29047 }, { "epoch": 0.8902782885864902, "grad_norm": 1.3171862923853461, "learning_rate": 3.123997368957149e-07, "loss": 0.5717, "step": 29048 }, { "epoch": 0.8903089371092313, "grad_norm": 1.3639496666188873, "learning_rate": 3.122270755068502e-07, "loss": 0.5865, "step": 29049 }, { "epoch": 0.8903395856319726, "grad_norm": 1.3302640903542289, "learning_rate": 3.1205446030781016e-07, "loss": 0.5708, "step": 29050 }, { "epoch": 0.8903702341547137, "grad_norm": 0.43485630872673386, "learning_rate": 3.1188189130029747e-07, "loss": 0.379, "step": 29051 }, { "epoch": 0.890400882677455, "grad_norm": 1.5221375765481162, "learning_rate": 3.1170936848601285e-07, "loss": 0.6459, "step": 29052 }, { "epoch": 0.8904315312001961, "grad_norm": 1.5993989184493047, "learning_rate": 3.1153689186665446e-07, "loss": 0.656, "step": 29053 }, { "epoch": 0.8904621797229374, "grad_norm": 5.80275540674914, "learning_rate": 3.1136446144392376e-07, "loss": 0.6704, "step": 29054 }, { "epoch": 0.8904928282456785, "grad_norm": 1.3279201503737443, "learning_rate": 3.1119207721951704e-07, "loss": 0.6508, "step": 29055 }, { "epoch": 0.8905234767684198, "grad_norm": 0.4352752838380178, "learning_rate": 3.1101973919513526e-07, "loss": 0.3993, "step": 29056 }, { "epoch": 0.890554125291161, "grad_norm": 1.3478054994561397, "learning_rate": 3.108474473724765e-07, "loss": 0.633, "step": 29057 }, { "epoch": 0.8905847738139022, "grad_norm": 1.61208715941617, "learning_rate": 3.1067520175323605e-07, "loss": 0.6169, "step": 29058 }, { "epoch": 0.8906154223366434, "grad_norm": 0.46030548516152053, "learning_rate": 3.105030023391137e-07, "loss": 0.4013, "step": 29059 }, { "epoch": 0.8906460708593846, "grad_norm": 1.3105130774597988, "learning_rate": 3.103308491318052e-07, "loss": 0.5475, "step": 29060 }, { "epoch": 0.8906767193821258, "grad_norm": 1.500419513565914, "learning_rate": 3.10158742133006e-07, "loss": 0.6362, "step": 29061 }, { "epoch": 0.8907073679048669, "grad_norm": 1.2576933081336987, "learning_rate": 3.0998668134441304e-07, "loss": 0.5566, "step": 29062 }, { "epoch": 0.8907380164276082, "grad_norm": 1.2610837079356312, "learning_rate": 3.098146667677215e-07, "loss": 0.6166, "step": 29063 }, { "epoch": 0.8907686649503493, "grad_norm": 1.4634724395633971, "learning_rate": 3.096426984046258e-07, "loss": 0.6906, "step": 29064 }, { "epoch": 0.8907993134730906, "grad_norm": 0.46732270250005864, "learning_rate": 3.0947077625682165e-07, "loss": 0.3934, "step": 29065 }, { "epoch": 0.8908299619958318, "grad_norm": 1.4719079072923975, "learning_rate": 3.09298900326e-07, "loss": 0.5158, "step": 29066 }, { "epoch": 0.890860610518573, "grad_norm": 1.39174834309364, "learning_rate": 3.0912707061385825e-07, "loss": 0.6216, "step": 29067 }, { "epoch": 0.8908912590413142, "grad_norm": 1.3019215305390504, "learning_rate": 3.0895528712208745e-07, "loss": 0.6337, "step": 29068 }, { "epoch": 0.8909219075640554, "grad_norm": 1.3004604354680322, "learning_rate": 3.0878354985238e-07, "loss": 0.5954, "step": 29069 }, { "epoch": 0.8909525560867966, "grad_norm": 1.1474471034518077, "learning_rate": 3.0861185880642854e-07, "loss": 0.5676, "step": 29070 }, { "epoch": 0.8909832046095378, "grad_norm": 0.4781193407359288, "learning_rate": 3.084402139859249e-07, "loss": 0.4036, "step": 29071 }, { "epoch": 0.891013853132279, "grad_norm": 1.2836890015141686, "learning_rate": 3.082686153925601e-07, "loss": 0.5556, "step": 29072 }, { "epoch": 0.8910445016550202, "grad_norm": 1.3864469107119117, "learning_rate": 3.08097063028025e-07, "loss": 0.7279, "step": 29073 }, { "epoch": 0.8910751501777614, "grad_norm": 1.2869814321863262, "learning_rate": 3.0792555689401093e-07, "loss": 0.6893, "step": 29074 }, { "epoch": 0.8911057987005027, "grad_norm": 0.44535547507100876, "learning_rate": 3.0775409699220547e-07, "loss": 0.3844, "step": 29075 }, { "epoch": 0.8911364472232438, "grad_norm": 0.4492630611241596, "learning_rate": 3.0758268332430064e-07, "loss": 0.3948, "step": 29076 }, { "epoch": 0.8911670957459851, "grad_norm": 1.33411232536703, "learning_rate": 3.074113158919828e-07, "loss": 0.5859, "step": 29077 }, { "epoch": 0.8911977442687262, "grad_norm": 1.2583042614023652, "learning_rate": 3.0723999469694344e-07, "loss": 0.602, "step": 29078 }, { "epoch": 0.8912283927914675, "grad_norm": 1.3144106609787645, "learning_rate": 3.0706871974086893e-07, "loss": 0.7055, "step": 29079 }, { "epoch": 0.8912590413142086, "grad_norm": 1.33994673035336, "learning_rate": 3.068974910254463e-07, "loss": 0.6588, "step": 29080 }, { "epoch": 0.8912896898369499, "grad_norm": 1.4009856227716502, "learning_rate": 3.0672630855236363e-07, "loss": 0.6541, "step": 29081 }, { "epoch": 0.891320338359691, "grad_norm": 1.3335123581202892, "learning_rate": 3.065551723233079e-07, "loss": 0.6475, "step": 29082 }, { "epoch": 0.8913509868824323, "grad_norm": 1.4472927766725794, "learning_rate": 3.063840823399644e-07, "loss": 0.6343, "step": 29083 }, { "epoch": 0.8913816354051735, "grad_norm": 1.353969538304538, "learning_rate": 3.062130386040196e-07, "loss": 0.6087, "step": 29084 }, { "epoch": 0.8914122839279147, "grad_norm": 0.4345881481028755, "learning_rate": 3.060420411171583e-07, "loss": 0.3892, "step": 29085 }, { "epoch": 0.8914429324506559, "grad_norm": 1.2653637663338453, "learning_rate": 3.0587108988106684e-07, "loss": 0.5649, "step": 29086 }, { "epoch": 0.8914735809733971, "grad_norm": 1.2801990591168073, "learning_rate": 3.0570018489742836e-07, "loss": 0.5926, "step": 29087 }, { "epoch": 0.8915042294961383, "grad_norm": 1.2650398142645127, "learning_rate": 3.05529326167926e-07, "loss": 0.6154, "step": 29088 }, { "epoch": 0.8915348780188795, "grad_norm": 0.4325562179153304, "learning_rate": 3.053585136942455e-07, "loss": 0.3871, "step": 29089 }, { "epoch": 0.8915655265416207, "grad_norm": 1.2438115463719228, "learning_rate": 3.0518774747806844e-07, "loss": 0.6153, "step": 29090 }, { "epoch": 0.891596175064362, "grad_norm": 1.20920379601619, "learning_rate": 3.0501702752107733e-07, "loss": 0.539, "step": 29091 }, { "epoch": 0.8916268235871031, "grad_norm": 1.3001920931068105, "learning_rate": 3.0484635382495465e-07, "loss": 0.5828, "step": 29092 }, { "epoch": 0.8916574721098443, "grad_norm": 1.4927578233515362, "learning_rate": 3.0467572639138243e-07, "loss": 0.5689, "step": 29093 }, { "epoch": 0.8916881206325855, "grad_norm": 1.4334661811478608, "learning_rate": 3.045051452220421e-07, "loss": 0.5336, "step": 29094 }, { "epoch": 0.8917187691553267, "grad_norm": 1.2075865124566412, "learning_rate": 3.043346103186129e-07, "loss": 0.5342, "step": 29095 }, { "epoch": 0.8917494176780679, "grad_norm": 1.4303415043886467, "learning_rate": 3.0416412168277675e-07, "loss": 0.5107, "step": 29096 }, { "epoch": 0.8917800662008091, "grad_norm": 1.2952612257258116, "learning_rate": 3.039936793162135e-07, "loss": 0.5741, "step": 29097 }, { "epoch": 0.8918107147235503, "grad_norm": 1.3665142855594876, "learning_rate": 3.038232832206023e-07, "loss": 0.5692, "step": 29098 }, { "epoch": 0.8918413632462915, "grad_norm": 1.4027181570962686, "learning_rate": 3.036529333976207e-07, "loss": 0.5885, "step": 29099 }, { "epoch": 0.8918720117690327, "grad_norm": 1.4169945459454487, "learning_rate": 3.034826298489485e-07, "loss": 0.6127, "step": 29100 }, { "epoch": 0.8919026602917739, "grad_norm": 1.204601501571472, "learning_rate": 3.033123725762643e-07, "loss": 0.5584, "step": 29101 }, { "epoch": 0.8919333088145152, "grad_norm": 1.3223121183046616, "learning_rate": 3.0314216158124465e-07, "loss": 0.5552, "step": 29102 }, { "epoch": 0.8919639573372563, "grad_norm": 1.2795097391303867, "learning_rate": 3.0297199686556646e-07, "loss": 0.6555, "step": 29103 }, { "epoch": 0.8919946058599976, "grad_norm": 1.4395206907885685, "learning_rate": 3.0280187843090723e-07, "loss": 0.6721, "step": 29104 }, { "epoch": 0.8920252543827387, "grad_norm": 1.24637091539211, "learning_rate": 3.026318062789441e-07, "loss": 0.6083, "step": 29105 }, { "epoch": 0.89205590290548, "grad_norm": 0.4304840898950167, "learning_rate": 3.024617804113511e-07, "loss": 0.3737, "step": 29106 }, { "epoch": 0.8920865514282211, "grad_norm": 1.344057754778192, "learning_rate": 3.022918008298026e-07, "loss": 0.7012, "step": 29107 }, { "epoch": 0.8921171999509624, "grad_norm": 1.53196217832028, "learning_rate": 3.021218675359766e-07, "loss": 0.6716, "step": 29108 }, { "epoch": 0.8921478484737035, "grad_norm": 1.55121773273509, "learning_rate": 3.0195198053154574e-07, "loss": 0.5746, "step": 29109 }, { "epoch": 0.8921784969964448, "grad_norm": 1.5076829255761117, "learning_rate": 3.017821398181836e-07, "loss": 0.7034, "step": 29110 }, { "epoch": 0.892209145519186, "grad_norm": 1.2578461849015328, "learning_rate": 3.016123453975639e-07, "loss": 0.6146, "step": 29111 }, { "epoch": 0.8922397940419272, "grad_norm": 1.433324441476362, "learning_rate": 3.0144259727135974e-07, "loss": 0.6595, "step": 29112 }, { "epoch": 0.8922704425646684, "grad_norm": 0.4568217726682204, "learning_rate": 3.0127289544124473e-07, "loss": 0.3911, "step": 29113 }, { "epoch": 0.8923010910874096, "grad_norm": 0.4380971722488822, "learning_rate": 3.0110323990888924e-07, "loss": 0.4017, "step": 29114 }, { "epoch": 0.8923317396101508, "grad_norm": 1.455352934690407, "learning_rate": 3.0093363067596635e-07, "loss": 0.5681, "step": 29115 }, { "epoch": 0.892362388132892, "grad_norm": 1.476457063606853, "learning_rate": 3.00764067744147e-07, "loss": 0.7203, "step": 29116 }, { "epoch": 0.8923930366556332, "grad_norm": 1.4472102100146682, "learning_rate": 3.005945511151015e-07, "loss": 0.6257, "step": 29117 }, { "epoch": 0.8924236851783744, "grad_norm": 1.5241949019722567, "learning_rate": 3.0042508079049905e-07, "loss": 0.5817, "step": 29118 }, { "epoch": 0.8924543337011156, "grad_norm": 1.2627957478886163, "learning_rate": 3.002556567720122e-07, "loss": 0.6334, "step": 29119 }, { "epoch": 0.8924849822238569, "grad_norm": 1.3703214308828384, "learning_rate": 3.0008627906130796e-07, "loss": 0.4558, "step": 29120 }, { "epoch": 0.892515630746598, "grad_norm": 1.248796361404809, "learning_rate": 2.999169476600572e-07, "loss": 0.6162, "step": 29121 }, { "epoch": 0.8925462792693393, "grad_norm": 1.4291464101923592, "learning_rate": 2.997476625699258e-07, "loss": 0.6142, "step": 29122 }, { "epoch": 0.8925769277920804, "grad_norm": 0.41956831404383554, "learning_rate": 2.9957842379258417e-07, "loss": 0.374, "step": 29123 }, { "epoch": 0.8926075763148216, "grad_norm": 1.4800878617806004, "learning_rate": 2.9940923132969923e-07, "loss": 0.511, "step": 29124 }, { "epoch": 0.8926382248375628, "grad_norm": 1.4009628498535525, "learning_rate": 2.992400851829375e-07, "loss": 0.5602, "step": 29125 }, { "epoch": 0.892668873360304, "grad_norm": 1.3755795050742095, "learning_rate": 2.990709853539653e-07, "loss": 0.5298, "step": 29126 }, { "epoch": 0.8926995218830452, "grad_norm": 1.346359232410224, "learning_rate": 2.9890193184445085e-07, "loss": 0.5072, "step": 29127 }, { "epoch": 0.8927301704057864, "grad_norm": 1.3003317055515624, "learning_rate": 2.987329246560583e-07, "loss": 0.4663, "step": 29128 }, { "epoch": 0.8927608189285277, "grad_norm": 1.23956561846754, "learning_rate": 2.985639637904514e-07, "loss": 0.5176, "step": 29129 }, { "epoch": 0.8927914674512688, "grad_norm": 1.4336720312194422, "learning_rate": 2.9839504924929875e-07, "loss": 0.7445, "step": 29130 }, { "epoch": 0.8928221159740101, "grad_norm": 1.4126430108539423, "learning_rate": 2.9822618103426127e-07, "loss": 0.6617, "step": 29131 }, { "epoch": 0.8928527644967512, "grad_norm": 1.2302871114967113, "learning_rate": 2.980573591470054e-07, "loss": 0.54, "step": 29132 }, { "epoch": 0.8928834130194925, "grad_norm": 1.496310327357534, "learning_rate": 2.97888583589192e-07, "loss": 0.6398, "step": 29133 }, { "epoch": 0.8929140615422336, "grad_norm": 1.5053948388913228, "learning_rate": 2.9771985436248594e-07, "loss": 0.6486, "step": 29134 }, { "epoch": 0.8929447100649749, "grad_norm": 1.3445330687390624, "learning_rate": 2.975511714685503e-07, "loss": 0.6019, "step": 29135 }, { "epoch": 0.892975358587716, "grad_norm": 1.2901310057256887, "learning_rate": 2.9738253490904477e-07, "loss": 0.5025, "step": 29136 }, { "epoch": 0.8930060071104573, "grad_norm": 1.3392658384765863, "learning_rate": 2.9721394468563316e-07, "loss": 0.5864, "step": 29137 }, { "epoch": 0.8930366556331985, "grad_norm": 1.252643294916606, "learning_rate": 2.970454007999757e-07, "loss": 0.6218, "step": 29138 }, { "epoch": 0.8930673041559397, "grad_norm": 1.3895941325861385, "learning_rate": 2.9687690325373273e-07, "loss": 0.6453, "step": 29139 }, { "epoch": 0.8930979526786809, "grad_norm": 1.4463942875411135, "learning_rate": 2.9670845204856523e-07, "loss": 0.6557, "step": 29140 }, { "epoch": 0.8931286012014221, "grad_norm": 1.5273629382741734, "learning_rate": 2.9654004718613347e-07, "loss": 0.5169, "step": 29141 }, { "epoch": 0.8931592497241633, "grad_norm": 1.3275854834644252, "learning_rate": 2.9637168866809505e-07, "loss": 0.6081, "step": 29142 }, { "epoch": 0.8931898982469045, "grad_norm": 0.43968227331511434, "learning_rate": 2.962033764961109e-07, "loss": 0.4009, "step": 29143 }, { "epoch": 0.8932205467696457, "grad_norm": 1.3567489714520713, "learning_rate": 2.960351106718373e-07, "loss": 0.6104, "step": 29144 }, { "epoch": 0.893251195292387, "grad_norm": 0.44626425172011563, "learning_rate": 2.958668911969337e-07, "loss": 0.3881, "step": 29145 }, { "epoch": 0.8932818438151281, "grad_norm": 1.4935257133416628, "learning_rate": 2.9569871807305806e-07, "loss": 0.6028, "step": 29146 }, { "epoch": 0.8933124923378694, "grad_norm": 1.2636057175841784, "learning_rate": 2.9553059130186526e-07, "loss": 0.4732, "step": 29147 }, { "epoch": 0.8933431408606105, "grad_norm": 1.5709437687388084, "learning_rate": 2.9536251088501387e-07, "loss": 0.6677, "step": 29148 }, { "epoch": 0.8933737893833518, "grad_norm": 0.4748433917923891, "learning_rate": 2.951944768241594e-07, "loss": 0.3919, "step": 29149 }, { "epoch": 0.8934044379060929, "grad_norm": 1.4434421309426115, "learning_rate": 2.9502648912095756e-07, "loss": 0.6497, "step": 29150 }, { "epoch": 0.8934350864288342, "grad_norm": 1.3534239354622273, "learning_rate": 2.948585477770638e-07, "loss": 0.6445, "step": 29151 }, { "epoch": 0.8934657349515753, "grad_norm": 1.2151494715091191, "learning_rate": 2.946906527941318e-07, "loss": 0.563, "step": 29152 }, { "epoch": 0.8934963834743166, "grad_norm": 1.4567824326708603, "learning_rate": 2.945228041738174e-07, "loss": 0.6375, "step": 29153 }, { "epoch": 0.8935270319970577, "grad_norm": 1.338494536705453, "learning_rate": 2.9435500191777377e-07, "loss": 0.6095, "step": 29154 }, { "epoch": 0.8935576805197989, "grad_norm": 1.475486377152838, "learning_rate": 2.94187246027654e-07, "loss": 0.6224, "step": 29155 }, { "epoch": 0.8935883290425402, "grad_norm": 1.3311956185153246, "learning_rate": 2.9401953650511073e-07, "loss": 0.6133, "step": 29156 }, { "epoch": 0.8936189775652813, "grad_norm": 1.3181827535760453, "learning_rate": 2.9385187335179864e-07, "loss": 0.5389, "step": 29157 }, { "epoch": 0.8936496260880226, "grad_norm": 1.3378153673871, "learning_rate": 2.936842565693665e-07, "loss": 0.6041, "step": 29158 }, { "epoch": 0.8936802746107637, "grad_norm": 1.3705853588789423, "learning_rate": 2.935166861594685e-07, "loss": 0.5921, "step": 29159 }, { "epoch": 0.893710923133505, "grad_norm": 0.4294201035209149, "learning_rate": 2.9334916212375495e-07, "loss": 0.3727, "step": 29160 }, { "epoch": 0.8937415716562461, "grad_norm": 1.3475074019267805, "learning_rate": 2.9318168446387574e-07, "loss": 0.6005, "step": 29161 }, { "epoch": 0.8937722201789874, "grad_norm": 1.3461461782162503, "learning_rate": 2.9301425318148223e-07, "loss": 0.5829, "step": 29162 }, { "epoch": 0.8938028687017285, "grad_norm": 1.417324621389606, "learning_rate": 2.9284686827822316e-07, "loss": 0.5773, "step": 29163 }, { "epoch": 0.8938335172244698, "grad_norm": 0.4556780466254348, "learning_rate": 2.926795297557483e-07, "loss": 0.3918, "step": 29164 }, { "epoch": 0.893864165747211, "grad_norm": 1.487380779897631, "learning_rate": 2.925122376157069e-07, "loss": 0.6434, "step": 29165 }, { "epoch": 0.8938948142699522, "grad_norm": 1.4496261994203568, "learning_rate": 2.9234499185974594e-07, "loss": 0.6478, "step": 29166 }, { "epoch": 0.8939254627926934, "grad_norm": 1.4497265164410393, "learning_rate": 2.9217779248951474e-07, "loss": 0.562, "step": 29167 }, { "epoch": 0.8939561113154346, "grad_norm": 1.3215130686876855, "learning_rate": 2.920106395066613e-07, "loss": 0.4903, "step": 29168 }, { "epoch": 0.8939867598381758, "grad_norm": 0.44822356054535845, "learning_rate": 2.918435329128305e-07, "loss": 0.3813, "step": 29169 }, { "epoch": 0.894017408360917, "grad_norm": 1.2541448727741058, "learning_rate": 2.916764727096699e-07, "loss": 0.5581, "step": 29170 }, { "epoch": 0.8940480568836582, "grad_norm": 1.400702431820714, "learning_rate": 2.915094588988265e-07, "loss": 0.6734, "step": 29171 }, { "epoch": 0.8940787054063994, "grad_norm": 1.3764180987087162, "learning_rate": 2.913424914819446e-07, "loss": 0.5134, "step": 29172 }, { "epoch": 0.8941093539291406, "grad_norm": 1.2933438220899152, "learning_rate": 2.911755704606706e-07, "loss": 0.5554, "step": 29173 }, { "epoch": 0.8941400024518819, "grad_norm": 1.327876668414631, "learning_rate": 2.9100869583664757e-07, "loss": 0.6821, "step": 29174 }, { "epoch": 0.894170650974623, "grad_norm": 1.2448282390475425, "learning_rate": 2.9084186761152044e-07, "loss": 0.6219, "step": 29175 }, { "epoch": 0.8942012994973643, "grad_norm": 1.2691091827504442, "learning_rate": 2.906750857869345e-07, "loss": 0.5127, "step": 29176 }, { "epoch": 0.8942319480201054, "grad_norm": 1.1780595954424926, "learning_rate": 2.905083503645312e-07, "loss": 0.5638, "step": 29177 }, { "epoch": 0.8942625965428467, "grad_norm": 1.546480294088202, "learning_rate": 2.9034166134595365e-07, "loss": 0.6338, "step": 29178 }, { "epoch": 0.8942932450655878, "grad_norm": 0.44724424445329247, "learning_rate": 2.901750187328456e-07, "loss": 0.3893, "step": 29179 }, { "epoch": 0.8943238935883291, "grad_norm": 1.3596434330427853, "learning_rate": 2.900084225268474e-07, "loss": 0.5919, "step": 29180 }, { "epoch": 0.8943545421110702, "grad_norm": 1.3669495443333832, "learning_rate": 2.8984187272960154e-07, "loss": 0.5682, "step": 29181 }, { "epoch": 0.8943851906338115, "grad_norm": 2.0395272815491983, "learning_rate": 2.896753693427495e-07, "loss": 0.5945, "step": 29182 }, { "epoch": 0.8944158391565527, "grad_norm": 1.2692216655164739, "learning_rate": 2.8950891236793065e-07, "loss": 0.634, "step": 29183 }, { "epoch": 0.8944464876792939, "grad_norm": 1.400137460439918, "learning_rate": 2.893425018067864e-07, "loss": 0.6388, "step": 29184 }, { "epoch": 0.8944771362020351, "grad_norm": 1.4181337469882118, "learning_rate": 2.891761376609542e-07, "loss": 0.6077, "step": 29185 }, { "epoch": 0.8945077847247762, "grad_norm": 1.5139841749223244, "learning_rate": 2.890098199320762e-07, "loss": 0.5829, "step": 29186 }, { "epoch": 0.8945384332475175, "grad_norm": 1.2073504020053707, "learning_rate": 2.888435486217894e-07, "loss": 0.6137, "step": 29187 }, { "epoch": 0.8945690817702586, "grad_norm": 1.2578746638916998, "learning_rate": 2.8867732373173254e-07, "loss": 0.5433, "step": 29188 }, { "epoch": 0.8945997302929999, "grad_norm": 1.4583659021763586, "learning_rate": 2.885111452635431e-07, "loss": 0.5797, "step": 29189 }, { "epoch": 0.894630378815741, "grad_norm": 1.3402196641684756, "learning_rate": 2.883450132188598e-07, "loss": 0.5024, "step": 29190 }, { "epoch": 0.8946610273384823, "grad_norm": 1.3475989960067223, "learning_rate": 2.881789275993174e-07, "loss": 0.5644, "step": 29191 }, { "epoch": 0.8946916758612234, "grad_norm": 1.4375420785290325, "learning_rate": 2.8801288840655415e-07, "loss": 0.595, "step": 29192 }, { "epoch": 0.8947223243839647, "grad_norm": 1.2465035425995572, "learning_rate": 2.878468956422054e-07, "loss": 0.5111, "step": 29193 }, { "epoch": 0.8947529729067059, "grad_norm": 1.3604173103494053, "learning_rate": 2.876809493079075e-07, "loss": 0.5939, "step": 29194 }, { "epoch": 0.8947836214294471, "grad_norm": 1.345006783930981, "learning_rate": 2.8751504940529474e-07, "loss": 0.5819, "step": 29195 }, { "epoch": 0.8948142699521883, "grad_norm": 1.3012978003673437, "learning_rate": 2.873491959360014e-07, "loss": 0.5714, "step": 29196 }, { "epoch": 0.8948449184749295, "grad_norm": 1.3890136951115406, "learning_rate": 2.8718338890166177e-07, "loss": 0.5908, "step": 29197 }, { "epoch": 0.8948755669976707, "grad_norm": 1.2084175032694033, "learning_rate": 2.8701762830391167e-07, "loss": 0.6026, "step": 29198 }, { "epoch": 0.8949062155204119, "grad_norm": 1.2960279476704473, "learning_rate": 2.8685191414438096e-07, "loss": 0.6196, "step": 29199 }, { "epoch": 0.8949368640431531, "grad_norm": 1.4957685936261294, "learning_rate": 2.86686246424705e-07, "loss": 0.6126, "step": 29200 }, { "epoch": 0.8949675125658944, "grad_norm": 1.3789933061183315, "learning_rate": 2.865206251465158e-07, "loss": 0.5985, "step": 29201 }, { "epoch": 0.8949981610886355, "grad_norm": 1.314504594088433, "learning_rate": 2.863550503114443e-07, "loss": 0.5246, "step": 29202 }, { "epoch": 0.8950288096113768, "grad_norm": 1.5431454844410943, "learning_rate": 2.861895219211236e-07, "loss": 0.7025, "step": 29203 }, { "epoch": 0.8950594581341179, "grad_norm": 1.1542935749803727, "learning_rate": 2.860240399771813e-07, "loss": 0.4876, "step": 29204 }, { "epoch": 0.8950901066568592, "grad_norm": 1.4408874921277421, "learning_rate": 2.8585860448125226e-07, "loss": 0.5384, "step": 29205 }, { "epoch": 0.8951207551796003, "grad_norm": 1.3077042228880205, "learning_rate": 2.856932154349645e-07, "loss": 0.5251, "step": 29206 }, { "epoch": 0.8951514037023416, "grad_norm": 1.3117402481614036, "learning_rate": 2.855278728399469e-07, "loss": 0.5497, "step": 29207 }, { "epoch": 0.8951820522250827, "grad_norm": 1.3403953932922756, "learning_rate": 2.853625766978296e-07, "loss": 0.6854, "step": 29208 }, { "epoch": 0.895212700747824, "grad_norm": 1.5393262126366627, "learning_rate": 2.8519732701024204e-07, "loss": 0.6571, "step": 29209 }, { "epoch": 0.8952433492705651, "grad_norm": 1.2829891671483278, "learning_rate": 2.850321237788101e-07, "loss": 0.6229, "step": 29210 }, { "epoch": 0.8952739977933064, "grad_norm": 1.3959229415555832, "learning_rate": 2.8486696700516345e-07, "loss": 0.541, "step": 29211 }, { "epoch": 0.8953046463160476, "grad_norm": 1.9028585232558084, "learning_rate": 2.8470185669092934e-07, "loss": 0.5006, "step": 29212 }, { "epoch": 0.8953352948387888, "grad_norm": 1.335200832221395, "learning_rate": 2.8453679283773463e-07, "loss": 0.6206, "step": 29213 }, { "epoch": 0.89536594336153, "grad_norm": 1.3915877903228253, "learning_rate": 2.8437177544720526e-07, "loss": 0.5505, "step": 29214 }, { "epoch": 0.8953965918842712, "grad_norm": 1.2266272708773316, "learning_rate": 2.8420680452096616e-07, "loss": 0.6224, "step": 29215 }, { "epoch": 0.8954272404070124, "grad_norm": 0.4544536289803248, "learning_rate": 2.8404188006064537e-07, "loss": 0.392, "step": 29216 }, { "epoch": 0.8954578889297535, "grad_norm": 1.362216105761007, "learning_rate": 2.838770020678666e-07, "loss": 0.656, "step": 29217 }, { "epoch": 0.8954885374524948, "grad_norm": 1.2797662483724142, "learning_rate": 2.837121705442536e-07, "loss": 0.5525, "step": 29218 }, { "epoch": 0.8955191859752359, "grad_norm": 1.450145288546047, "learning_rate": 2.835473854914311e-07, "loss": 0.6334, "step": 29219 }, { "epoch": 0.8955498344979772, "grad_norm": 1.5458898291538845, "learning_rate": 2.833826469110235e-07, "loss": 0.6459, "step": 29220 }, { "epoch": 0.8955804830207184, "grad_norm": 1.3227961641100086, "learning_rate": 2.832179548046537e-07, "loss": 0.589, "step": 29221 }, { "epoch": 0.8956111315434596, "grad_norm": 1.3109414643382027, "learning_rate": 2.830533091739435e-07, "loss": 0.5515, "step": 29222 }, { "epoch": 0.8956417800662008, "grad_norm": 1.5169440672502166, "learning_rate": 2.828887100205163e-07, "loss": 0.6843, "step": 29223 }, { "epoch": 0.895672428588942, "grad_norm": 0.44837154150093045, "learning_rate": 2.8272415734599435e-07, "loss": 0.4062, "step": 29224 }, { "epoch": 0.8957030771116832, "grad_norm": 1.3385543596514082, "learning_rate": 2.825596511519979e-07, "loss": 0.6778, "step": 29225 }, { "epoch": 0.8957337256344244, "grad_norm": 0.4555226373837888, "learning_rate": 2.823951914401468e-07, "loss": 0.3861, "step": 29226 }, { "epoch": 0.8957643741571656, "grad_norm": 0.46410622651337247, "learning_rate": 2.8223077821206425e-07, "loss": 0.4115, "step": 29227 }, { "epoch": 0.8957950226799068, "grad_norm": 1.4292082266946318, "learning_rate": 2.820664114693694e-07, "loss": 0.5065, "step": 29228 }, { "epoch": 0.895825671202648, "grad_norm": 1.3739927718469387, "learning_rate": 2.8190209121367996e-07, "loss": 0.5998, "step": 29229 }, { "epoch": 0.8958563197253893, "grad_norm": 1.3056429511804137, "learning_rate": 2.8173781744661676e-07, "loss": 0.58, "step": 29230 }, { "epoch": 0.8958869682481304, "grad_norm": 1.4691400314601888, "learning_rate": 2.8157359016979855e-07, "loss": 0.5472, "step": 29231 }, { "epoch": 0.8959176167708717, "grad_norm": 1.168210021586051, "learning_rate": 2.8140940938484296e-07, "loss": 0.5712, "step": 29232 }, { "epoch": 0.8959482652936128, "grad_norm": 1.2464981790287726, "learning_rate": 2.812452750933675e-07, "loss": 0.6013, "step": 29233 }, { "epoch": 0.8959789138163541, "grad_norm": 1.3624073859731505, "learning_rate": 2.810811872969893e-07, "loss": 0.5814, "step": 29234 }, { "epoch": 0.8960095623390952, "grad_norm": 1.1968548746718763, "learning_rate": 2.809171459973264e-07, "loss": 0.5961, "step": 29235 }, { "epoch": 0.8960402108618365, "grad_norm": 0.4297327059399576, "learning_rate": 2.8075315119599487e-07, "loss": 0.386, "step": 29236 }, { "epoch": 0.8960708593845776, "grad_norm": 1.2755316976943625, "learning_rate": 2.805892028946078e-07, "loss": 0.526, "step": 29237 }, { "epoch": 0.8961015079073189, "grad_norm": 1.2216357474357047, "learning_rate": 2.804253010947849e-07, "loss": 0.6067, "step": 29238 }, { "epoch": 0.8961321564300601, "grad_norm": 0.4580968735857316, "learning_rate": 2.8026144579813786e-07, "loss": 0.3986, "step": 29239 }, { "epoch": 0.8961628049528013, "grad_norm": 1.5113405064290184, "learning_rate": 2.80097637006283e-07, "loss": 0.6544, "step": 29240 }, { "epoch": 0.8961934534755425, "grad_norm": 0.4440373618160362, "learning_rate": 2.799338747208336e-07, "loss": 0.4069, "step": 29241 }, { "epoch": 0.8962241019982837, "grad_norm": 1.3452523940804024, "learning_rate": 2.797701589434032e-07, "loss": 0.6354, "step": 29242 }, { "epoch": 0.8962547505210249, "grad_norm": 1.3179621784635163, "learning_rate": 2.796064896756057e-07, "loss": 0.5497, "step": 29243 }, { "epoch": 0.8962853990437661, "grad_norm": 1.3257258796770528, "learning_rate": 2.7944286691905244e-07, "loss": 0.5632, "step": 29244 }, { "epoch": 0.8963160475665073, "grad_norm": 1.3859965732428776, "learning_rate": 2.7927929067535664e-07, "loss": 0.6262, "step": 29245 }, { "epoch": 0.8963466960892486, "grad_norm": 0.4381714780090729, "learning_rate": 2.7911576094613035e-07, "loss": 0.3712, "step": 29246 }, { "epoch": 0.8963773446119897, "grad_norm": 1.3122392824264892, "learning_rate": 2.789522777329839e-07, "loss": 0.5891, "step": 29247 }, { "epoch": 0.8964079931347309, "grad_norm": 1.2246749994307415, "learning_rate": 2.7878884103752944e-07, "loss": 0.5893, "step": 29248 }, { "epoch": 0.8964386416574721, "grad_norm": 1.49979479009067, "learning_rate": 2.786254508613756e-07, "loss": 0.6702, "step": 29249 }, { "epoch": 0.8964692901802133, "grad_norm": 1.2233039950275078, "learning_rate": 2.7846210720613276e-07, "loss": 0.5499, "step": 29250 }, { "epoch": 0.8964999387029545, "grad_norm": 1.3688963869140085, "learning_rate": 2.7829881007341184e-07, "loss": 0.4877, "step": 29251 }, { "epoch": 0.8965305872256957, "grad_norm": 1.490184270035357, "learning_rate": 2.781355594648205e-07, "loss": 0.6982, "step": 29252 }, { "epoch": 0.8965612357484369, "grad_norm": 1.4030727209824536, "learning_rate": 2.779723553819674e-07, "loss": 0.6548, "step": 29253 }, { "epoch": 0.8965918842711781, "grad_norm": 1.2971028351511018, "learning_rate": 2.778091978264613e-07, "loss": 0.616, "step": 29254 }, { "epoch": 0.8966225327939193, "grad_norm": 1.3936985256138879, "learning_rate": 2.776460867999098e-07, "loss": 0.5712, "step": 29255 }, { "epoch": 0.8966531813166605, "grad_norm": 1.400149766405238, "learning_rate": 2.774830223039182e-07, "loss": 0.597, "step": 29256 }, { "epoch": 0.8966838298394018, "grad_norm": 1.490190766812008, "learning_rate": 2.7732000434009586e-07, "loss": 0.7159, "step": 29257 }, { "epoch": 0.8967144783621429, "grad_norm": 1.4966946345781857, "learning_rate": 2.77157032910047e-07, "loss": 0.5945, "step": 29258 }, { "epoch": 0.8967451268848842, "grad_norm": 1.3003457587638916, "learning_rate": 2.769941080153793e-07, "loss": 0.5547, "step": 29259 }, { "epoch": 0.8967757754076253, "grad_norm": 0.43421646194907804, "learning_rate": 2.768312296576964e-07, "loss": 0.3957, "step": 29260 }, { "epoch": 0.8968064239303666, "grad_norm": 1.3717919946717219, "learning_rate": 2.7666839783860424e-07, "loss": 0.5129, "step": 29261 }, { "epoch": 0.8968370724531077, "grad_norm": 1.559201767358689, "learning_rate": 2.765056125597071e-07, "loss": 0.5659, "step": 29262 }, { "epoch": 0.896867720975849, "grad_norm": 0.4413221133085797, "learning_rate": 2.7634287382260816e-07, "loss": 0.3911, "step": 29263 }, { "epoch": 0.8968983694985901, "grad_norm": 1.2597231017297315, "learning_rate": 2.7618018162891116e-07, "loss": 0.5832, "step": 29264 }, { "epoch": 0.8969290180213314, "grad_norm": 1.1863588065848008, "learning_rate": 2.760175359802203e-07, "loss": 0.5981, "step": 29265 }, { "epoch": 0.8969596665440726, "grad_norm": 1.419713206583795, "learning_rate": 2.7585493687813656e-07, "loss": 0.6254, "step": 29266 }, { "epoch": 0.8969903150668138, "grad_norm": 1.2923362178190179, "learning_rate": 2.7569238432426313e-07, "loss": 0.5919, "step": 29267 }, { "epoch": 0.897020963589555, "grad_norm": 1.3685293386614101, "learning_rate": 2.755298783202021e-07, "loss": 0.6441, "step": 29268 }, { "epoch": 0.8970516121122962, "grad_norm": 1.3492118982413677, "learning_rate": 2.7536741886755313e-07, "loss": 0.6204, "step": 29269 }, { "epoch": 0.8970822606350374, "grad_norm": 1.3976215732289585, "learning_rate": 2.75205005967919e-07, "loss": 0.5183, "step": 29270 }, { "epoch": 0.8971129091577786, "grad_norm": 1.2852192950304728, "learning_rate": 2.750426396228978e-07, "loss": 0.6097, "step": 29271 }, { "epoch": 0.8971435576805198, "grad_norm": 1.3565089987915215, "learning_rate": 2.748803198340899e-07, "loss": 0.6205, "step": 29272 }, { "epoch": 0.897174206203261, "grad_norm": 1.4118868982526263, "learning_rate": 2.747180466030963e-07, "loss": 0.6284, "step": 29273 }, { "epoch": 0.8972048547260022, "grad_norm": 1.3521577399798275, "learning_rate": 2.745558199315146e-07, "loss": 0.5156, "step": 29274 }, { "epoch": 0.8972355032487435, "grad_norm": 0.44162336642458094, "learning_rate": 2.7439363982094293e-07, "loss": 0.3704, "step": 29275 }, { "epoch": 0.8972661517714846, "grad_norm": 1.3931001435663677, "learning_rate": 2.7423150627298056e-07, "loss": 0.6198, "step": 29276 }, { "epoch": 0.8972968002942259, "grad_norm": 1.3512102706801368, "learning_rate": 2.740694192892235e-07, "loss": 0.5374, "step": 29277 }, { "epoch": 0.897327448816967, "grad_norm": 1.3911225905634261, "learning_rate": 2.7390737887126986e-07, "loss": 0.5854, "step": 29278 }, { "epoch": 0.8973580973397082, "grad_norm": 1.4477725556130996, "learning_rate": 2.737453850207167e-07, "loss": 0.4526, "step": 29279 }, { "epoch": 0.8973887458624494, "grad_norm": 1.5525585571923177, "learning_rate": 2.7358343773915887e-07, "loss": 0.5622, "step": 29280 }, { "epoch": 0.8974193943851906, "grad_norm": 1.5705462648230306, "learning_rate": 2.7342153702819284e-07, "loss": 0.7025, "step": 29281 }, { "epoch": 0.8974500429079318, "grad_norm": 1.2235669706342107, "learning_rate": 2.732596828894141e-07, "loss": 0.5354, "step": 29282 }, { "epoch": 0.897480691430673, "grad_norm": 1.3814592232533118, "learning_rate": 2.730978753244162e-07, "loss": 0.6054, "step": 29283 }, { "epoch": 0.8975113399534143, "grad_norm": 0.4156185003640789, "learning_rate": 2.7293611433479527e-07, "loss": 0.3812, "step": 29284 }, { "epoch": 0.8975419884761554, "grad_norm": 1.3490002582696816, "learning_rate": 2.7277439992214385e-07, "loss": 0.5753, "step": 29285 }, { "epoch": 0.8975726369988967, "grad_norm": 1.3649491422920532, "learning_rate": 2.726127320880556e-07, "loss": 0.6057, "step": 29286 }, { "epoch": 0.8976032855216378, "grad_norm": 1.2244149054630329, "learning_rate": 2.7245111083412436e-07, "loss": 0.6585, "step": 29287 }, { "epoch": 0.8976339340443791, "grad_norm": 1.3004727796380648, "learning_rate": 2.7228953616194155e-07, "loss": 0.6336, "step": 29288 }, { "epoch": 0.8976645825671202, "grad_norm": 1.7557795969113512, "learning_rate": 2.721280080730992e-07, "loss": 0.6594, "step": 29289 }, { "epoch": 0.8976952310898615, "grad_norm": 1.4244639209118146, "learning_rate": 2.719665265691901e-07, "loss": 0.5132, "step": 29290 }, { "epoch": 0.8977258796126026, "grad_norm": 1.3498367147663473, "learning_rate": 2.7180509165180337e-07, "loss": 0.6581, "step": 29291 }, { "epoch": 0.8977565281353439, "grad_norm": 1.3153081037707803, "learning_rate": 2.716437033225322e-07, "loss": 0.5553, "step": 29292 }, { "epoch": 0.897787176658085, "grad_norm": 1.1502935163447443, "learning_rate": 2.7148236158296427e-07, "loss": 0.4778, "step": 29293 }, { "epoch": 0.8978178251808263, "grad_norm": 1.3339841366598375, "learning_rate": 2.7132106643469103e-07, "loss": 0.5874, "step": 29294 }, { "epoch": 0.8978484737035675, "grad_norm": 1.3446232348258782, "learning_rate": 2.711598178793012e-07, "loss": 0.7064, "step": 29295 }, { "epoch": 0.8978791222263087, "grad_norm": 1.4069863152485804, "learning_rate": 2.709986159183836e-07, "loss": 0.665, "step": 29296 }, { "epoch": 0.8979097707490499, "grad_norm": 1.1863598532915263, "learning_rate": 2.7083746055352635e-07, "loss": 0.6116, "step": 29297 }, { "epoch": 0.8979404192717911, "grad_norm": 1.4049570299319463, "learning_rate": 2.7067635178631814e-07, "loss": 0.5879, "step": 29298 }, { "epoch": 0.8979710677945323, "grad_norm": 1.3381813316587425, "learning_rate": 2.7051528961834493e-07, "loss": 0.6073, "step": 29299 }, { "epoch": 0.8980017163172735, "grad_norm": 0.44227940126672843, "learning_rate": 2.703542740511961e-07, "loss": 0.3807, "step": 29300 }, { "epoch": 0.8980323648400147, "grad_norm": 1.2131198551969955, "learning_rate": 2.7019330508645526e-07, "loss": 0.4966, "step": 29301 }, { "epoch": 0.898063013362756, "grad_norm": 1.3953903116351007, "learning_rate": 2.700323827257106e-07, "loss": 0.6844, "step": 29302 }, { "epoch": 0.8980936618854971, "grad_norm": 1.4286727323649857, "learning_rate": 2.6987150697054764e-07, "loss": 0.6377, "step": 29303 }, { "epoch": 0.8981243104082384, "grad_norm": 0.42601531463676445, "learning_rate": 2.6971067782255e-07, "loss": 0.3596, "step": 29304 }, { "epoch": 0.8981549589309795, "grad_norm": 1.493530931683169, "learning_rate": 2.695498952833037e-07, "loss": 0.5664, "step": 29305 }, { "epoch": 0.8981856074537208, "grad_norm": 0.4452139370627778, "learning_rate": 2.693891593543929e-07, "loss": 0.3777, "step": 29306 }, { "epoch": 0.8982162559764619, "grad_norm": 1.237089583670181, "learning_rate": 2.6922847003740036e-07, "loss": 0.5499, "step": 29307 }, { "epoch": 0.8982469044992032, "grad_norm": 1.141543333199194, "learning_rate": 2.6906782733391036e-07, "loss": 0.4419, "step": 29308 }, { "epoch": 0.8982775530219443, "grad_norm": 0.45986142756888465, "learning_rate": 2.689072312455066e-07, "loss": 0.3822, "step": 29309 }, { "epoch": 0.8983082015446855, "grad_norm": 1.2849084208226913, "learning_rate": 2.687466817737694e-07, "loss": 0.5848, "step": 29310 }, { "epoch": 0.8983388500674268, "grad_norm": 1.476188190133686, "learning_rate": 2.6858617892028203e-07, "loss": 0.587, "step": 29311 }, { "epoch": 0.8983694985901679, "grad_norm": 1.2513065756555275, "learning_rate": 2.6842572268662436e-07, "loss": 0.5073, "step": 29312 }, { "epoch": 0.8984001471129092, "grad_norm": 1.3043307138169586, "learning_rate": 2.6826531307438066e-07, "loss": 0.571, "step": 29313 }, { "epoch": 0.8984307956356503, "grad_norm": 1.2201808343681344, "learning_rate": 2.6810495008512907e-07, "loss": 0.5997, "step": 29314 }, { "epoch": 0.8984614441583916, "grad_norm": 0.45493810653575634, "learning_rate": 2.679446337204494e-07, "loss": 0.3731, "step": 29315 }, { "epoch": 0.8984920926811327, "grad_norm": 1.581228341045992, "learning_rate": 2.6778436398192165e-07, "loss": 0.6365, "step": 29316 }, { "epoch": 0.898522741203874, "grad_norm": 1.3968841998762551, "learning_rate": 2.6762414087112663e-07, "loss": 0.6229, "step": 29317 }, { "epoch": 0.8985533897266151, "grad_norm": 1.2026318711180977, "learning_rate": 2.6746396438964095e-07, "loss": 0.5608, "step": 29318 }, { "epoch": 0.8985840382493564, "grad_norm": 1.3122681429350724, "learning_rate": 2.673038345390433e-07, "loss": 0.5652, "step": 29319 }, { "epoch": 0.8986146867720975, "grad_norm": 1.5054298906138057, "learning_rate": 2.671437513209124e-07, "loss": 0.5409, "step": 29320 }, { "epoch": 0.8986453352948388, "grad_norm": 1.2618136702898237, "learning_rate": 2.669837147368254e-07, "loss": 0.5924, "step": 29321 }, { "epoch": 0.89867598381758, "grad_norm": 1.4290022180835904, "learning_rate": 2.6682372478835925e-07, "loss": 0.6183, "step": 29322 }, { "epoch": 0.8987066323403212, "grad_norm": 1.880853507884293, "learning_rate": 2.666637814770884e-07, "loss": 0.6074, "step": 29323 }, { "epoch": 0.8987372808630624, "grad_norm": 1.3849782430081616, "learning_rate": 2.6650388480459143e-07, "loss": 0.6396, "step": 29324 }, { "epoch": 0.8987679293858036, "grad_norm": 0.4378771095232372, "learning_rate": 2.663440347724433e-07, "loss": 0.3667, "step": 29325 }, { "epoch": 0.8987985779085448, "grad_norm": 1.327806412386514, "learning_rate": 2.661842313822177e-07, "loss": 0.5942, "step": 29326 }, { "epoch": 0.898829226431286, "grad_norm": 1.3236350136285706, "learning_rate": 2.660244746354901e-07, "loss": 0.6787, "step": 29327 }, { "epoch": 0.8988598749540272, "grad_norm": 1.5029384144328397, "learning_rate": 2.6586476453383526e-07, "loss": 0.5866, "step": 29328 }, { "epoch": 0.8988905234767685, "grad_norm": 1.501507931221561, "learning_rate": 2.657051010788253e-07, "loss": 0.6077, "step": 29329 }, { "epoch": 0.8989211719995096, "grad_norm": 1.3959748192167263, "learning_rate": 2.6554548427203455e-07, "loss": 0.6347, "step": 29330 }, { "epoch": 0.8989518205222509, "grad_norm": 1.327835599327513, "learning_rate": 2.65385914115035e-07, "loss": 0.6117, "step": 29331 }, { "epoch": 0.898982469044992, "grad_norm": 0.4556166689912434, "learning_rate": 2.652263906094005e-07, "loss": 0.392, "step": 29332 }, { "epoch": 0.8990131175677333, "grad_norm": 1.5261612287750754, "learning_rate": 2.650669137567019e-07, "loss": 0.562, "step": 29333 }, { "epoch": 0.8990437660904744, "grad_norm": 0.47684313205132134, "learning_rate": 2.6490748355850916e-07, "loss": 0.3754, "step": 29334 }, { "epoch": 0.8990744146132157, "grad_norm": 1.3467078603806788, "learning_rate": 2.6474810001639594e-07, "loss": 0.5079, "step": 29335 }, { "epoch": 0.8991050631359568, "grad_norm": 1.328524585717687, "learning_rate": 2.645887631319311e-07, "loss": 0.5758, "step": 29336 }, { "epoch": 0.8991357116586981, "grad_norm": 1.2666312800345492, "learning_rate": 2.6442947290668374e-07, "loss": 0.55, "step": 29337 }, { "epoch": 0.8991663601814393, "grad_norm": 1.2929229128854236, "learning_rate": 2.6427022934222503e-07, "loss": 0.5977, "step": 29338 }, { "epoch": 0.8991970087041805, "grad_norm": 1.4468594651579771, "learning_rate": 2.64111032440123e-07, "loss": 0.5699, "step": 29339 }, { "epoch": 0.8992276572269217, "grad_norm": 1.2546037885408943, "learning_rate": 2.6395188220194767e-07, "loss": 0.577, "step": 29340 }, { "epoch": 0.8992583057496628, "grad_norm": 1.2826782363022393, "learning_rate": 2.6379277862926546e-07, "loss": 0.6086, "step": 29341 }, { "epoch": 0.8992889542724041, "grad_norm": 1.244473599379953, "learning_rate": 2.6363372172364453e-07, "loss": 0.6316, "step": 29342 }, { "epoch": 0.8993196027951452, "grad_norm": 1.39099732878465, "learning_rate": 2.6347471148665373e-07, "loss": 0.5944, "step": 29343 }, { "epoch": 0.8993502513178865, "grad_norm": 1.2093868590596228, "learning_rate": 2.633157479198578e-07, "loss": 0.553, "step": 29344 }, { "epoch": 0.8993808998406276, "grad_norm": 0.44962627809718797, "learning_rate": 2.631568310248234e-07, "loss": 0.3794, "step": 29345 }, { "epoch": 0.8994115483633689, "grad_norm": 1.3960316201764165, "learning_rate": 2.629979608031169e-07, "loss": 0.6416, "step": 29346 }, { "epoch": 0.89944219688611, "grad_norm": 0.4252383351304036, "learning_rate": 2.6283913725630326e-07, "loss": 0.368, "step": 29347 }, { "epoch": 0.8994728454088513, "grad_norm": 1.2330445170201674, "learning_rate": 2.626803603859479e-07, "loss": 0.5883, "step": 29348 }, { "epoch": 0.8995034939315925, "grad_norm": 1.267308131976305, "learning_rate": 2.625216301936151e-07, "loss": 0.5951, "step": 29349 }, { "epoch": 0.8995341424543337, "grad_norm": 0.45265275489512063, "learning_rate": 2.623629466808686e-07, "loss": 0.4115, "step": 29350 }, { "epoch": 0.8995647909770749, "grad_norm": 1.2451710963542735, "learning_rate": 2.622043098492727e-07, "loss": 0.5174, "step": 29351 }, { "epoch": 0.8995954394998161, "grad_norm": 1.5961484124725633, "learning_rate": 2.620457197003901e-07, "loss": 0.5831, "step": 29352 }, { "epoch": 0.8996260880225573, "grad_norm": 1.2277317462684079, "learning_rate": 2.618871762357816e-07, "loss": 0.5531, "step": 29353 }, { "epoch": 0.8996567365452985, "grad_norm": 1.4375587539128234, "learning_rate": 2.6172867945701284e-07, "loss": 0.5938, "step": 29354 }, { "epoch": 0.8996873850680397, "grad_norm": 0.4248938507982674, "learning_rate": 2.615702293656436e-07, "loss": 0.36, "step": 29355 }, { "epoch": 0.899718033590781, "grad_norm": 1.4190024595365005, "learning_rate": 2.6141182596323423e-07, "loss": 0.5763, "step": 29356 }, { "epoch": 0.8997486821135221, "grad_norm": 0.4241245117753214, "learning_rate": 2.612534692513469e-07, "loss": 0.3889, "step": 29357 }, { "epoch": 0.8997793306362634, "grad_norm": 0.44460999380758365, "learning_rate": 2.6109515923154137e-07, "loss": 0.4021, "step": 29358 }, { "epoch": 0.8998099791590045, "grad_norm": 1.2692548796585763, "learning_rate": 2.6093689590537877e-07, "loss": 0.6242, "step": 29359 }, { "epoch": 0.8998406276817458, "grad_norm": 1.2602584090434892, "learning_rate": 2.6077867927441656e-07, "loss": 0.5931, "step": 29360 }, { "epoch": 0.8998712762044869, "grad_norm": 1.3965970670624352, "learning_rate": 2.6062050934021476e-07, "loss": 0.6975, "step": 29361 }, { "epoch": 0.8999019247272282, "grad_norm": 1.417772986358228, "learning_rate": 2.604623861043326e-07, "loss": 0.6537, "step": 29362 }, { "epoch": 0.8999325732499693, "grad_norm": 1.740905089356827, "learning_rate": 2.6030430956832664e-07, "loss": 0.6535, "step": 29363 }, { "epoch": 0.8999632217727106, "grad_norm": 1.4746764039558469, "learning_rate": 2.6014627973375395e-07, "loss": 0.6306, "step": 29364 }, { "epoch": 0.8999938702954517, "grad_norm": 0.43899014554666793, "learning_rate": 2.5998829660217383e-07, "loss": 0.3927, "step": 29365 }, { "epoch": 0.900024518818193, "grad_norm": 1.3686195225109954, "learning_rate": 2.5983036017514174e-07, "loss": 0.5999, "step": 29366 }, { "epoch": 0.9000551673409342, "grad_norm": 1.2537365598603758, "learning_rate": 2.596724704542142e-07, "loss": 0.4734, "step": 29367 }, { "epoch": 0.9000858158636754, "grad_norm": 1.3946303249063863, "learning_rate": 2.59514627440946e-07, "loss": 0.611, "step": 29368 }, { "epoch": 0.9001164643864166, "grad_norm": 1.405594816295135, "learning_rate": 2.5935683113689324e-07, "loss": 0.5846, "step": 29369 }, { "epoch": 0.9001471129091578, "grad_norm": 1.3798380634880967, "learning_rate": 2.5919908154361076e-07, "loss": 0.6187, "step": 29370 }, { "epoch": 0.900177761431899, "grad_norm": 1.2215052168892873, "learning_rate": 2.5904137866265277e-07, "loss": 0.4797, "step": 29371 }, { "epoch": 0.9002084099546401, "grad_norm": 1.3572273868124305, "learning_rate": 2.5888372249557256e-07, "loss": 0.555, "step": 29372 }, { "epoch": 0.9002390584773814, "grad_norm": 1.4625883253174348, "learning_rate": 2.5872611304392503e-07, "loss": 0.6374, "step": 29373 }, { "epoch": 0.9002697070001225, "grad_norm": 1.4469436727965568, "learning_rate": 2.585685503092611e-07, "loss": 0.5953, "step": 29374 }, { "epoch": 0.9003003555228638, "grad_norm": 1.289646329382817, "learning_rate": 2.5841103429313506e-07, "loss": 0.6986, "step": 29375 }, { "epoch": 0.900331004045605, "grad_norm": 1.3249684905603476, "learning_rate": 2.5825356499709853e-07, "loss": 0.6007, "step": 29376 }, { "epoch": 0.9003616525683462, "grad_norm": 1.5201738864546526, "learning_rate": 2.580961424227024e-07, "loss": 0.6261, "step": 29377 }, { "epoch": 0.9003923010910874, "grad_norm": 1.1522172841937919, "learning_rate": 2.5793876657149886e-07, "loss": 0.5037, "step": 29378 }, { "epoch": 0.9004229496138286, "grad_norm": 1.253610822117428, "learning_rate": 2.5778143744503714e-07, "loss": 0.6144, "step": 29379 }, { "epoch": 0.9004535981365698, "grad_norm": 0.4528889145438304, "learning_rate": 2.5762415504486827e-07, "loss": 0.4093, "step": 29380 }, { "epoch": 0.900484246659311, "grad_norm": 1.4221145288917951, "learning_rate": 2.5746691937254265e-07, "loss": 0.5893, "step": 29381 }, { "epoch": 0.9005148951820522, "grad_norm": 1.2408966238595687, "learning_rate": 2.573097304296085e-07, "loss": 0.5478, "step": 29382 }, { "epoch": 0.9005455437047934, "grad_norm": 1.2418361991202242, "learning_rate": 2.571525882176146e-07, "loss": 0.5655, "step": 29383 }, { "epoch": 0.9005761922275346, "grad_norm": 1.2319453397335496, "learning_rate": 2.5699549273811075e-07, "loss": 0.6209, "step": 29384 }, { "epoch": 0.9006068407502759, "grad_norm": 1.5003459670086121, "learning_rate": 2.56838443992643e-07, "loss": 0.6625, "step": 29385 }, { "epoch": 0.900637489273017, "grad_norm": 1.4660121863182853, "learning_rate": 2.566814419827601e-07, "loss": 0.658, "step": 29386 }, { "epoch": 0.9006681377957583, "grad_norm": 1.4415157881746874, "learning_rate": 2.5652448671000916e-07, "loss": 0.5871, "step": 29387 }, { "epoch": 0.9006987863184994, "grad_norm": 1.2132416003273596, "learning_rate": 2.5636757817593506e-07, "loss": 0.5113, "step": 29388 }, { "epoch": 0.9007294348412407, "grad_norm": 1.3630568577282427, "learning_rate": 2.5621071638208597e-07, "loss": 0.5237, "step": 29389 }, { "epoch": 0.9007600833639818, "grad_norm": 1.472081099591927, "learning_rate": 2.560539013300051e-07, "loss": 0.6294, "step": 29390 }, { "epoch": 0.9007907318867231, "grad_norm": 1.406244247736037, "learning_rate": 2.5589713302123955e-07, "loss": 0.5997, "step": 29391 }, { "epoch": 0.9008213804094642, "grad_norm": 1.3031020305697656, "learning_rate": 2.557404114573342e-07, "loss": 0.5139, "step": 29392 }, { "epoch": 0.9008520289322055, "grad_norm": 1.368668942455253, "learning_rate": 2.555837366398312e-07, "loss": 0.6266, "step": 29393 }, { "epoch": 0.9008826774549467, "grad_norm": 1.3319012733533857, "learning_rate": 2.554271085702759e-07, "loss": 0.6067, "step": 29394 }, { "epoch": 0.9009133259776879, "grad_norm": 1.3967132765652257, "learning_rate": 2.5527052725021204e-07, "loss": 0.4482, "step": 29395 }, { "epoch": 0.9009439745004291, "grad_norm": 1.4150254292234126, "learning_rate": 2.5511399268118076e-07, "loss": 0.5587, "step": 29396 }, { "epoch": 0.9009746230231703, "grad_norm": 1.2740144637795283, "learning_rate": 2.5495750486472625e-07, "loss": 0.589, "step": 29397 }, { "epoch": 0.9010052715459115, "grad_norm": 1.2937365707044124, "learning_rate": 2.5480106380238846e-07, "loss": 0.6334, "step": 29398 }, { "epoch": 0.9010359200686527, "grad_norm": 1.2186304157836056, "learning_rate": 2.5464466949571e-07, "loss": 0.6102, "step": 29399 }, { "epoch": 0.9010665685913939, "grad_norm": 0.43017718912193653, "learning_rate": 2.544883219462324e-07, "loss": 0.394, "step": 29400 }, { "epoch": 0.9010972171141352, "grad_norm": 1.3156893236718068, "learning_rate": 2.5433202115549503e-07, "loss": 0.5982, "step": 29401 }, { "epoch": 0.9011278656368763, "grad_norm": 1.2513736481155662, "learning_rate": 2.541757671250389e-07, "loss": 0.5741, "step": 29402 }, { "epoch": 0.9011585141596175, "grad_norm": 0.44752378273842464, "learning_rate": 2.5401955985640323e-07, "loss": 0.3768, "step": 29403 }, { "epoch": 0.9011891626823587, "grad_norm": 1.3365652954502998, "learning_rate": 2.5386339935112694e-07, "loss": 0.57, "step": 29404 }, { "epoch": 0.9012198112050999, "grad_norm": 1.3788251341928672, "learning_rate": 2.537072856107486e-07, "loss": 0.5893, "step": 29405 }, { "epoch": 0.9012504597278411, "grad_norm": 1.2868850444697966, "learning_rate": 2.535512186368072e-07, "loss": 0.5501, "step": 29406 }, { "epoch": 0.9012811082505823, "grad_norm": 1.3437742452461325, "learning_rate": 2.533951984308397e-07, "loss": 0.5812, "step": 29407 }, { "epoch": 0.9013117567733235, "grad_norm": 0.44944506134672996, "learning_rate": 2.532392249943849e-07, "loss": 0.3815, "step": 29408 }, { "epoch": 0.9013424052960647, "grad_norm": 1.3480718354741266, "learning_rate": 2.5308329832897715e-07, "loss": 0.582, "step": 29409 }, { "epoch": 0.901373053818806, "grad_norm": 1.2295805055297098, "learning_rate": 2.5292741843615466e-07, "loss": 0.5403, "step": 29410 }, { "epoch": 0.9014037023415471, "grad_norm": 0.47305994171880855, "learning_rate": 2.527715853174534e-07, "loss": 0.3941, "step": 29411 }, { "epoch": 0.9014343508642884, "grad_norm": 1.329249699898529, "learning_rate": 2.526157989744077e-07, "loss": 0.5138, "step": 29412 }, { "epoch": 0.9014649993870295, "grad_norm": 1.349003537886451, "learning_rate": 2.5246005940855303e-07, "loss": 0.5959, "step": 29413 }, { "epoch": 0.9014956479097708, "grad_norm": 1.3692023566939042, "learning_rate": 2.523043666214248e-07, "loss": 0.6248, "step": 29414 }, { "epoch": 0.9015262964325119, "grad_norm": 1.3230014712782083, "learning_rate": 2.521487206145562e-07, "loss": 0.7248, "step": 29415 }, { "epoch": 0.9015569449552532, "grad_norm": 1.288804388684088, "learning_rate": 2.5199312138948053e-07, "loss": 0.5028, "step": 29416 }, { "epoch": 0.9015875934779943, "grad_norm": 1.508147514300492, "learning_rate": 2.518375689477326e-07, "loss": 0.5631, "step": 29417 }, { "epoch": 0.9016182420007356, "grad_norm": 1.3928531355679168, "learning_rate": 2.516820632908429e-07, "loss": 0.6632, "step": 29418 }, { "epoch": 0.9016488905234767, "grad_norm": 1.4487777913706157, "learning_rate": 2.515266044203457e-07, "loss": 0.5655, "step": 29419 }, { "epoch": 0.901679539046218, "grad_norm": 1.5502442517231758, "learning_rate": 2.5137119233776984e-07, "loss": 0.597, "step": 29420 }, { "epoch": 0.9017101875689592, "grad_norm": 1.4210259987734357, "learning_rate": 2.5121582704465076e-07, "loss": 0.633, "step": 29421 }, { "epoch": 0.9017408360917004, "grad_norm": 1.4112288985910633, "learning_rate": 2.510605085425166e-07, "loss": 0.5555, "step": 29422 }, { "epoch": 0.9017714846144416, "grad_norm": 0.43828570358213054, "learning_rate": 2.509052368328979e-07, "loss": 0.361, "step": 29423 }, { "epoch": 0.9018021331371828, "grad_norm": 1.3539152885302657, "learning_rate": 2.5075001191732507e-07, "loss": 0.617, "step": 29424 }, { "epoch": 0.901832781659924, "grad_norm": 1.230871894410955, "learning_rate": 2.5059483379732797e-07, "loss": 0.6063, "step": 29425 }, { "epoch": 0.9018634301826652, "grad_norm": 1.4156748911816135, "learning_rate": 2.5043970247443484e-07, "loss": 0.5853, "step": 29426 }, { "epoch": 0.9018940787054064, "grad_norm": 1.3566055482599362, "learning_rate": 2.5028461795017446e-07, "loss": 0.6058, "step": 29427 }, { "epoch": 0.9019247272281476, "grad_norm": 0.45141019448552094, "learning_rate": 2.5012958022607446e-07, "loss": 0.3992, "step": 29428 }, { "epoch": 0.9019553757508888, "grad_norm": 1.354477712220112, "learning_rate": 2.4997458930366425e-07, "loss": 0.6143, "step": 29429 }, { "epoch": 0.9019860242736301, "grad_norm": 1.3241862625453769, "learning_rate": 2.498196451844698e-07, "loss": 0.5446, "step": 29430 }, { "epoch": 0.9020166727963712, "grad_norm": 1.405625195902564, "learning_rate": 2.4966474787001596e-07, "loss": 0.6128, "step": 29431 }, { "epoch": 0.9020473213191125, "grad_norm": 1.3001069150472035, "learning_rate": 2.4950989736183264e-07, "loss": 0.6002, "step": 29432 }, { "epoch": 0.9020779698418536, "grad_norm": 0.4303627001473125, "learning_rate": 2.493550936614436e-07, "loss": 0.4005, "step": 29433 }, { "epoch": 0.9021086183645948, "grad_norm": 1.3477709982017771, "learning_rate": 2.4920033677037327e-07, "loss": 0.6167, "step": 29434 }, { "epoch": 0.902139266887336, "grad_norm": 1.5424655673899146, "learning_rate": 2.490456266901481e-07, "loss": 0.5612, "step": 29435 }, { "epoch": 0.9021699154100772, "grad_norm": 1.185192094137463, "learning_rate": 2.4889096342229246e-07, "loss": 0.5096, "step": 29436 }, { "epoch": 0.9022005639328184, "grad_norm": 1.3421967708238782, "learning_rate": 2.4873634696832904e-07, "loss": 0.6246, "step": 29437 }, { "epoch": 0.9022312124555596, "grad_norm": 1.3283865879169077, "learning_rate": 2.485817773297816e-07, "loss": 0.5891, "step": 29438 }, { "epoch": 0.9022618609783009, "grad_norm": 1.3694036323817167, "learning_rate": 2.484272545081745e-07, "loss": 0.5634, "step": 29439 }, { "epoch": 0.902292509501042, "grad_norm": 1.2277580496739477, "learning_rate": 2.4827277850502926e-07, "loss": 0.5164, "step": 29440 }, { "epoch": 0.9023231580237833, "grad_norm": 1.5143984334314071, "learning_rate": 2.481183493218686e-07, "loss": 0.5088, "step": 29441 }, { "epoch": 0.9023538065465244, "grad_norm": 0.4533074301235884, "learning_rate": 2.4796396696021295e-07, "loss": 0.396, "step": 29442 }, { "epoch": 0.9023844550692657, "grad_norm": 1.3090177203774842, "learning_rate": 2.478096314215844e-07, "loss": 0.5662, "step": 29443 }, { "epoch": 0.9024151035920068, "grad_norm": 1.3272743340931008, "learning_rate": 2.4765534270750404e-07, "loss": 0.5385, "step": 29444 }, { "epoch": 0.9024457521147481, "grad_norm": 1.4896921345607854, "learning_rate": 2.4750110081949054e-07, "loss": 0.7093, "step": 29445 }, { "epoch": 0.9024764006374892, "grad_norm": 1.235423784768958, "learning_rate": 2.473469057590644e-07, "loss": 0.486, "step": 29446 }, { "epoch": 0.9025070491602305, "grad_norm": 1.60870753541627, "learning_rate": 2.471927575277461e-07, "loss": 0.6675, "step": 29447 }, { "epoch": 0.9025376976829717, "grad_norm": 0.46419652647268617, "learning_rate": 2.470386561270538e-07, "loss": 0.4099, "step": 29448 }, { "epoch": 0.9025683462057129, "grad_norm": 1.197406511537858, "learning_rate": 2.468846015585058e-07, "loss": 0.5899, "step": 29449 }, { "epoch": 0.9025989947284541, "grad_norm": 1.3509580971325654, "learning_rate": 2.4673059382361806e-07, "loss": 0.613, "step": 29450 }, { "epoch": 0.9026296432511953, "grad_norm": 1.1915427945657653, "learning_rate": 2.465766329239122e-07, "loss": 0.5768, "step": 29451 }, { "epoch": 0.9026602917739365, "grad_norm": 0.451832110022947, "learning_rate": 2.464227188609025e-07, "loss": 0.403, "step": 29452 }, { "epoch": 0.9026909402966777, "grad_norm": 1.4084338058239598, "learning_rate": 2.462688516361056e-07, "loss": 0.6082, "step": 29453 }, { "epoch": 0.9027215888194189, "grad_norm": 1.275247490957619, "learning_rate": 2.4611503125103744e-07, "loss": 0.5332, "step": 29454 }, { "epoch": 0.9027522373421601, "grad_norm": 1.1624454946532805, "learning_rate": 2.4596125770721456e-07, "loss": 0.4871, "step": 29455 }, { "epoch": 0.9027828858649013, "grad_norm": 1.3459116586575317, "learning_rate": 2.458075310061525e-07, "loss": 0.5711, "step": 29456 }, { "epoch": 0.9028135343876426, "grad_norm": 1.2595709977597531, "learning_rate": 2.45653851149365e-07, "loss": 0.6326, "step": 29457 }, { "epoch": 0.9028441829103837, "grad_norm": 1.5774542709360528, "learning_rate": 2.4550021813836587e-07, "loss": 0.6164, "step": 29458 }, { "epoch": 0.902874831433125, "grad_norm": 1.1773450382719213, "learning_rate": 2.4534663197467056e-07, "loss": 0.6114, "step": 29459 }, { "epoch": 0.9029054799558661, "grad_norm": 1.3998063000091965, "learning_rate": 2.451930926597912e-07, "loss": 0.6374, "step": 29460 }, { "epoch": 0.9029361284786074, "grad_norm": 1.2281194900795387, "learning_rate": 2.450396001952399e-07, "loss": 0.6129, "step": 29461 }, { "epoch": 0.9029667770013485, "grad_norm": 1.2101919816724331, "learning_rate": 2.4488615458253096e-07, "loss": 0.58, "step": 29462 }, { "epoch": 0.9029974255240898, "grad_norm": 1.4113836311283072, "learning_rate": 2.4473275582317545e-07, "loss": 0.5013, "step": 29463 }, { "epoch": 0.9030280740468309, "grad_norm": 1.3767398528083343, "learning_rate": 2.445794039186844e-07, "loss": 0.6204, "step": 29464 }, { "epoch": 0.9030587225695721, "grad_norm": 0.46307549832899003, "learning_rate": 2.4442609887056935e-07, "loss": 0.4079, "step": 29465 }, { "epoch": 0.9030893710923134, "grad_norm": 1.2922704879081148, "learning_rate": 2.442728406803402e-07, "loss": 0.6207, "step": 29466 }, { "epoch": 0.9031200196150545, "grad_norm": 1.407515773471726, "learning_rate": 2.4411962934950853e-07, "loss": 0.577, "step": 29467 }, { "epoch": 0.9031506681377958, "grad_norm": 1.1924832325901247, "learning_rate": 2.4396646487958195e-07, "loss": 0.597, "step": 29468 }, { "epoch": 0.9031813166605369, "grad_norm": 0.454323814307166, "learning_rate": 2.438133472720711e-07, "loss": 0.3774, "step": 29469 }, { "epoch": 0.9032119651832782, "grad_norm": 1.3424608726236842, "learning_rate": 2.4366027652848513e-07, "loss": 0.6135, "step": 29470 }, { "epoch": 0.9032426137060193, "grad_norm": 1.3768397633357186, "learning_rate": 2.435072526503307e-07, "loss": 0.5737, "step": 29471 }, { "epoch": 0.9032732622287606, "grad_norm": 1.4094280810535575, "learning_rate": 2.433542756391155e-07, "loss": 0.5545, "step": 29472 }, { "epoch": 0.9033039107515017, "grad_norm": 1.3357595456547011, "learning_rate": 2.432013454963489e-07, "loss": 0.57, "step": 29473 }, { "epoch": 0.903334559274243, "grad_norm": 1.3099365802190541, "learning_rate": 2.4304846222353573e-07, "loss": 0.6567, "step": 29474 }, { "epoch": 0.9033652077969841, "grad_norm": 1.4291767127939383, "learning_rate": 2.428956258221843e-07, "loss": 0.6923, "step": 29475 }, { "epoch": 0.9033958563197254, "grad_norm": 1.4010492178392846, "learning_rate": 2.4274283629379833e-07, "loss": 0.5838, "step": 29476 }, { "epoch": 0.9034265048424666, "grad_norm": 1.3973642225656437, "learning_rate": 2.4259009363988397e-07, "loss": 0.5206, "step": 29477 }, { "epoch": 0.9034571533652078, "grad_norm": 0.4355684789349614, "learning_rate": 2.424373978619482e-07, "loss": 0.3837, "step": 29478 }, { "epoch": 0.903487801887949, "grad_norm": 1.4038892371339011, "learning_rate": 2.4228474896149266e-07, "loss": 0.559, "step": 29479 }, { "epoch": 0.9035184504106902, "grad_norm": 1.2548529545598068, "learning_rate": 2.421321469400234e-07, "loss": 0.6026, "step": 29480 }, { "epoch": 0.9035490989334314, "grad_norm": 1.282733120605992, "learning_rate": 2.419795917990436e-07, "loss": 0.6309, "step": 29481 }, { "epoch": 0.9035797474561726, "grad_norm": 1.564928792615432, "learning_rate": 2.4182708354005656e-07, "loss": 0.6472, "step": 29482 }, { "epoch": 0.9036103959789138, "grad_norm": 1.2975962127962748, "learning_rate": 2.4167462216456326e-07, "loss": 0.6572, "step": 29483 }, { "epoch": 0.903641044501655, "grad_norm": 1.370098251048057, "learning_rate": 2.4152220767406863e-07, "loss": 0.6833, "step": 29484 }, { "epoch": 0.9036716930243962, "grad_norm": 0.4233269224738304, "learning_rate": 2.413698400700726e-07, "loss": 0.3852, "step": 29485 }, { "epoch": 0.9037023415471375, "grad_norm": 1.4373297488500463, "learning_rate": 2.4121751935407776e-07, "loss": 0.6105, "step": 29486 }, { "epoch": 0.9037329900698786, "grad_norm": 1.3676365900368952, "learning_rate": 2.4106524552758414e-07, "loss": 0.7153, "step": 29487 }, { "epoch": 0.9037636385926199, "grad_norm": 1.3254129113767132, "learning_rate": 2.409130185920916e-07, "loss": 0.5767, "step": 29488 }, { "epoch": 0.903794287115361, "grad_norm": 0.42939041825048935, "learning_rate": 2.407608385491017e-07, "loss": 0.3924, "step": 29489 }, { "epoch": 0.9038249356381023, "grad_norm": 1.2901211312262033, "learning_rate": 2.4060870540011216e-07, "loss": 0.5544, "step": 29490 }, { "epoch": 0.9038555841608434, "grad_norm": 1.320194562609375, "learning_rate": 2.404566191466229e-07, "loss": 0.6045, "step": 29491 }, { "epoch": 0.9038862326835847, "grad_norm": 1.3317717521984709, "learning_rate": 2.4030457979013265e-07, "loss": 0.5782, "step": 29492 }, { "epoch": 0.9039168812063259, "grad_norm": 1.202096721869871, "learning_rate": 2.401525873321392e-07, "loss": 0.505, "step": 29493 }, { "epoch": 0.9039475297290671, "grad_norm": 1.3238827335136685, "learning_rate": 2.400006417741402e-07, "loss": 0.5281, "step": 29494 }, { "epoch": 0.9039781782518083, "grad_norm": 1.3039872733190112, "learning_rate": 2.398487431176327e-07, "loss": 0.6278, "step": 29495 }, { "epoch": 0.9040088267745494, "grad_norm": 1.184204113540012, "learning_rate": 2.396968913641129e-07, "loss": 0.6041, "step": 29496 }, { "epoch": 0.9040394752972907, "grad_norm": 1.2928918117793606, "learning_rate": 2.3954508651507837e-07, "loss": 0.6457, "step": 29497 }, { "epoch": 0.9040701238200318, "grad_norm": 1.4422299136268335, "learning_rate": 2.3939332857202404e-07, "loss": 0.6919, "step": 29498 }, { "epoch": 0.9041007723427731, "grad_norm": 1.5360922483046286, "learning_rate": 2.392416175364448e-07, "loss": 0.6396, "step": 29499 }, { "epoch": 0.9041314208655142, "grad_norm": 1.4520342599797806, "learning_rate": 2.390899534098368e-07, "loss": 0.6023, "step": 29500 }, { "epoch": 0.9041620693882555, "grad_norm": 1.4271754124631493, "learning_rate": 2.3893833619369255e-07, "loss": 0.6023, "step": 29501 }, { "epoch": 0.9041927179109966, "grad_norm": 1.4264545895442224, "learning_rate": 2.387867658895077e-07, "loss": 0.5926, "step": 29502 }, { "epoch": 0.9042233664337379, "grad_norm": 1.290907857135801, "learning_rate": 2.386352424987753e-07, "loss": 0.5799, "step": 29503 }, { "epoch": 0.9042540149564791, "grad_norm": 1.3926129238690623, "learning_rate": 2.3848376602298716e-07, "loss": 0.6416, "step": 29504 }, { "epoch": 0.9042846634792203, "grad_norm": 1.4501924592537443, "learning_rate": 2.3833233646363806e-07, "loss": 0.6674, "step": 29505 }, { "epoch": 0.9043153120019615, "grad_norm": 0.44452808054086873, "learning_rate": 2.3818095382221795e-07, "loss": 0.3712, "step": 29506 }, { "epoch": 0.9043459605247027, "grad_norm": 1.3427733902500283, "learning_rate": 2.3802961810021896e-07, "loss": 0.5667, "step": 29507 }, { "epoch": 0.9043766090474439, "grad_norm": 1.2568537817157825, "learning_rate": 2.3787832929913324e-07, "loss": 0.5584, "step": 29508 }, { "epoch": 0.9044072575701851, "grad_norm": 1.314402458807025, "learning_rate": 2.377270874204507e-07, "loss": 0.5662, "step": 29509 }, { "epoch": 0.9044379060929263, "grad_norm": 1.40183775880588, "learning_rate": 2.3757589246566127e-07, "loss": 0.6768, "step": 29510 }, { "epoch": 0.9044685546156676, "grad_norm": 1.468839384518855, "learning_rate": 2.374247444362554e-07, "loss": 0.6844, "step": 29511 }, { "epoch": 0.9044992031384087, "grad_norm": 0.4479237094208568, "learning_rate": 2.3727364333372194e-07, "loss": 0.3891, "step": 29512 }, { "epoch": 0.90452985166115, "grad_norm": 1.4120957052619452, "learning_rate": 2.3712258915954966e-07, "loss": 0.4983, "step": 29513 }, { "epoch": 0.9045605001838911, "grad_norm": 1.3914985767740267, "learning_rate": 2.369715819152274e-07, "loss": 0.5754, "step": 29514 }, { "epoch": 0.9045911487066324, "grad_norm": 1.4056233194304788, "learning_rate": 2.3682062160224284e-07, "loss": 0.7188, "step": 29515 }, { "epoch": 0.9046217972293735, "grad_norm": 1.3481187619603587, "learning_rate": 2.366697082220837e-07, "loss": 0.5436, "step": 29516 }, { "epoch": 0.9046524457521148, "grad_norm": 1.315590237173466, "learning_rate": 2.3651884177623596e-07, "loss": 0.5361, "step": 29517 }, { "epoch": 0.9046830942748559, "grad_norm": 1.387369871174441, "learning_rate": 2.3636802226618737e-07, "loss": 0.5474, "step": 29518 }, { "epoch": 0.9047137427975972, "grad_norm": 1.2243349501442373, "learning_rate": 2.3621724969342342e-07, "loss": 0.5699, "step": 29519 }, { "epoch": 0.9047443913203383, "grad_norm": 1.3268675314635385, "learning_rate": 2.3606652405942954e-07, "loss": 0.558, "step": 29520 }, { "epoch": 0.9047750398430796, "grad_norm": 0.4473604114284457, "learning_rate": 2.3591584536569123e-07, "loss": 0.3954, "step": 29521 }, { "epoch": 0.9048056883658208, "grad_norm": 0.4491905943759474, "learning_rate": 2.3576521361369342e-07, "loss": 0.3965, "step": 29522 }, { "epoch": 0.904836336888562, "grad_norm": 1.2899501512648412, "learning_rate": 2.3561462880491935e-07, "loss": 0.5089, "step": 29523 }, { "epoch": 0.9048669854113032, "grad_norm": 0.44521443619798795, "learning_rate": 2.3546409094085342e-07, "loss": 0.4151, "step": 29524 }, { "epoch": 0.9048976339340444, "grad_norm": 0.4355695831039641, "learning_rate": 2.3531360002297944e-07, "loss": 0.3786, "step": 29525 }, { "epoch": 0.9049282824567856, "grad_norm": 1.452165721472854, "learning_rate": 2.3516315605277895e-07, "loss": 0.5975, "step": 29526 }, { "epoch": 0.9049589309795267, "grad_norm": 1.220907569050578, "learning_rate": 2.3501275903173582e-07, "loss": 0.5244, "step": 29527 }, { "epoch": 0.904989579502268, "grad_norm": 1.463804647672406, "learning_rate": 2.3486240896132996e-07, "loss": 0.5968, "step": 29528 }, { "epoch": 0.9050202280250091, "grad_norm": 0.44275541058575346, "learning_rate": 2.3471210584304514e-07, "loss": 0.3922, "step": 29529 }, { "epoch": 0.9050508765477504, "grad_norm": 1.3379341747759543, "learning_rate": 2.3456184967836138e-07, "loss": 0.4906, "step": 29530 }, { "epoch": 0.9050815250704916, "grad_norm": 1.367358158016012, "learning_rate": 2.3441164046875797e-07, "loss": 0.601, "step": 29531 }, { "epoch": 0.9051121735932328, "grad_norm": 1.3431909033433354, "learning_rate": 2.34261478215716e-07, "loss": 0.5949, "step": 29532 }, { "epoch": 0.905142822115974, "grad_norm": 0.46861114481080324, "learning_rate": 2.341113629207159e-07, "loss": 0.3883, "step": 29533 }, { "epoch": 0.9051734706387152, "grad_norm": 1.41115519591238, "learning_rate": 2.339612945852354e-07, "loss": 0.6054, "step": 29534 }, { "epoch": 0.9052041191614564, "grad_norm": 1.2946861919546893, "learning_rate": 2.3381127321075338e-07, "loss": 0.6255, "step": 29535 }, { "epoch": 0.9052347676841976, "grad_norm": 1.5586018757504614, "learning_rate": 2.3366129879874965e-07, "loss": 0.6074, "step": 29536 }, { "epoch": 0.9052654162069388, "grad_norm": 1.365521309235782, "learning_rate": 2.3351137135069922e-07, "loss": 0.6051, "step": 29537 }, { "epoch": 0.90529606472968, "grad_norm": 1.2100534157175726, "learning_rate": 2.3336149086808203e-07, "loss": 0.5808, "step": 29538 }, { "epoch": 0.9053267132524212, "grad_norm": 1.3425717921143572, "learning_rate": 2.3321165735237294e-07, "loss": 0.5664, "step": 29539 }, { "epoch": 0.9053573617751625, "grad_norm": 1.3182642817145809, "learning_rate": 2.330618708050486e-07, "loss": 0.6741, "step": 29540 }, { "epoch": 0.9053880102979036, "grad_norm": 1.3134122383925844, "learning_rate": 2.329121312275867e-07, "loss": 0.5384, "step": 29541 }, { "epoch": 0.9054186588206449, "grad_norm": 1.431299896383639, "learning_rate": 2.3276243862145998e-07, "loss": 0.674, "step": 29542 }, { "epoch": 0.905449307343386, "grad_norm": 0.458119909114807, "learning_rate": 2.32612792988145e-07, "loss": 0.3711, "step": 29543 }, { "epoch": 0.9054799558661273, "grad_norm": 1.286424961874732, "learning_rate": 2.324631943291167e-07, "loss": 0.5918, "step": 29544 }, { "epoch": 0.9055106043888684, "grad_norm": 0.4521168652165819, "learning_rate": 2.3231364264584721e-07, "loss": 0.4101, "step": 29545 }, { "epoch": 0.9055412529116097, "grad_norm": 1.2356685780543577, "learning_rate": 2.3216413793981207e-07, "loss": 0.508, "step": 29546 }, { "epoch": 0.9055719014343508, "grad_norm": 1.3322049697815177, "learning_rate": 2.3201468021248285e-07, "loss": 0.6434, "step": 29547 }, { "epoch": 0.9056025499570921, "grad_norm": 1.302196253406766, "learning_rate": 2.3186526946533395e-07, "loss": 0.6391, "step": 29548 }, { "epoch": 0.9056331984798333, "grad_norm": 0.4404580289708317, "learning_rate": 2.3171590569983636e-07, "loss": 0.3926, "step": 29549 }, { "epoch": 0.9056638470025745, "grad_norm": 1.441655141947858, "learning_rate": 2.315665889174612e-07, "loss": 0.5174, "step": 29550 }, { "epoch": 0.9056944955253157, "grad_norm": 1.28395578343523, "learning_rate": 2.3141731911968057e-07, "loss": 0.6566, "step": 29551 }, { "epoch": 0.9057251440480569, "grad_norm": 1.413702658238848, "learning_rate": 2.312680963079661e-07, "loss": 0.636, "step": 29552 }, { "epoch": 0.9057557925707981, "grad_norm": 0.44311481775016964, "learning_rate": 2.311189204837866e-07, "loss": 0.3897, "step": 29553 }, { "epoch": 0.9057864410935393, "grad_norm": 1.3720426801364833, "learning_rate": 2.30969791648612e-07, "loss": 0.5129, "step": 29554 }, { "epoch": 0.9058170896162805, "grad_norm": 1.6463289460037165, "learning_rate": 2.308207098039128e-07, "loss": 0.5485, "step": 29555 }, { "epoch": 0.9058477381390218, "grad_norm": 1.4514344315597354, "learning_rate": 2.3067167495115783e-07, "loss": 0.5303, "step": 29556 }, { "epoch": 0.9058783866617629, "grad_norm": 1.415949460257784, "learning_rate": 2.3052268709181536e-07, "loss": 0.5628, "step": 29557 }, { "epoch": 0.905909035184504, "grad_norm": 1.3418860054453627, "learning_rate": 2.3037374622735143e-07, "loss": 0.6277, "step": 29558 }, { "epoch": 0.9059396837072453, "grad_norm": 1.5522433672296496, "learning_rate": 2.3022485235923708e-07, "loss": 0.6061, "step": 29559 }, { "epoch": 0.9059703322299865, "grad_norm": 0.4424817295557262, "learning_rate": 2.3007600548893727e-07, "loss": 0.3877, "step": 29560 }, { "epoch": 0.9060009807527277, "grad_norm": 1.3499163985638152, "learning_rate": 2.299272056179186e-07, "loss": 0.5767, "step": 29561 }, { "epoch": 0.9060316292754689, "grad_norm": 1.3767005381010482, "learning_rate": 2.2977845274764764e-07, "loss": 0.6306, "step": 29562 }, { "epoch": 0.9060622777982101, "grad_norm": 1.2799932537924148, "learning_rate": 2.296297468795905e-07, "loss": 0.5542, "step": 29563 }, { "epoch": 0.9060929263209513, "grad_norm": 1.4061104853203363, "learning_rate": 2.2948108801521207e-07, "loss": 0.6362, "step": 29564 }, { "epoch": 0.9061235748436925, "grad_norm": 1.2294216231631654, "learning_rate": 2.293324761559762e-07, "loss": 0.5691, "step": 29565 }, { "epoch": 0.9061542233664337, "grad_norm": 1.3927673318997071, "learning_rate": 2.2918391130334838e-07, "loss": 0.5974, "step": 29566 }, { "epoch": 0.906184871889175, "grad_norm": 0.43636159607378283, "learning_rate": 2.29035393458793e-07, "loss": 0.3744, "step": 29567 }, { "epoch": 0.9062155204119161, "grad_norm": 0.4407971657752411, "learning_rate": 2.2888692262377276e-07, "loss": 0.3715, "step": 29568 }, { "epoch": 0.9062461689346574, "grad_norm": 1.5487632644892888, "learning_rate": 2.2873849879974874e-07, "loss": 0.6616, "step": 29569 }, { "epoch": 0.9062768174573985, "grad_norm": 1.3915777726774758, "learning_rate": 2.285901219881864e-07, "loss": 0.6348, "step": 29570 }, { "epoch": 0.9063074659801398, "grad_norm": 1.3143131718000185, "learning_rate": 2.284417921905463e-07, "loss": 0.6068, "step": 29571 }, { "epoch": 0.9063381145028809, "grad_norm": 1.4682995330555775, "learning_rate": 2.2829350940828943e-07, "loss": 0.5553, "step": 29572 }, { "epoch": 0.9063687630256222, "grad_norm": 1.475866093825771, "learning_rate": 2.2814527364287796e-07, "loss": 0.6678, "step": 29573 }, { "epoch": 0.9063994115483633, "grad_norm": 1.2837390524577255, "learning_rate": 2.2799708489577187e-07, "loss": 0.5298, "step": 29574 }, { "epoch": 0.9064300600711046, "grad_norm": 1.4105401161328912, "learning_rate": 2.2784894316843165e-07, "loss": 0.5634, "step": 29575 }, { "epoch": 0.9064607085938458, "grad_norm": 1.3071544528363743, "learning_rate": 2.2770084846231666e-07, "loss": 0.5634, "step": 29576 }, { "epoch": 0.906491357116587, "grad_norm": 1.1759935662112566, "learning_rate": 2.275528007788863e-07, "loss": 0.4519, "step": 29577 }, { "epoch": 0.9065220056393282, "grad_norm": 1.425362109223723, "learning_rate": 2.2740480011959942e-07, "loss": 0.62, "step": 29578 }, { "epoch": 0.9065526541620694, "grad_norm": 1.346077224588017, "learning_rate": 2.2725684648591427e-07, "loss": 0.5875, "step": 29579 }, { "epoch": 0.9065833026848106, "grad_norm": 1.2750867440170761, "learning_rate": 2.271089398792875e-07, "loss": 0.6585, "step": 29580 }, { "epoch": 0.9066139512075518, "grad_norm": 0.43439958280054647, "learning_rate": 2.2696108030117902e-07, "loss": 0.3763, "step": 29581 }, { "epoch": 0.906644599730293, "grad_norm": 1.321991134835378, "learning_rate": 2.2681326775304323e-07, "loss": 0.5744, "step": 29582 }, { "epoch": 0.9066752482530342, "grad_norm": 1.474938765817639, "learning_rate": 2.2666550223633844e-07, "loss": 0.603, "step": 29583 }, { "epoch": 0.9067058967757754, "grad_norm": 1.1845836062088, "learning_rate": 2.2651778375251897e-07, "loss": 0.5771, "step": 29584 }, { "epoch": 0.9067365452985167, "grad_norm": 1.4871404143977984, "learning_rate": 2.263701123030415e-07, "loss": 0.6869, "step": 29585 }, { "epoch": 0.9067671938212578, "grad_norm": 1.3801450601780105, "learning_rate": 2.2622248788936098e-07, "loss": 0.562, "step": 29586 }, { "epoch": 0.9067978423439991, "grad_norm": 1.4633221812802777, "learning_rate": 2.2607491051293119e-07, "loss": 0.563, "step": 29587 }, { "epoch": 0.9068284908667402, "grad_norm": 1.2802401023687826, "learning_rate": 2.2592738017520655e-07, "loss": 0.5127, "step": 29588 }, { "epoch": 0.9068591393894814, "grad_norm": 1.4371219844282286, "learning_rate": 2.25779896877642e-07, "loss": 0.5444, "step": 29589 }, { "epoch": 0.9068897879122226, "grad_norm": 0.4553637785360203, "learning_rate": 2.256324606216892e-07, "loss": 0.3745, "step": 29590 }, { "epoch": 0.9069204364349638, "grad_norm": 0.4385605939465735, "learning_rate": 2.2548507140880081e-07, "loss": 0.4053, "step": 29591 }, { "epoch": 0.906951084957705, "grad_norm": 1.4759994328464408, "learning_rate": 2.253377292404296e-07, "loss": 0.5777, "step": 29592 }, { "epoch": 0.9069817334804462, "grad_norm": 1.4122923632584308, "learning_rate": 2.2519043411802777e-07, "loss": 0.5304, "step": 29593 }, { "epoch": 0.9070123820031875, "grad_norm": 1.3544989487364896, "learning_rate": 2.2504318604304687e-07, "loss": 0.6333, "step": 29594 }, { "epoch": 0.9070430305259286, "grad_norm": 1.3572842844646105, "learning_rate": 2.2489598501693632e-07, "loss": 0.6149, "step": 29595 }, { "epoch": 0.9070736790486699, "grad_norm": 1.3384537672559957, "learning_rate": 2.2474883104114719e-07, "loss": 0.6985, "step": 29596 }, { "epoch": 0.907104327571411, "grad_norm": 1.5717563425407486, "learning_rate": 2.2460172411713054e-07, "loss": 0.5466, "step": 29597 }, { "epoch": 0.9071349760941523, "grad_norm": 1.173718535336649, "learning_rate": 2.244546642463352e-07, "loss": 0.5561, "step": 29598 }, { "epoch": 0.9071656246168934, "grad_norm": 1.3691713287459404, "learning_rate": 2.2430765143020783e-07, "loss": 0.5901, "step": 29599 }, { "epoch": 0.9071962731396347, "grad_norm": 1.242600593945895, "learning_rate": 2.241606856702011e-07, "loss": 0.5241, "step": 29600 }, { "epoch": 0.9072269216623758, "grad_norm": 1.5583873456109483, "learning_rate": 2.2401376696776e-07, "loss": 0.6345, "step": 29601 }, { "epoch": 0.9072575701851171, "grad_norm": 0.4141508543033155, "learning_rate": 2.2386689532433447e-07, "loss": 0.3664, "step": 29602 }, { "epoch": 0.9072882187078583, "grad_norm": 1.2829491478528057, "learning_rate": 2.237200707413695e-07, "loss": 0.5787, "step": 29603 }, { "epoch": 0.9073188672305995, "grad_norm": 1.3135186802932042, "learning_rate": 2.2357329322031273e-07, "loss": 0.6686, "step": 29604 }, { "epoch": 0.9073495157533407, "grad_norm": 1.3063107500107745, "learning_rate": 2.2342656276261087e-07, "loss": 0.5672, "step": 29605 }, { "epoch": 0.9073801642760819, "grad_norm": 1.3734551084401982, "learning_rate": 2.2327987936970885e-07, "loss": 0.6364, "step": 29606 }, { "epoch": 0.9074108127988231, "grad_norm": 0.46555357341886666, "learning_rate": 2.2313324304305217e-07, "loss": 0.41, "step": 29607 }, { "epoch": 0.9074414613215643, "grad_norm": 1.37315509425653, "learning_rate": 2.2298665378408635e-07, "loss": 0.6942, "step": 29608 }, { "epoch": 0.9074721098443055, "grad_norm": 1.3759112027098577, "learning_rate": 2.2284011159425466e-07, "loss": 0.5567, "step": 29609 }, { "epoch": 0.9075027583670467, "grad_norm": 1.428635114523151, "learning_rate": 2.226936164750021e-07, "loss": 0.6372, "step": 29610 }, { "epoch": 0.9075334068897879, "grad_norm": 1.2300024790701585, "learning_rate": 2.225471684277719e-07, "loss": 0.6334, "step": 29611 }, { "epoch": 0.9075640554125292, "grad_norm": 1.4687994758386103, "learning_rate": 2.224007674540063e-07, "loss": 0.5851, "step": 29612 }, { "epoch": 0.9075947039352703, "grad_norm": 1.2132720043916014, "learning_rate": 2.222544135551491e-07, "loss": 0.6502, "step": 29613 }, { "epoch": 0.9076253524580116, "grad_norm": 1.4724899696158473, "learning_rate": 2.2210810673264084e-07, "loss": 0.5729, "step": 29614 }, { "epoch": 0.9076560009807527, "grad_norm": 0.45722767514296525, "learning_rate": 2.2196184698792368e-07, "loss": 0.4219, "step": 29615 }, { "epoch": 0.907686649503494, "grad_norm": 1.3078469840064912, "learning_rate": 2.218156343224398e-07, "loss": 0.6112, "step": 29616 }, { "epoch": 0.9077172980262351, "grad_norm": 1.288134667010615, "learning_rate": 2.216694687376286e-07, "loss": 0.5848, "step": 29617 }, { "epoch": 0.9077479465489764, "grad_norm": 1.4927358278244547, "learning_rate": 2.215233502349301e-07, "loss": 0.6196, "step": 29618 }, { "epoch": 0.9077785950717175, "grad_norm": 1.4875482895190386, "learning_rate": 2.2137727881578586e-07, "loss": 0.5662, "step": 29619 }, { "epoch": 0.9078092435944587, "grad_norm": 1.3625461737413873, "learning_rate": 2.2123125448163307e-07, "loss": 0.5637, "step": 29620 }, { "epoch": 0.9078398921172, "grad_norm": 1.4073514579490218, "learning_rate": 2.2108527723391172e-07, "loss": 0.5649, "step": 29621 }, { "epoch": 0.9078705406399411, "grad_norm": 0.47655730113032896, "learning_rate": 2.2093934707406007e-07, "loss": 0.3939, "step": 29622 }, { "epoch": 0.9079011891626824, "grad_norm": 1.2818326776660778, "learning_rate": 2.2079346400351532e-07, "loss": 0.5343, "step": 29623 }, { "epoch": 0.9079318376854235, "grad_norm": 1.423965248923805, "learning_rate": 2.2064762802371632e-07, "loss": 0.5867, "step": 29624 }, { "epoch": 0.9079624862081648, "grad_norm": 0.43996014639622016, "learning_rate": 2.2050183913609802e-07, "loss": 0.382, "step": 29625 }, { "epoch": 0.9079931347309059, "grad_norm": 1.3487508712503393, "learning_rate": 2.2035609734209818e-07, "loss": 0.6114, "step": 29626 }, { "epoch": 0.9080237832536472, "grad_norm": 0.45106500088450513, "learning_rate": 2.202104026431534e-07, "loss": 0.3836, "step": 29627 }, { "epoch": 0.9080544317763883, "grad_norm": 1.2561099388440666, "learning_rate": 2.2006475504069757e-07, "loss": 0.5298, "step": 29628 }, { "epoch": 0.9080850802991296, "grad_norm": 1.4000672766027982, "learning_rate": 2.199191545361673e-07, "loss": 0.6218, "step": 29629 }, { "epoch": 0.9081157288218707, "grad_norm": 1.2674670030000297, "learning_rate": 2.1977360113099643e-07, "loss": 0.5711, "step": 29630 }, { "epoch": 0.908146377344612, "grad_norm": 1.1908431845249023, "learning_rate": 2.196280948266194e-07, "loss": 0.5512, "step": 29631 }, { "epoch": 0.9081770258673532, "grad_norm": 1.3423770268605362, "learning_rate": 2.194826356244695e-07, "loss": 0.6596, "step": 29632 }, { "epoch": 0.9082076743900944, "grad_norm": 1.3488893448449961, "learning_rate": 2.1933722352598109e-07, "loss": 0.6264, "step": 29633 }, { "epoch": 0.9082383229128356, "grad_norm": 1.4826579801658732, "learning_rate": 2.191918585325853e-07, "loss": 0.6836, "step": 29634 }, { "epoch": 0.9082689714355768, "grad_norm": 1.2530364804244498, "learning_rate": 2.190465406457165e-07, "loss": 0.5571, "step": 29635 }, { "epoch": 0.908299619958318, "grad_norm": 1.362472465633602, "learning_rate": 2.1890126986680416e-07, "loss": 0.5811, "step": 29636 }, { "epoch": 0.9083302684810592, "grad_norm": 1.3359450150298315, "learning_rate": 2.1875604619728153e-07, "loss": 0.6114, "step": 29637 }, { "epoch": 0.9083609170038004, "grad_norm": 1.391062740765562, "learning_rate": 2.1861086963857914e-07, "loss": 0.5362, "step": 29638 }, { "epoch": 0.9083915655265417, "grad_norm": 0.4330586858831105, "learning_rate": 2.1846574019212695e-07, "loss": 0.3843, "step": 29639 }, { "epoch": 0.9084222140492828, "grad_norm": 1.2416536206493771, "learning_rate": 2.1832065785935496e-07, "loss": 0.5722, "step": 29640 }, { "epoch": 0.9084528625720241, "grad_norm": 1.4007655138333444, "learning_rate": 2.1817562264169312e-07, "loss": 0.6447, "step": 29641 }, { "epoch": 0.9084835110947652, "grad_norm": 1.5262892159947472, "learning_rate": 2.1803063454057028e-07, "loss": 0.6395, "step": 29642 }, { "epoch": 0.9085141596175065, "grad_norm": 1.2821408678277613, "learning_rate": 2.1788569355741583e-07, "loss": 0.5699, "step": 29643 }, { "epoch": 0.9085448081402476, "grad_norm": 1.2963595665095602, "learning_rate": 2.1774079969365646e-07, "loss": 0.573, "step": 29644 }, { "epoch": 0.9085754566629889, "grad_norm": 1.239282287673269, "learning_rate": 2.1759595295072044e-07, "loss": 0.5428, "step": 29645 }, { "epoch": 0.90860610518573, "grad_norm": 1.382741961804701, "learning_rate": 2.1745115333003607e-07, "loss": 0.5666, "step": 29646 }, { "epoch": 0.9086367537084713, "grad_norm": 1.344786201126012, "learning_rate": 2.1730640083302834e-07, "loss": 0.6458, "step": 29647 }, { "epoch": 0.9086674022312125, "grad_norm": 1.2831805745763099, "learning_rate": 2.1716169546112442e-07, "loss": 0.5334, "step": 29648 }, { "epoch": 0.9086980507539537, "grad_norm": 1.4044034215182495, "learning_rate": 2.170170372157504e-07, "loss": 0.4991, "step": 29649 }, { "epoch": 0.9087286992766949, "grad_norm": 0.4443762696879261, "learning_rate": 2.168724260983307e-07, "loss": 0.3874, "step": 29650 }, { "epoch": 0.908759347799436, "grad_norm": 1.1884319480471954, "learning_rate": 2.1672786211029085e-07, "loss": 0.5161, "step": 29651 }, { "epoch": 0.9087899963221773, "grad_norm": 1.4952532854260208, "learning_rate": 2.1658334525305634e-07, "loss": 0.6255, "step": 29652 }, { "epoch": 0.9088206448449184, "grad_norm": 1.4259557054092489, "learning_rate": 2.1643887552804888e-07, "loss": 0.5894, "step": 29653 }, { "epoch": 0.9088512933676597, "grad_norm": 1.2067403925204805, "learning_rate": 2.1629445293669394e-07, "loss": 0.6377, "step": 29654 }, { "epoch": 0.9088819418904008, "grad_norm": 1.324188562489058, "learning_rate": 2.1615007748041205e-07, "loss": 0.5516, "step": 29655 }, { "epoch": 0.9089125904131421, "grad_norm": 1.3469685208161755, "learning_rate": 2.1600574916062934e-07, "loss": 0.6359, "step": 29656 }, { "epoch": 0.9089432389358832, "grad_norm": 0.43194610046195864, "learning_rate": 2.1586146797876574e-07, "loss": 0.3922, "step": 29657 }, { "epoch": 0.9089738874586245, "grad_norm": 1.3747028337085594, "learning_rate": 2.1571723393624232e-07, "loss": 0.5862, "step": 29658 }, { "epoch": 0.9090045359813657, "grad_norm": 1.3357774586709155, "learning_rate": 2.1557304703448134e-07, "loss": 0.5427, "step": 29659 }, { "epoch": 0.9090351845041069, "grad_norm": 1.3785131681390088, "learning_rate": 2.1542890727490385e-07, "loss": 0.7231, "step": 29660 }, { "epoch": 0.9090658330268481, "grad_norm": 1.5636864697459425, "learning_rate": 2.1528481465892869e-07, "loss": 0.5054, "step": 29661 }, { "epoch": 0.9090964815495893, "grad_norm": 0.4470871514194557, "learning_rate": 2.1514076918797698e-07, "loss": 0.3747, "step": 29662 }, { "epoch": 0.9091271300723305, "grad_norm": 1.267280156683007, "learning_rate": 2.149967708634676e-07, "loss": 0.621, "step": 29663 }, { "epoch": 0.9091577785950717, "grad_norm": 1.4720304264174215, "learning_rate": 2.1485281968681937e-07, "loss": 0.6674, "step": 29664 }, { "epoch": 0.9091884271178129, "grad_norm": 1.2744449989675757, "learning_rate": 2.1470891565945062e-07, "loss": 0.5983, "step": 29665 }, { "epoch": 0.9092190756405542, "grad_norm": 1.2496840841302592, "learning_rate": 2.1456505878277855e-07, "loss": 0.6005, "step": 29666 }, { "epoch": 0.9092497241632953, "grad_norm": 1.3162437187685834, "learning_rate": 2.1442124905822204e-07, "loss": 0.6412, "step": 29667 }, { "epoch": 0.9092803726860366, "grad_norm": 1.5074091494594124, "learning_rate": 2.142774864871977e-07, "loss": 0.6465, "step": 29668 }, { "epoch": 0.9093110212087777, "grad_norm": 2.730270524440683, "learning_rate": 2.141337710711211e-07, "loss": 0.5612, "step": 29669 }, { "epoch": 0.909341669731519, "grad_norm": 1.3655066541957017, "learning_rate": 2.1399010281140941e-07, "loss": 0.5848, "step": 29670 }, { "epoch": 0.9093723182542601, "grad_norm": 1.2975198121728013, "learning_rate": 2.138464817094782e-07, "loss": 0.5761, "step": 29671 }, { "epoch": 0.9094029667770014, "grad_norm": 1.236150660341479, "learning_rate": 2.137029077667413e-07, "loss": 0.6155, "step": 29672 }, { "epoch": 0.9094336152997425, "grad_norm": 1.3664524478933808, "learning_rate": 2.1355938098461427e-07, "loss": 0.5729, "step": 29673 }, { "epoch": 0.9094642638224838, "grad_norm": 1.2859351107168981, "learning_rate": 2.1341590136451152e-07, "loss": 0.603, "step": 29674 }, { "epoch": 0.909494912345225, "grad_norm": 1.430921970068713, "learning_rate": 2.1327246890784693e-07, "loss": 0.5801, "step": 29675 }, { "epoch": 0.9095255608679662, "grad_norm": 1.07724327059821, "learning_rate": 2.131290836160338e-07, "loss": 0.5464, "step": 29676 }, { "epoch": 0.9095562093907074, "grad_norm": 1.2309889386617494, "learning_rate": 2.129857454904838e-07, "loss": 0.6269, "step": 29677 }, { "epoch": 0.9095868579134486, "grad_norm": 1.330400028431428, "learning_rate": 2.1284245453261021e-07, "loss": 0.6772, "step": 29678 }, { "epoch": 0.9096175064361898, "grad_norm": 1.5712328191333869, "learning_rate": 2.1269921074382528e-07, "loss": 0.6573, "step": 29679 }, { "epoch": 0.909648154958931, "grad_norm": 1.4278475663713397, "learning_rate": 2.1255601412553895e-07, "loss": 0.6393, "step": 29680 }, { "epoch": 0.9096788034816722, "grad_norm": 1.296428190573758, "learning_rate": 2.1241286467916345e-07, "loss": 0.5872, "step": 29681 }, { "epoch": 0.9097094520044133, "grad_norm": 1.3814827012105608, "learning_rate": 2.1226976240610875e-07, "loss": 0.5294, "step": 29682 }, { "epoch": 0.9097401005271546, "grad_norm": 0.43164431364441425, "learning_rate": 2.1212670730778594e-07, "loss": 0.3897, "step": 29683 }, { "epoch": 0.9097707490498957, "grad_norm": 1.3495063709483364, "learning_rate": 2.1198369938560338e-07, "loss": 0.666, "step": 29684 }, { "epoch": 0.909801397572637, "grad_norm": 1.3419965250905839, "learning_rate": 2.1184073864096987e-07, "loss": 0.6571, "step": 29685 }, { "epoch": 0.9098320460953782, "grad_norm": 1.3755453657689651, "learning_rate": 2.1169782507529545e-07, "loss": 0.6626, "step": 29686 }, { "epoch": 0.9098626946181194, "grad_norm": 1.3711754186282505, "learning_rate": 2.1155495868998787e-07, "loss": 0.5813, "step": 29687 }, { "epoch": 0.9098933431408606, "grad_norm": 1.5219115337354614, "learning_rate": 2.114121394864538e-07, "loss": 0.5552, "step": 29688 }, { "epoch": 0.9099239916636018, "grad_norm": 1.2861547551426653, "learning_rate": 2.1126936746610094e-07, "loss": 0.6173, "step": 29689 }, { "epoch": 0.909954640186343, "grad_norm": 1.3870025074993615, "learning_rate": 2.1112664263033654e-07, "loss": 0.6371, "step": 29690 }, { "epoch": 0.9099852887090842, "grad_norm": 1.4521105297422905, "learning_rate": 2.1098396498056616e-07, "loss": 0.5588, "step": 29691 }, { "epoch": 0.9100159372318254, "grad_norm": 1.3503251589889984, "learning_rate": 2.1084133451819644e-07, "loss": 0.7686, "step": 29692 }, { "epoch": 0.9100465857545667, "grad_norm": 1.301347041155956, "learning_rate": 2.1069875124463235e-07, "loss": 0.5725, "step": 29693 }, { "epoch": 0.9100772342773078, "grad_norm": 1.4276737810371685, "learning_rate": 2.1055621516127945e-07, "loss": 0.5687, "step": 29694 }, { "epoch": 0.9101078828000491, "grad_norm": 1.5593167730114224, "learning_rate": 2.1041372626954103e-07, "loss": 0.6635, "step": 29695 }, { "epoch": 0.9101385313227902, "grad_norm": 1.3515904317681051, "learning_rate": 2.1027128457082102e-07, "loss": 0.5768, "step": 29696 }, { "epoch": 0.9101691798455315, "grad_norm": 1.541650691021134, "learning_rate": 2.1012889006652492e-07, "loss": 0.5859, "step": 29697 }, { "epoch": 0.9101998283682726, "grad_norm": 1.548145082347781, "learning_rate": 2.0998654275805385e-07, "loss": 0.6091, "step": 29698 }, { "epoch": 0.9102304768910139, "grad_norm": 1.178010145871163, "learning_rate": 2.0984424264681057e-07, "loss": 0.5498, "step": 29699 }, { "epoch": 0.910261125413755, "grad_norm": 1.2796036802014465, "learning_rate": 2.0970198973419786e-07, "loss": 0.5738, "step": 29700 }, { "epoch": 0.9102917739364963, "grad_norm": 1.1967945533952773, "learning_rate": 2.095597840216168e-07, "loss": 0.5774, "step": 29701 }, { "epoch": 0.9103224224592374, "grad_norm": 1.4954063913257691, "learning_rate": 2.0941762551046906e-07, "loss": 0.5693, "step": 29702 }, { "epoch": 0.9103530709819787, "grad_norm": 1.3490062573956936, "learning_rate": 2.092755142021552e-07, "loss": 0.7003, "step": 29703 }, { "epoch": 0.9103837195047199, "grad_norm": 1.3281161927618965, "learning_rate": 2.0913345009807518e-07, "loss": 0.5935, "step": 29704 }, { "epoch": 0.9104143680274611, "grad_norm": 1.3310520159736843, "learning_rate": 2.089914331996301e-07, "loss": 0.6677, "step": 29705 }, { "epoch": 0.9104450165502023, "grad_norm": 1.3006086568015431, "learning_rate": 2.088494635082178e-07, "loss": 0.5106, "step": 29706 }, { "epoch": 0.9104756650729435, "grad_norm": 1.3541132091930765, "learning_rate": 2.087075410252365e-07, "loss": 0.5934, "step": 29707 }, { "epoch": 0.9105063135956847, "grad_norm": 0.44543541682262233, "learning_rate": 2.0856566575208682e-07, "loss": 0.3852, "step": 29708 }, { "epoch": 0.9105369621184259, "grad_norm": 1.38805918241424, "learning_rate": 2.084238376901654e-07, "loss": 0.572, "step": 29709 }, { "epoch": 0.9105676106411671, "grad_norm": 1.3773481424122613, "learning_rate": 2.0828205684087e-07, "loss": 0.563, "step": 29710 }, { "epoch": 0.9105982591639084, "grad_norm": 1.2612381698932114, "learning_rate": 2.081403232055973e-07, "loss": 0.5883, "step": 29711 }, { "epoch": 0.9106289076866495, "grad_norm": 1.2599328378943653, "learning_rate": 2.0799863678574396e-07, "loss": 0.5825, "step": 29712 }, { "epoch": 0.9106595562093907, "grad_norm": 1.3217909903892693, "learning_rate": 2.078569975827066e-07, "loss": 0.5848, "step": 29713 }, { "epoch": 0.9106902047321319, "grad_norm": 1.3451468477009996, "learning_rate": 2.0771540559787973e-07, "loss": 0.593, "step": 29714 }, { "epoch": 0.9107208532548731, "grad_norm": 1.430962311479657, "learning_rate": 2.0757386083265885e-07, "loss": 0.6144, "step": 29715 }, { "epoch": 0.9107515017776143, "grad_norm": 0.45570892376227, "learning_rate": 2.0743236328844007e-07, "loss": 0.379, "step": 29716 }, { "epoch": 0.9107821503003555, "grad_norm": 1.3091930022107745, "learning_rate": 2.0729091296661618e-07, "loss": 0.5132, "step": 29717 }, { "epoch": 0.9108127988230967, "grad_norm": 1.304719785468485, "learning_rate": 2.0714950986857995e-07, "loss": 0.6082, "step": 29718 }, { "epoch": 0.9108434473458379, "grad_norm": 0.43312953980514757, "learning_rate": 2.0700815399572749e-07, "loss": 0.4038, "step": 29719 }, { "epoch": 0.9108740958685791, "grad_norm": 1.4793470363424996, "learning_rate": 2.0686684534944878e-07, "loss": 0.6607, "step": 29720 }, { "epoch": 0.9109047443913203, "grad_norm": 1.2730333970014045, "learning_rate": 2.0672558393113884e-07, "loss": 0.5685, "step": 29721 }, { "epoch": 0.9109353929140616, "grad_norm": 1.3679984221327843, "learning_rate": 2.0658436974218653e-07, "loss": 0.5989, "step": 29722 }, { "epoch": 0.9109660414368027, "grad_norm": 1.410601314192462, "learning_rate": 2.0644320278398578e-07, "loss": 0.5764, "step": 29723 }, { "epoch": 0.910996689959544, "grad_norm": 1.1816153743443518, "learning_rate": 2.0630208305792655e-07, "loss": 0.5359, "step": 29724 }, { "epoch": 0.9110273384822851, "grad_norm": 1.2677907200170935, "learning_rate": 2.061610105653994e-07, "loss": 0.5972, "step": 29725 }, { "epoch": 0.9110579870050264, "grad_norm": 1.458151627659379, "learning_rate": 2.0601998530779376e-07, "loss": 0.617, "step": 29726 }, { "epoch": 0.9110886355277675, "grad_norm": 1.2448797989180356, "learning_rate": 2.0587900728650078e-07, "loss": 0.4709, "step": 29727 }, { "epoch": 0.9111192840505088, "grad_norm": 0.4595421198915945, "learning_rate": 2.0573807650290823e-07, "loss": 0.3907, "step": 29728 }, { "epoch": 0.9111499325732499, "grad_norm": 1.2540106331784744, "learning_rate": 2.0559719295840552e-07, "loss": 0.677, "step": 29729 }, { "epoch": 0.9111805810959912, "grad_norm": 1.3539721072264743, "learning_rate": 2.0545635665437936e-07, "loss": 0.6262, "step": 29730 }, { "epoch": 0.9112112296187324, "grad_norm": 1.4758381910179135, "learning_rate": 2.053155675922186e-07, "loss": 0.5957, "step": 29731 }, { "epoch": 0.9112418781414736, "grad_norm": 1.2470686022535342, "learning_rate": 2.0517482577331105e-07, "loss": 0.6591, "step": 29732 }, { "epoch": 0.9112725266642148, "grad_norm": 1.2503308341535302, "learning_rate": 2.0503413119904224e-07, "loss": 0.5886, "step": 29733 }, { "epoch": 0.911303175186956, "grad_norm": 1.3045801039416485, "learning_rate": 2.0489348387079888e-07, "loss": 0.5504, "step": 29734 }, { "epoch": 0.9113338237096972, "grad_norm": 0.44735339897213994, "learning_rate": 2.047528837899676e-07, "loss": 0.4047, "step": 29735 }, { "epoch": 0.9113644722324384, "grad_norm": 1.5240249858653039, "learning_rate": 2.046123309579323e-07, "loss": 0.6017, "step": 29736 }, { "epoch": 0.9113951207551796, "grad_norm": 0.42459786760056123, "learning_rate": 2.0447182537607856e-07, "loss": 0.3679, "step": 29737 }, { "epoch": 0.9114257692779208, "grad_norm": 1.3571109237793615, "learning_rate": 2.0433136704579194e-07, "loss": 0.5419, "step": 29738 }, { "epoch": 0.911456417800662, "grad_norm": 1.3494124751165903, "learning_rate": 2.0419095596845462e-07, "loss": 0.5948, "step": 29739 }, { "epoch": 0.9114870663234033, "grad_norm": 1.406992996094483, "learning_rate": 2.0405059214545108e-07, "loss": 0.5748, "step": 29740 }, { "epoch": 0.9115177148461444, "grad_norm": 1.352096429565657, "learning_rate": 2.0391027557816412e-07, "loss": 0.6117, "step": 29741 }, { "epoch": 0.9115483633688857, "grad_norm": 1.2477341724918847, "learning_rate": 2.0377000626797595e-07, "loss": 0.5828, "step": 29742 }, { "epoch": 0.9115790118916268, "grad_norm": 1.4231835934217971, "learning_rate": 2.036297842162699e-07, "loss": 0.625, "step": 29743 }, { "epoch": 0.911609660414368, "grad_norm": 1.3831182067640495, "learning_rate": 2.0348960942442596e-07, "loss": 0.567, "step": 29744 }, { "epoch": 0.9116403089371092, "grad_norm": 1.3965809653998877, "learning_rate": 2.033494818938264e-07, "loss": 0.5817, "step": 29745 }, { "epoch": 0.9116709574598504, "grad_norm": 1.4305243690825118, "learning_rate": 2.0320940162585234e-07, "loss": 0.5437, "step": 29746 }, { "epoch": 0.9117016059825916, "grad_norm": 1.450397723310575, "learning_rate": 2.030693686218821e-07, "loss": 0.5666, "step": 29747 }, { "epoch": 0.9117322545053328, "grad_norm": 1.448022108048765, "learning_rate": 2.0292938288329733e-07, "loss": 0.6332, "step": 29748 }, { "epoch": 0.9117629030280741, "grad_norm": 1.3810287425298409, "learning_rate": 2.0278944441147751e-07, "loss": 0.5634, "step": 29749 }, { "epoch": 0.9117935515508152, "grad_norm": 1.2872971926096584, "learning_rate": 2.0264955320779934e-07, "loss": 0.5632, "step": 29750 }, { "epoch": 0.9118242000735565, "grad_norm": 1.3952558405469078, "learning_rate": 2.0250970927364387e-07, "loss": 0.5561, "step": 29751 }, { "epoch": 0.9118548485962976, "grad_norm": 1.387431515177976, "learning_rate": 2.0236991261038674e-07, "loss": 0.53, "step": 29752 }, { "epoch": 0.9118854971190389, "grad_norm": 1.2493986873554177, "learning_rate": 2.022301632194068e-07, "loss": 0.5238, "step": 29753 }, { "epoch": 0.91191614564178, "grad_norm": 0.4657299945449871, "learning_rate": 2.0209046110208074e-07, "loss": 0.4038, "step": 29754 }, { "epoch": 0.9119467941645213, "grad_norm": 1.34861100695412, "learning_rate": 2.019508062597847e-07, "loss": 0.5302, "step": 29755 }, { "epoch": 0.9119774426872624, "grad_norm": 1.4411337747098492, "learning_rate": 2.0181119869389477e-07, "loss": 0.6171, "step": 29756 }, { "epoch": 0.9120080912100037, "grad_norm": 1.4121604254767792, "learning_rate": 2.0167163840578762e-07, "loss": 0.5624, "step": 29757 }, { "epoch": 0.9120387397327449, "grad_norm": 1.3520488574009608, "learning_rate": 2.0153212539683664e-07, "loss": 0.6128, "step": 29758 }, { "epoch": 0.9120693882554861, "grad_norm": 1.2947336891727625, "learning_rate": 2.0139265966841738e-07, "loss": 0.647, "step": 29759 }, { "epoch": 0.9121000367782273, "grad_norm": 1.2589594545364169, "learning_rate": 2.0125324122190483e-07, "loss": 0.5557, "step": 29760 }, { "epoch": 0.9121306853009685, "grad_norm": 1.3672490737819243, "learning_rate": 2.0111387005867123e-07, "loss": 0.5509, "step": 29761 }, { "epoch": 0.9121613338237097, "grad_norm": 1.4894158017861696, "learning_rate": 2.0097454618009104e-07, "loss": 0.5948, "step": 29762 }, { "epoch": 0.9121919823464509, "grad_norm": 1.3043879377980852, "learning_rate": 2.008352695875354e-07, "loss": 0.6721, "step": 29763 }, { "epoch": 0.9122226308691921, "grad_norm": 1.244952479769301, "learning_rate": 2.0069604028237932e-07, "loss": 0.5786, "step": 29764 }, { "epoch": 0.9122532793919333, "grad_norm": 1.3079242666248903, "learning_rate": 2.005568582659928e-07, "loss": 0.5759, "step": 29765 }, { "epoch": 0.9122839279146745, "grad_norm": 1.5798136945375338, "learning_rate": 2.0041772353974699e-07, "loss": 0.5573, "step": 29766 }, { "epoch": 0.9123145764374158, "grad_norm": 0.4820144064214974, "learning_rate": 2.0027863610501297e-07, "loss": 0.3925, "step": 29767 }, { "epoch": 0.9123452249601569, "grad_norm": 1.407037739326669, "learning_rate": 2.0013959596316247e-07, "loss": 0.5467, "step": 29768 }, { "epoch": 0.9123758734828982, "grad_norm": 1.4717248035413206, "learning_rate": 2.0000060311556434e-07, "loss": 0.5511, "step": 29769 }, { "epoch": 0.9124065220056393, "grad_norm": 0.4453271720021841, "learning_rate": 1.998616575635881e-07, "loss": 0.3887, "step": 29770 }, { "epoch": 0.9124371705283806, "grad_norm": 1.4212208667867396, "learning_rate": 1.9972275930860374e-07, "loss": 0.5636, "step": 29771 }, { "epoch": 0.9124678190511217, "grad_norm": 1.1430492155496517, "learning_rate": 1.9958390835197849e-07, "loss": 0.5826, "step": 29772 }, { "epoch": 0.912498467573863, "grad_norm": 1.3070555761855498, "learning_rate": 1.994451046950824e-07, "loss": 0.563, "step": 29773 }, { "epoch": 0.9125291160966041, "grad_norm": 1.241949553340168, "learning_rate": 1.9930634833928097e-07, "loss": 0.5456, "step": 29774 }, { "epoch": 0.9125597646193453, "grad_norm": 1.4945618345093947, "learning_rate": 1.9916763928594206e-07, "loss": 0.7196, "step": 29775 }, { "epoch": 0.9125904131420866, "grad_norm": 1.27650148738329, "learning_rate": 1.99028977536434e-07, "loss": 0.6686, "step": 29776 }, { "epoch": 0.9126210616648277, "grad_norm": 1.2754037132009737, "learning_rate": 1.9889036309212073e-07, "loss": 0.5178, "step": 29777 }, { "epoch": 0.912651710187569, "grad_norm": 1.2575254494665498, "learning_rate": 1.9875179595436944e-07, "loss": 0.5674, "step": 29778 }, { "epoch": 0.9126823587103101, "grad_norm": 1.391243628833307, "learning_rate": 1.9861327612454519e-07, "loss": 0.5747, "step": 29779 }, { "epoch": 0.9127130072330514, "grad_norm": 0.4469960818674447, "learning_rate": 1.9847480360401296e-07, "loss": 0.3779, "step": 29780 }, { "epoch": 0.9127436557557925, "grad_norm": 0.4723531607302847, "learning_rate": 1.9833637839413722e-07, "loss": 0.4034, "step": 29781 }, { "epoch": 0.9127743042785338, "grad_norm": 1.477806803378493, "learning_rate": 1.981980004962808e-07, "loss": 0.6327, "step": 29782 }, { "epoch": 0.9128049528012749, "grad_norm": 1.517584592723049, "learning_rate": 1.9805966991180869e-07, "loss": 0.5805, "step": 29783 }, { "epoch": 0.9128356013240162, "grad_norm": 1.354035999802667, "learning_rate": 1.979213866420837e-07, "loss": 0.5036, "step": 29784 }, { "epoch": 0.9128662498467573, "grad_norm": 1.1797933397031901, "learning_rate": 1.9778315068846754e-07, "loss": 0.6106, "step": 29785 }, { "epoch": 0.9128968983694986, "grad_norm": 1.4261453783868123, "learning_rate": 1.9764496205232243e-07, "loss": 0.713, "step": 29786 }, { "epoch": 0.9129275468922398, "grad_norm": 0.46004118939458466, "learning_rate": 1.975068207350106e-07, "loss": 0.3925, "step": 29787 }, { "epoch": 0.912958195414981, "grad_norm": 1.3409152621445375, "learning_rate": 1.9736872673789266e-07, "loss": 0.5007, "step": 29788 }, { "epoch": 0.9129888439377222, "grad_norm": 1.2618202203150297, "learning_rate": 1.9723068006232916e-07, "loss": 0.7135, "step": 29789 }, { "epoch": 0.9130194924604634, "grad_norm": 1.338211521509932, "learning_rate": 1.9709268070968069e-07, "loss": 0.5938, "step": 29790 }, { "epoch": 0.9130501409832046, "grad_norm": 0.4389310708423597, "learning_rate": 1.9695472868130783e-07, "loss": 0.376, "step": 29791 }, { "epoch": 0.9130807895059458, "grad_norm": 1.4284481852680753, "learning_rate": 1.9681682397856838e-07, "loss": 0.6381, "step": 29792 }, { "epoch": 0.913111438028687, "grad_norm": 1.4459299498841587, "learning_rate": 1.9667896660282127e-07, "loss": 0.6079, "step": 29793 }, { "epoch": 0.9131420865514283, "grad_norm": 1.5668451286476004, "learning_rate": 1.9654115655542594e-07, "loss": 0.5725, "step": 29794 }, { "epoch": 0.9131727350741694, "grad_norm": 1.2867824933638252, "learning_rate": 1.9640339383773966e-07, "loss": 0.6228, "step": 29795 }, { "epoch": 0.9132033835969107, "grad_norm": 1.3220611819271928, "learning_rate": 1.962656784511191e-07, "loss": 0.565, "step": 29796 }, { "epoch": 0.9132340321196518, "grad_norm": 1.3698155136729733, "learning_rate": 1.9612801039692208e-07, "loss": 0.6246, "step": 29797 }, { "epoch": 0.9132646806423931, "grad_norm": 1.3631828973258944, "learning_rate": 1.959903896765053e-07, "loss": 0.6267, "step": 29798 }, { "epoch": 0.9132953291651342, "grad_norm": 1.3772922468343511, "learning_rate": 1.9585281629122377e-07, "loss": 0.7003, "step": 29799 }, { "epoch": 0.9133259776878755, "grad_norm": 1.397290615652333, "learning_rate": 1.957152902424342e-07, "loss": 0.6872, "step": 29800 }, { "epoch": 0.9133566262106166, "grad_norm": 1.4143185948995383, "learning_rate": 1.9557781153149047e-07, "loss": 0.6475, "step": 29801 }, { "epoch": 0.9133872747333579, "grad_norm": 1.187494814862237, "learning_rate": 1.9544038015974876e-07, "loss": 0.57, "step": 29802 }, { "epoch": 0.913417923256099, "grad_norm": 1.3152197383799877, "learning_rate": 1.953029961285624e-07, "loss": 0.529, "step": 29803 }, { "epoch": 0.9134485717788403, "grad_norm": 1.5146792247631822, "learning_rate": 1.9516565943928311e-07, "loss": 0.5972, "step": 29804 }, { "epoch": 0.9134792203015815, "grad_norm": 0.47073607233984266, "learning_rate": 1.9502837009326758e-07, "loss": 0.3942, "step": 29805 }, { "epoch": 0.9135098688243226, "grad_norm": 0.4436119116282167, "learning_rate": 1.9489112809186695e-07, "loss": 0.3877, "step": 29806 }, { "epoch": 0.9135405173470639, "grad_norm": 1.5793669925046612, "learning_rate": 1.947539334364329e-07, "loss": 0.5635, "step": 29807 }, { "epoch": 0.913571165869805, "grad_norm": 0.42794139287522776, "learning_rate": 1.946167861283177e-07, "loss": 0.3928, "step": 29808 }, { "epoch": 0.9136018143925463, "grad_norm": 0.46574547733809224, "learning_rate": 1.9447968616887302e-07, "loss": 0.4054, "step": 29809 }, { "epoch": 0.9136324629152874, "grad_norm": 1.4341077974028893, "learning_rate": 1.9434263355945004e-07, "loss": 0.6031, "step": 29810 }, { "epoch": 0.9136631114380287, "grad_norm": 1.5832809677952342, "learning_rate": 1.9420562830139766e-07, "loss": 0.6019, "step": 29811 }, { "epoch": 0.9136937599607698, "grad_norm": 1.5734565059242767, "learning_rate": 1.9406867039606759e-07, "loss": 0.6645, "step": 29812 }, { "epoch": 0.9137244084835111, "grad_norm": 1.2768671009395052, "learning_rate": 1.939317598448087e-07, "loss": 0.5653, "step": 29813 }, { "epoch": 0.9137550570062523, "grad_norm": 1.367122032863225, "learning_rate": 1.9379489664897e-07, "loss": 0.5003, "step": 29814 }, { "epoch": 0.9137857055289935, "grad_norm": 1.2034253278171547, "learning_rate": 1.9365808080989868e-07, "loss": 0.5726, "step": 29815 }, { "epoch": 0.9138163540517347, "grad_norm": 1.3354508704295889, "learning_rate": 1.9352131232894477e-07, "loss": 0.5871, "step": 29816 }, { "epoch": 0.9138470025744759, "grad_norm": 0.46965777052048613, "learning_rate": 1.9338459120745555e-07, "loss": 0.4119, "step": 29817 }, { "epoch": 0.9138776510972171, "grad_norm": 1.2219542568878938, "learning_rate": 1.9324791744677772e-07, "loss": 0.4911, "step": 29818 }, { "epoch": 0.9139082996199583, "grad_norm": 1.3725100985188483, "learning_rate": 1.9311129104825744e-07, "loss": 0.6555, "step": 29819 }, { "epoch": 0.9139389481426995, "grad_norm": 1.174025396798572, "learning_rate": 1.9297471201324136e-07, "loss": 0.5383, "step": 29820 }, { "epoch": 0.9139695966654408, "grad_norm": 1.2428583598528506, "learning_rate": 1.9283818034307623e-07, "loss": 0.596, "step": 29821 }, { "epoch": 0.9140002451881819, "grad_norm": 0.43384043178235077, "learning_rate": 1.9270169603910593e-07, "loss": 0.3943, "step": 29822 }, { "epoch": 0.9140308937109232, "grad_norm": 1.3052314854492724, "learning_rate": 1.9256525910267555e-07, "loss": 0.52, "step": 29823 }, { "epoch": 0.9140615422336643, "grad_norm": 1.4160420649712182, "learning_rate": 1.9242886953513062e-07, "loss": 0.5804, "step": 29824 }, { "epoch": 0.9140921907564056, "grad_norm": 0.45600598844451934, "learning_rate": 1.9229252733781402e-07, "loss": 0.3724, "step": 29825 }, { "epoch": 0.9141228392791467, "grad_norm": 1.9740445807554075, "learning_rate": 1.9215623251206849e-07, "loss": 0.6267, "step": 29826 }, { "epoch": 0.914153487801888, "grad_norm": 0.4318307356127686, "learning_rate": 1.92019985059238e-07, "loss": 0.3915, "step": 29827 }, { "epoch": 0.9141841363246291, "grad_norm": 1.251759052185537, "learning_rate": 1.9188378498066485e-07, "loss": 0.5519, "step": 29828 }, { "epoch": 0.9142147848473704, "grad_norm": 1.158601473596962, "learning_rate": 1.9174763227769122e-07, "loss": 0.5744, "step": 29829 }, { "epoch": 0.9142454333701115, "grad_norm": 1.3149444516510578, "learning_rate": 1.9161152695165775e-07, "loss": 0.5729, "step": 29830 }, { "epoch": 0.9142760818928528, "grad_norm": 1.3129435435748305, "learning_rate": 1.9147546900390667e-07, "loss": 0.6673, "step": 29831 }, { "epoch": 0.914306730415594, "grad_norm": 1.4100582914034068, "learning_rate": 1.9133945843577805e-07, "loss": 0.6321, "step": 29832 }, { "epoch": 0.9143373789383352, "grad_norm": 1.4582704694703945, "learning_rate": 1.9120349524861247e-07, "loss": 0.5839, "step": 29833 }, { "epoch": 0.9143680274610764, "grad_norm": 1.3815524386647275, "learning_rate": 1.9106757944374831e-07, "loss": 0.584, "step": 29834 }, { "epoch": 0.9143986759838176, "grad_norm": 1.3520609109800092, "learning_rate": 1.9093171102252672e-07, "loss": 0.5922, "step": 29835 }, { "epoch": 0.9144293245065588, "grad_norm": 1.3022342302478844, "learning_rate": 1.907958899862844e-07, "loss": 0.5397, "step": 29836 }, { "epoch": 0.9144599730292999, "grad_norm": 1.331339691580149, "learning_rate": 1.9066011633636196e-07, "loss": 0.5866, "step": 29837 }, { "epoch": 0.9144906215520412, "grad_norm": 1.4492130389067126, "learning_rate": 1.9052439007409495e-07, "loss": 0.5727, "step": 29838 }, { "epoch": 0.9145212700747823, "grad_norm": 1.5424611773039232, "learning_rate": 1.9038871120082125e-07, "loss": 0.5328, "step": 29839 }, { "epoch": 0.9145519185975236, "grad_norm": 1.4695176601724065, "learning_rate": 1.9025307971787921e-07, "loss": 0.5897, "step": 29840 }, { "epoch": 0.9145825671202648, "grad_norm": 1.3263480034325168, "learning_rate": 1.9011749562660388e-07, "loss": 0.6265, "step": 29841 }, { "epoch": 0.914613215643006, "grad_norm": 0.463313167033215, "learning_rate": 1.8998195892833137e-07, "loss": 0.3904, "step": 29842 }, { "epoch": 0.9146438641657472, "grad_norm": 0.4253356026978202, "learning_rate": 1.898464696243979e-07, "loss": 0.3919, "step": 29843 }, { "epoch": 0.9146745126884884, "grad_norm": 1.3296630111866845, "learning_rate": 1.8971102771613736e-07, "loss": 0.5221, "step": 29844 }, { "epoch": 0.9147051612112296, "grad_norm": 1.3403021829436195, "learning_rate": 1.8957563320488427e-07, "loss": 0.5418, "step": 29845 }, { "epoch": 0.9147358097339708, "grad_norm": 1.488532113454389, "learning_rate": 1.8944028609197419e-07, "loss": 0.556, "step": 29846 }, { "epoch": 0.914766458256712, "grad_norm": 0.4383010038336708, "learning_rate": 1.893049863787394e-07, "loss": 0.4022, "step": 29847 }, { "epoch": 0.9147971067794533, "grad_norm": 0.435904915231814, "learning_rate": 1.8916973406651385e-07, "loss": 0.3675, "step": 29848 }, { "epoch": 0.9148277553021944, "grad_norm": 1.377133260848624, "learning_rate": 1.8903452915662924e-07, "loss": 0.5698, "step": 29849 }, { "epoch": 0.9148584038249357, "grad_norm": 1.3059412005296764, "learning_rate": 1.888993716504184e-07, "loss": 0.6068, "step": 29850 }, { "epoch": 0.9148890523476768, "grad_norm": 1.1941895976775845, "learning_rate": 1.8876426154921357e-07, "loss": 0.5235, "step": 29851 }, { "epoch": 0.9149197008704181, "grad_norm": 0.4531760203035983, "learning_rate": 1.8862919885434537e-07, "loss": 0.3965, "step": 29852 }, { "epoch": 0.9149503493931592, "grad_norm": 1.4332160164003322, "learning_rate": 1.8849418356714388e-07, "loss": 0.6684, "step": 29853 }, { "epoch": 0.9149809979159005, "grad_norm": 0.4362222515191512, "learning_rate": 1.8835921568894133e-07, "loss": 0.3816, "step": 29854 }, { "epoch": 0.9150116464386416, "grad_norm": 1.2766557919368655, "learning_rate": 1.882242952210661e-07, "loss": 0.5422, "step": 29855 }, { "epoch": 0.9150422949613829, "grad_norm": 0.42214269618865374, "learning_rate": 1.880894221648477e-07, "loss": 0.3783, "step": 29856 }, { "epoch": 0.915072943484124, "grad_norm": 1.3419379338383006, "learning_rate": 1.8795459652161618e-07, "loss": 0.6789, "step": 29857 }, { "epoch": 0.9151035920068653, "grad_norm": 1.5041124054060224, "learning_rate": 1.878198182926988e-07, "loss": 0.55, "step": 29858 }, { "epoch": 0.9151342405296065, "grad_norm": 1.2676678384556437, "learning_rate": 1.8768508747942393e-07, "loss": 0.6636, "step": 29859 }, { "epoch": 0.9151648890523477, "grad_norm": 1.4442865791040633, "learning_rate": 1.8755040408311941e-07, "loss": 0.5352, "step": 29860 }, { "epoch": 0.9151955375750889, "grad_norm": 0.42729161510794167, "learning_rate": 1.874157681051114e-07, "loss": 0.3804, "step": 29861 }, { "epoch": 0.9152261860978301, "grad_norm": 1.4980570377283968, "learning_rate": 1.872811795467283e-07, "loss": 0.6229, "step": 29862 }, { "epoch": 0.9152568346205713, "grad_norm": 1.502216593067223, "learning_rate": 1.8714663840929403e-07, "loss": 0.646, "step": 29863 }, { "epoch": 0.9152874831433125, "grad_norm": 1.3466469398110366, "learning_rate": 1.8701214469413588e-07, "loss": 0.6399, "step": 29864 }, { "epoch": 0.9153181316660537, "grad_norm": 1.343056155358959, "learning_rate": 1.8687769840257886e-07, "loss": 0.6451, "step": 29865 }, { "epoch": 0.915348780188795, "grad_norm": 1.4059622146317183, "learning_rate": 1.8674329953594693e-07, "loss": 0.7081, "step": 29866 }, { "epoch": 0.9153794287115361, "grad_norm": 1.3343268596434936, "learning_rate": 1.8660894809556464e-07, "loss": 0.5005, "step": 29867 }, { "epoch": 0.9154100772342773, "grad_norm": 1.3339084448728011, "learning_rate": 1.864746440827564e-07, "loss": 0.6521, "step": 29868 }, { "epoch": 0.9154407257570185, "grad_norm": 1.1903582238150638, "learning_rate": 1.8634038749884453e-07, "loss": 0.6018, "step": 29869 }, { "epoch": 0.9154713742797597, "grad_norm": 1.35692565931219, "learning_rate": 1.8620617834515299e-07, "loss": 0.5935, "step": 29870 }, { "epoch": 0.9155020228025009, "grad_norm": 1.3022737698106468, "learning_rate": 1.8607201662300346e-07, "loss": 0.5713, "step": 29871 }, { "epoch": 0.9155326713252421, "grad_norm": 0.4202945993898298, "learning_rate": 1.8593790233371766e-07, "loss": 0.3636, "step": 29872 }, { "epoch": 0.9155633198479833, "grad_norm": 1.3555118124709356, "learning_rate": 1.8580383547861792e-07, "loss": 0.5565, "step": 29873 }, { "epoch": 0.9155939683707245, "grad_norm": 1.3430196233936815, "learning_rate": 1.856698160590248e-07, "loss": 0.5992, "step": 29874 }, { "epoch": 0.9156246168934657, "grad_norm": 1.3532806591286681, "learning_rate": 1.8553584407625834e-07, "loss": 0.5884, "step": 29875 }, { "epoch": 0.9156552654162069, "grad_norm": 1.353444863804872, "learning_rate": 1.8540191953163978e-07, "loss": 0.646, "step": 29876 }, { "epoch": 0.9156859139389482, "grad_norm": 1.552931835939051, "learning_rate": 1.852680424264869e-07, "loss": 0.6709, "step": 29877 }, { "epoch": 0.9157165624616893, "grad_norm": 1.3393509493741336, "learning_rate": 1.8513421276212086e-07, "loss": 0.5767, "step": 29878 }, { "epoch": 0.9157472109844306, "grad_norm": 0.45728802510412936, "learning_rate": 1.8500043053985894e-07, "loss": 0.3753, "step": 29879 }, { "epoch": 0.9157778595071717, "grad_norm": 1.4064421240472325, "learning_rate": 1.8486669576101957e-07, "loss": 0.6571, "step": 29880 }, { "epoch": 0.915808508029913, "grad_norm": 1.3187192556766605, "learning_rate": 1.847330084269211e-07, "loss": 0.6027, "step": 29881 }, { "epoch": 0.9158391565526541, "grad_norm": 1.5515176229739407, "learning_rate": 1.8459936853888028e-07, "loss": 0.6651, "step": 29882 }, { "epoch": 0.9158698050753954, "grad_norm": 1.3957151737276554, "learning_rate": 1.8446577609821325e-07, "loss": 0.5835, "step": 29883 }, { "epoch": 0.9159004535981365, "grad_norm": 1.2575769087244002, "learning_rate": 1.843322311062379e-07, "loss": 0.566, "step": 29884 }, { "epoch": 0.9159311021208778, "grad_norm": 1.5249785932334883, "learning_rate": 1.8419873356426866e-07, "loss": 0.6415, "step": 29885 }, { "epoch": 0.915961750643619, "grad_norm": 1.1913871796459712, "learning_rate": 1.8406528347362172e-07, "loss": 0.4881, "step": 29886 }, { "epoch": 0.9159923991663602, "grad_norm": 0.4814990471049432, "learning_rate": 1.839318808356122e-07, "loss": 0.378, "step": 29887 }, { "epoch": 0.9160230476891014, "grad_norm": 1.3720672322674914, "learning_rate": 1.8379852565155343e-07, "loss": 0.635, "step": 29888 }, { "epoch": 0.9160536962118426, "grad_norm": 1.3769603727662774, "learning_rate": 1.836652179227605e-07, "loss": 0.6155, "step": 29889 }, { "epoch": 0.9160843447345838, "grad_norm": 1.517207595719673, "learning_rate": 1.8353195765054566e-07, "loss": 0.4895, "step": 29890 }, { "epoch": 0.916114993257325, "grad_norm": 1.2606088144903373, "learning_rate": 1.8339874483622344e-07, "loss": 0.6458, "step": 29891 }, { "epoch": 0.9161456417800662, "grad_norm": 1.3851166231560414, "learning_rate": 1.8326557948110611e-07, "loss": 0.5235, "step": 29892 }, { "epoch": 0.9161762903028075, "grad_norm": 1.3549319377709974, "learning_rate": 1.831324615865049e-07, "loss": 0.5824, "step": 29893 }, { "epoch": 0.9162069388255486, "grad_norm": 1.3277222705313219, "learning_rate": 1.82999391153732e-07, "loss": 0.6747, "step": 29894 }, { "epoch": 0.9162375873482899, "grad_norm": 1.2161423761302688, "learning_rate": 1.828663681840992e-07, "loss": 0.5078, "step": 29895 }, { "epoch": 0.916268235871031, "grad_norm": 1.403364806564611, "learning_rate": 1.8273339267891598e-07, "loss": 0.5833, "step": 29896 }, { "epoch": 0.9162988843937723, "grad_norm": 1.3755847362787628, "learning_rate": 1.8260046463949298e-07, "loss": 0.61, "step": 29897 }, { "epoch": 0.9163295329165134, "grad_norm": 0.4618970977374021, "learning_rate": 1.8246758406714082e-07, "loss": 0.4021, "step": 29898 }, { "epoch": 0.9163601814392546, "grad_norm": 0.4468507354506604, "learning_rate": 1.8233475096316788e-07, "loss": 0.404, "step": 29899 }, { "epoch": 0.9163908299619958, "grad_norm": 1.3875403540879414, "learning_rate": 1.822019653288837e-07, "loss": 0.6823, "step": 29900 }, { "epoch": 0.916421478484737, "grad_norm": 1.285224581385801, "learning_rate": 1.8206922716559493e-07, "loss": 0.5476, "step": 29901 }, { "epoch": 0.9164521270074782, "grad_norm": 1.345058384382677, "learning_rate": 1.819365364746123e-07, "loss": 0.5703, "step": 29902 }, { "epoch": 0.9164827755302194, "grad_norm": 1.2796887180071808, "learning_rate": 1.8180389325724135e-07, "loss": 0.5688, "step": 29903 }, { "epoch": 0.9165134240529607, "grad_norm": 0.4512081086058013, "learning_rate": 1.8167129751478886e-07, "loss": 0.4032, "step": 29904 }, { "epoch": 0.9165440725757018, "grad_norm": 1.4398653216227328, "learning_rate": 1.8153874924856207e-07, "loss": 0.6331, "step": 29905 }, { "epoch": 0.9165747210984431, "grad_norm": 1.3672599385801436, "learning_rate": 1.814062484598672e-07, "loss": 0.6315, "step": 29906 }, { "epoch": 0.9166053696211842, "grad_norm": 1.4468333222119394, "learning_rate": 1.812737951500093e-07, "loss": 0.4613, "step": 29907 }, { "epoch": 0.9166360181439255, "grad_norm": 1.389062070401109, "learning_rate": 1.8114138932029347e-07, "loss": 0.6202, "step": 29908 }, { "epoch": 0.9166666666666666, "grad_norm": 0.4562142799288852, "learning_rate": 1.8100903097202415e-07, "loss": 0.3963, "step": 29909 }, { "epoch": 0.9166973151894079, "grad_norm": 1.3264468799805114, "learning_rate": 1.8087672010650704e-07, "loss": 0.6114, "step": 29910 }, { "epoch": 0.916727963712149, "grad_norm": 1.3215156102123187, "learning_rate": 1.8074445672504382e-07, "loss": 0.5474, "step": 29911 }, { "epoch": 0.9167586122348903, "grad_norm": 1.2993806445276936, "learning_rate": 1.8061224082893791e-07, "loss": 0.5824, "step": 29912 }, { "epoch": 0.9167892607576315, "grad_norm": 1.2548805838343797, "learning_rate": 1.8048007241949384e-07, "loss": 0.6128, "step": 29913 }, { "epoch": 0.9168199092803727, "grad_norm": 1.3269911607605402, "learning_rate": 1.8034795149801276e-07, "loss": 0.5401, "step": 29914 }, { "epoch": 0.9168505578031139, "grad_norm": 0.4454529036588029, "learning_rate": 1.802158780657959e-07, "loss": 0.3907, "step": 29915 }, { "epoch": 0.9168812063258551, "grad_norm": 0.44123876961897474, "learning_rate": 1.800838521241449e-07, "loss": 0.3837, "step": 29916 }, { "epoch": 0.9169118548485963, "grad_norm": 1.3700534932495658, "learning_rate": 1.7995187367436106e-07, "loss": 0.5661, "step": 29917 }, { "epoch": 0.9169425033713375, "grad_norm": 1.4557707772940174, "learning_rate": 1.798199427177455e-07, "loss": 0.6136, "step": 29918 }, { "epoch": 0.9169731518940787, "grad_norm": 1.311716355126442, "learning_rate": 1.7968805925559663e-07, "loss": 0.5425, "step": 29919 }, { "epoch": 0.91700380041682, "grad_norm": 1.3543831123721461, "learning_rate": 1.7955622328921451e-07, "loss": 0.6596, "step": 29920 }, { "epoch": 0.9170344489395611, "grad_norm": 1.4872787254287432, "learning_rate": 1.794244348198987e-07, "loss": 0.5017, "step": 29921 }, { "epoch": 0.9170650974623024, "grad_norm": 1.5441150897198088, "learning_rate": 1.7929269384894755e-07, "loss": 0.6691, "step": 29922 }, { "epoch": 0.9170957459850435, "grad_norm": 1.4739798910313147, "learning_rate": 1.7916100037765837e-07, "loss": 0.5946, "step": 29923 }, { "epoch": 0.9171263945077848, "grad_norm": 1.3880017249552101, "learning_rate": 1.7902935440732962e-07, "loss": 0.5626, "step": 29924 }, { "epoch": 0.9171570430305259, "grad_norm": 1.5060726997669103, "learning_rate": 1.7889775593925795e-07, "loss": 0.6653, "step": 29925 }, { "epoch": 0.9171876915532672, "grad_norm": 1.4697245312670342, "learning_rate": 1.7876620497474018e-07, "loss": 0.597, "step": 29926 }, { "epoch": 0.9172183400760083, "grad_norm": 1.4481179165810556, "learning_rate": 1.7863470151507246e-07, "loss": 0.6564, "step": 29927 }, { "epoch": 0.9172489885987496, "grad_norm": 1.3036750354393658, "learning_rate": 1.7850324556155096e-07, "loss": 0.6399, "step": 29928 }, { "epoch": 0.9172796371214907, "grad_norm": 1.232553483586671, "learning_rate": 1.7837183711547078e-07, "loss": 0.5084, "step": 29929 }, { "epoch": 0.9173102856442319, "grad_norm": 1.3266281832971412, "learning_rate": 1.7824047617812644e-07, "loss": 0.5544, "step": 29930 }, { "epoch": 0.9173409341669732, "grad_norm": 1.4497677255346797, "learning_rate": 1.7810916275081136e-07, "loss": 0.651, "step": 29931 }, { "epoch": 0.9173715826897143, "grad_norm": 1.3691772979631207, "learning_rate": 1.779778968348217e-07, "loss": 0.5297, "step": 29932 }, { "epoch": 0.9174022312124556, "grad_norm": 0.4862230208238734, "learning_rate": 1.7784667843144977e-07, "loss": 0.4003, "step": 29933 }, { "epoch": 0.9174328797351967, "grad_norm": 1.3595732639576137, "learning_rate": 1.777155075419873e-07, "loss": 0.6343, "step": 29934 }, { "epoch": 0.917463528257938, "grad_norm": 1.2855179633198441, "learning_rate": 1.7758438416772827e-07, "loss": 0.6451, "step": 29935 }, { "epoch": 0.9174941767806791, "grad_norm": 1.3651413496506608, "learning_rate": 1.7745330830996387e-07, "loss": 0.7177, "step": 29936 }, { "epoch": 0.9175248253034204, "grad_norm": 0.43808449947896283, "learning_rate": 1.773222799699864e-07, "loss": 0.4024, "step": 29937 }, { "epoch": 0.9175554738261615, "grad_norm": 1.3120536674372647, "learning_rate": 1.7719129914908594e-07, "loss": 0.5858, "step": 29938 }, { "epoch": 0.9175861223489028, "grad_norm": 1.3371108375531915, "learning_rate": 1.770603658485537e-07, "loss": 0.5538, "step": 29939 }, { "epoch": 0.917616770871644, "grad_norm": 1.3028976324793808, "learning_rate": 1.7692948006968024e-07, "loss": 0.6229, "step": 29940 }, { "epoch": 0.9176474193943852, "grad_norm": 1.2104267687312453, "learning_rate": 1.767986418137546e-07, "loss": 0.5479, "step": 29941 }, { "epoch": 0.9176780679171264, "grad_norm": 1.4979129899764956, "learning_rate": 1.7666785108206462e-07, "loss": 0.6057, "step": 29942 }, { "epoch": 0.9177087164398676, "grad_norm": 1.3864523410585299, "learning_rate": 1.7653710787590206e-07, "loss": 0.5855, "step": 29943 }, { "epoch": 0.9177393649626088, "grad_norm": 1.4450536418396351, "learning_rate": 1.7640641219655252e-07, "loss": 0.5806, "step": 29944 }, { "epoch": 0.91777001348535, "grad_norm": 1.3296669811462112, "learning_rate": 1.7627576404530554e-07, "loss": 0.5679, "step": 29945 }, { "epoch": 0.9178006620080912, "grad_norm": 0.44890078020627183, "learning_rate": 1.7614516342344678e-07, "loss": 0.3888, "step": 29946 }, { "epoch": 0.9178313105308324, "grad_norm": 1.3384974004081864, "learning_rate": 1.7601461033226407e-07, "loss": 0.5509, "step": 29947 }, { "epoch": 0.9178619590535736, "grad_norm": 1.2398821535460767, "learning_rate": 1.7588410477304475e-07, "loss": 0.5592, "step": 29948 }, { "epoch": 0.9178926075763149, "grad_norm": 1.3509927639473895, "learning_rate": 1.7575364674707275e-07, "loss": 0.6514, "step": 29949 }, { "epoch": 0.917923256099056, "grad_norm": 1.3285442626850823, "learning_rate": 1.7562323625563427e-07, "loss": 0.6115, "step": 29950 }, { "epoch": 0.9179539046217973, "grad_norm": 1.1549551399850144, "learning_rate": 1.7549287330001498e-07, "loss": 0.5317, "step": 29951 }, { "epoch": 0.9179845531445384, "grad_norm": 1.2527501276288693, "learning_rate": 1.753625578814988e-07, "loss": 0.6379, "step": 29952 }, { "epoch": 0.9180152016672797, "grad_norm": 1.430851571037625, "learning_rate": 1.7523229000136866e-07, "loss": 0.7048, "step": 29953 }, { "epoch": 0.9180458501900208, "grad_norm": 1.2666647415823662, "learning_rate": 1.751020696609107e-07, "loss": 0.4956, "step": 29954 }, { "epoch": 0.9180764987127621, "grad_norm": 1.596802316583048, "learning_rate": 1.749718968614056e-07, "loss": 0.6215, "step": 29955 }, { "epoch": 0.9181071472355032, "grad_norm": 1.5472669545093887, "learning_rate": 1.7484177160413785e-07, "loss": 0.6186, "step": 29956 }, { "epoch": 0.9181377957582445, "grad_norm": 1.3752361773412611, "learning_rate": 1.7471169389038812e-07, "loss": 0.5607, "step": 29957 }, { "epoch": 0.9181684442809857, "grad_norm": 1.4158013957242752, "learning_rate": 1.7458166372143815e-07, "loss": 0.601, "step": 29958 }, { "epoch": 0.9181990928037269, "grad_norm": 1.39921506132041, "learning_rate": 1.744516810985708e-07, "loss": 0.5468, "step": 29959 }, { "epoch": 0.9182297413264681, "grad_norm": 1.406224371962145, "learning_rate": 1.7432174602306507e-07, "loss": 0.6554, "step": 29960 }, { "epoch": 0.9182603898492092, "grad_norm": 1.447842020902374, "learning_rate": 1.7419185849620158e-07, "loss": 0.4977, "step": 29961 }, { "epoch": 0.9182910383719505, "grad_norm": 1.3023878937831685, "learning_rate": 1.7406201851926097e-07, "loss": 0.6192, "step": 29962 }, { "epoch": 0.9183216868946916, "grad_norm": 1.3639333997881071, "learning_rate": 1.7393222609352167e-07, "loss": 0.6422, "step": 29963 }, { "epoch": 0.9183523354174329, "grad_norm": 1.3390926730688384, "learning_rate": 1.7380248122026322e-07, "loss": 0.5397, "step": 29964 }, { "epoch": 0.918382983940174, "grad_norm": 0.42599493780886555, "learning_rate": 1.7367278390076404e-07, "loss": 0.3983, "step": 29965 }, { "epoch": 0.9184136324629153, "grad_norm": 1.3760417416158726, "learning_rate": 1.735431341363014e-07, "loss": 0.5465, "step": 29966 }, { "epoch": 0.9184442809856564, "grad_norm": 1.2897906514351045, "learning_rate": 1.7341353192815325e-07, "loss": 0.6371, "step": 29967 }, { "epoch": 0.9184749295083977, "grad_norm": 1.250332935098255, "learning_rate": 1.7328397727759628e-07, "loss": 0.5851, "step": 29968 }, { "epoch": 0.9185055780311389, "grad_norm": 1.3294965994169556, "learning_rate": 1.7315447018590724e-07, "loss": 0.5369, "step": 29969 }, { "epoch": 0.9185362265538801, "grad_norm": 1.2674668513032796, "learning_rate": 1.7302501065436295e-07, "loss": 0.5491, "step": 29970 }, { "epoch": 0.9185668750766213, "grad_norm": 1.4182806025573491, "learning_rate": 1.7289559868423733e-07, "loss": 0.6267, "step": 29971 }, { "epoch": 0.9185975235993625, "grad_norm": 1.4542897030446602, "learning_rate": 1.727662342768066e-07, "loss": 0.6586, "step": 29972 }, { "epoch": 0.9186281721221037, "grad_norm": 1.294278212393541, "learning_rate": 1.7263691743334587e-07, "loss": 0.629, "step": 29973 }, { "epoch": 0.9186588206448449, "grad_norm": 1.409587018192952, "learning_rate": 1.7250764815512854e-07, "loss": 0.5859, "step": 29974 }, { "epoch": 0.9186894691675861, "grad_norm": 1.2896414312914999, "learning_rate": 1.7237842644342862e-07, "loss": 0.5404, "step": 29975 }, { "epoch": 0.9187201176903274, "grad_norm": 0.42621946978688696, "learning_rate": 1.7224925229951838e-07, "loss": 0.3615, "step": 29976 }, { "epoch": 0.9187507662130685, "grad_norm": 1.4786834292310358, "learning_rate": 1.721201257246724e-07, "loss": 0.6158, "step": 29977 }, { "epoch": 0.9187814147358098, "grad_norm": 1.3081161826234422, "learning_rate": 1.7199104672016187e-07, "loss": 0.6409, "step": 29978 }, { "epoch": 0.9188120632585509, "grad_norm": 1.3388420610608096, "learning_rate": 1.7186201528725855e-07, "loss": 0.6367, "step": 29979 }, { "epoch": 0.9188427117812922, "grad_norm": 1.2963829038955772, "learning_rate": 1.7173303142723418e-07, "loss": 0.5743, "step": 29980 }, { "epoch": 0.9188733603040333, "grad_norm": 1.4556672375490285, "learning_rate": 1.7160409514136e-07, "loss": 0.5814, "step": 29981 }, { "epoch": 0.9189040088267746, "grad_norm": 1.5799680838309853, "learning_rate": 1.7147520643090554e-07, "loss": 0.5922, "step": 29982 }, { "epoch": 0.9189346573495157, "grad_norm": 0.4631006969991412, "learning_rate": 1.7134636529714144e-07, "loss": 0.3801, "step": 29983 }, { "epoch": 0.918965305872257, "grad_norm": 1.3262038305170165, "learning_rate": 1.712175717413378e-07, "loss": 0.632, "step": 29984 }, { "epoch": 0.9189959543949981, "grad_norm": 0.46713380336719157, "learning_rate": 1.7108882576476194e-07, "loss": 0.4146, "step": 29985 }, { "epoch": 0.9190266029177394, "grad_norm": 1.3137245307468668, "learning_rate": 1.709601273686845e-07, "loss": 0.4816, "step": 29986 }, { "epoch": 0.9190572514404806, "grad_norm": 1.3634489673420687, "learning_rate": 1.7083147655437172e-07, "loss": 0.6456, "step": 29987 }, { "epoch": 0.9190878999632218, "grad_norm": 1.453864654667045, "learning_rate": 1.70702873323092e-07, "loss": 0.5873, "step": 29988 }, { "epoch": 0.919118548485963, "grad_norm": 1.366245532745087, "learning_rate": 1.7057431767611264e-07, "loss": 0.6226, "step": 29989 }, { "epoch": 0.9191491970087042, "grad_norm": 1.202158142199544, "learning_rate": 1.7044580961469992e-07, "loss": 0.5464, "step": 29990 }, { "epoch": 0.9191798455314454, "grad_norm": 1.3168821096194772, "learning_rate": 1.7031734914012056e-07, "loss": 0.6809, "step": 29991 }, { "epoch": 0.9192104940541865, "grad_norm": 1.6176115427708193, "learning_rate": 1.701889362536402e-07, "loss": 0.6671, "step": 29992 }, { "epoch": 0.9192411425769278, "grad_norm": 1.2927824272717077, "learning_rate": 1.7006057095652395e-07, "loss": 0.5272, "step": 29993 }, { "epoch": 0.919271791099669, "grad_norm": 1.5213904342191993, "learning_rate": 1.6993225325003638e-07, "loss": 0.651, "step": 29994 }, { "epoch": 0.9193024396224102, "grad_norm": 1.9453009492251812, "learning_rate": 1.6980398313544255e-07, "loss": 0.611, "step": 29995 }, { "epoch": 0.9193330881451514, "grad_norm": 1.294396295276328, "learning_rate": 1.6967576061400592e-07, "loss": 0.585, "step": 29996 }, { "epoch": 0.9193637366678926, "grad_norm": 1.3602846844872807, "learning_rate": 1.6954758568698992e-07, "loss": 0.5673, "step": 29997 }, { "epoch": 0.9193943851906338, "grad_norm": 1.256973509617478, "learning_rate": 1.6941945835565686e-07, "loss": 0.6287, "step": 29998 }, { "epoch": 0.919425033713375, "grad_norm": 1.3140157067511125, "learning_rate": 1.692913786212702e-07, "loss": 0.6158, "step": 29999 }, { "epoch": 0.9194556822361162, "grad_norm": 1.511709423216587, "learning_rate": 1.6916334648509225e-07, "loss": 0.5368, "step": 30000 }, { "epoch": 0.9194863307588574, "grad_norm": 1.3192716213635007, "learning_rate": 1.690353619483831e-07, "loss": 0.6005, "step": 30001 }, { "epoch": 0.9195169792815986, "grad_norm": 1.5300110377645382, "learning_rate": 1.6890742501240453e-07, "loss": 0.6151, "step": 30002 }, { "epoch": 0.9195476278043399, "grad_norm": 1.2232298994606217, "learning_rate": 1.6877953567841777e-07, "loss": 0.6556, "step": 30003 }, { "epoch": 0.919578276327081, "grad_norm": 1.4007243748878553, "learning_rate": 1.6865169394768176e-07, "loss": 0.568, "step": 30004 }, { "epoch": 0.9196089248498223, "grad_norm": 1.360659021139662, "learning_rate": 1.6852389982145722e-07, "loss": 0.5833, "step": 30005 }, { "epoch": 0.9196395733725634, "grad_norm": 1.342496750877068, "learning_rate": 1.6839615330100313e-07, "loss": 0.5849, "step": 30006 }, { "epoch": 0.9196702218953047, "grad_norm": 1.4357411253951122, "learning_rate": 1.6826845438757733e-07, "loss": 0.5398, "step": 30007 }, { "epoch": 0.9197008704180458, "grad_norm": 1.2814979916870646, "learning_rate": 1.6814080308243885e-07, "loss": 0.5224, "step": 30008 }, { "epoch": 0.9197315189407871, "grad_norm": 1.3871984352330398, "learning_rate": 1.6801319938684502e-07, "loss": 0.609, "step": 30009 }, { "epoch": 0.9197621674635282, "grad_norm": 1.4902657598115063, "learning_rate": 1.678856433020537e-07, "loss": 0.5858, "step": 30010 }, { "epoch": 0.9197928159862695, "grad_norm": 1.2160533086096832, "learning_rate": 1.6775813482932225e-07, "loss": 0.4362, "step": 30011 }, { "epoch": 0.9198234645090106, "grad_norm": 0.4351830411752262, "learning_rate": 1.6763067396990517e-07, "loss": 0.3668, "step": 30012 }, { "epoch": 0.9198541130317519, "grad_norm": 1.510725732547903, "learning_rate": 1.6750326072505984e-07, "loss": 0.563, "step": 30013 }, { "epoch": 0.9198847615544931, "grad_norm": 1.3477096144924157, "learning_rate": 1.673758950960419e-07, "loss": 0.652, "step": 30014 }, { "epoch": 0.9199154100772343, "grad_norm": 1.310931599213072, "learning_rate": 1.672485770841048e-07, "loss": 0.6659, "step": 30015 }, { "epoch": 0.9199460585999755, "grad_norm": 1.3093206611188204, "learning_rate": 1.6712130669050476e-07, "loss": 0.5184, "step": 30016 }, { "epoch": 0.9199767071227167, "grad_norm": 1.3472687682823827, "learning_rate": 1.6699408391649407e-07, "loss": 0.6041, "step": 30017 }, { "epoch": 0.9200073556454579, "grad_norm": 1.3480999503376039, "learning_rate": 1.6686690876332845e-07, "loss": 0.6974, "step": 30018 }, { "epoch": 0.9200380041681991, "grad_norm": 1.3976405959794398, "learning_rate": 1.6673978123225963e-07, "loss": 0.6422, "step": 30019 }, { "epoch": 0.9200686526909403, "grad_norm": 1.4385091780658577, "learning_rate": 1.6661270132454e-07, "loss": 0.6681, "step": 30020 }, { "epoch": 0.9200993012136816, "grad_norm": 1.492962730961712, "learning_rate": 1.6648566904142183e-07, "loss": 0.5453, "step": 30021 }, { "epoch": 0.9201299497364227, "grad_norm": 1.3225160963316802, "learning_rate": 1.6635868438415748e-07, "loss": 0.6167, "step": 30022 }, { "epoch": 0.9201605982591639, "grad_norm": 1.3887893589194786, "learning_rate": 1.662317473539976e-07, "loss": 0.6027, "step": 30023 }, { "epoch": 0.9201912467819051, "grad_norm": 1.3412898507274695, "learning_rate": 1.6610485795219288e-07, "loss": 0.5556, "step": 30024 }, { "epoch": 0.9202218953046463, "grad_norm": 1.3631057711093535, "learning_rate": 1.6597801617999454e-07, "loss": 0.5158, "step": 30025 }, { "epoch": 0.9202525438273875, "grad_norm": 1.1998482586312982, "learning_rate": 1.6585122203865046e-07, "loss": 0.5884, "step": 30026 }, { "epoch": 0.9202831923501287, "grad_norm": 1.4532288322304319, "learning_rate": 1.657244755294124e-07, "loss": 0.5987, "step": 30027 }, { "epoch": 0.9203138408728699, "grad_norm": 1.1837368390923904, "learning_rate": 1.655977766535266e-07, "loss": 0.5456, "step": 30028 }, { "epoch": 0.9203444893956111, "grad_norm": 1.3338587062435685, "learning_rate": 1.654711254122443e-07, "loss": 0.62, "step": 30029 }, { "epoch": 0.9203751379183523, "grad_norm": 1.3579433070269562, "learning_rate": 1.6534452180681115e-07, "loss": 0.4645, "step": 30030 }, { "epoch": 0.9204057864410935, "grad_norm": 0.447103537807455, "learning_rate": 1.652179658384756e-07, "loss": 0.4025, "step": 30031 }, { "epoch": 0.9204364349638348, "grad_norm": 1.4944220628660905, "learning_rate": 1.6509145750848444e-07, "loss": 0.6917, "step": 30032 }, { "epoch": 0.9204670834865759, "grad_norm": 1.5080420661979197, "learning_rate": 1.649649968180844e-07, "loss": 0.587, "step": 30033 }, { "epoch": 0.9204977320093172, "grad_norm": 1.4480684124841638, "learning_rate": 1.6483858376852123e-07, "loss": 0.6323, "step": 30034 }, { "epoch": 0.9205283805320583, "grad_norm": 1.2307498146393296, "learning_rate": 1.6471221836104e-07, "loss": 0.5472, "step": 30035 }, { "epoch": 0.9205590290547996, "grad_norm": 1.1977692947748693, "learning_rate": 1.6458590059688696e-07, "loss": 0.5398, "step": 30036 }, { "epoch": 0.9205896775775407, "grad_norm": 1.1591825204692734, "learning_rate": 1.6445963047730663e-07, "loss": 0.4806, "step": 30037 }, { "epoch": 0.920620326100282, "grad_norm": 1.4450366712485445, "learning_rate": 1.6433340800354302e-07, "loss": 0.5746, "step": 30038 }, { "epoch": 0.9206509746230231, "grad_norm": 1.3811900640966328, "learning_rate": 1.6420723317683796e-07, "loss": 0.6499, "step": 30039 }, { "epoch": 0.9206816231457644, "grad_norm": 1.4209073503886687, "learning_rate": 1.6408110599843763e-07, "loss": 0.6021, "step": 30040 }, { "epoch": 0.9207122716685056, "grad_norm": 1.5777665809243695, "learning_rate": 1.6395502646958385e-07, "loss": 0.6052, "step": 30041 }, { "epoch": 0.9207429201912468, "grad_norm": 1.3378357462654018, "learning_rate": 1.638289945915178e-07, "loss": 0.596, "step": 30042 }, { "epoch": 0.920773568713988, "grad_norm": 1.450722601686118, "learning_rate": 1.6370301036548186e-07, "loss": 0.5911, "step": 30043 }, { "epoch": 0.9208042172367292, "grad_norm": 1.486551145278312, "learning_rate": 1.6357707379271782e-07, "loss": 0.5603, "step": 30044 }, { "epoch": 0.9208348657594704, "grad_norm": 1.3833056086228717, "learning_rate": 1.6345118487446687e-07, "loss": 0.5569, "step": 30045 }, { "epoch": 0.9208655142822116, "grad_norm": 1.2444352349575392, "learning_rate": 1.6332534361196806e-07, "loss": 0.6412, "step": 30046 }, { "epoch": 0.9208961628049528, "grad_norm": 1.159459993475859, "learning_rate": 1.6319955000646258e-07, "loss": 0.5059, "step": 30047 }, { "epoch": 0.920926811327694, "grad_norm": 0.4437929363212436, "learning_rate": 1.630738040591895e-07, "loss": 0.3926, "step": 30048 }, { "epoch": 0.9209574598504352, "grad_norm": 1.4586633676462752, "learning_rate": 1.6294810577138832e-07, "loss": 0.6165, "step": 30049 }, { "epoch": 0.9209881083731765, "grad_norm": 1.4684247600996456, "learning_rate": 1.6282245514429583e-07, "loss": 0.5854, "step": 30050 }, { "epoch": 0.9210187568959176, "grad_norm": 0.44283500415031135, "learning_rate": 1.6269685217915222e-07, "loss": 0.4035, "step": 30051 }, { "epoch": 0.9210494054186589, "grad_norm": 1.4071733451996151, "learning_rate": 1.625712968771942e-07, "loss": 0.5323, "step": 30052 }, { "epoch": 0.9210800539414, "grad_norm": 1.2535113173699686, "learning_rate": 1.624457892396586e-07, "loss": 0.5574, "step": 30053 }, { "epoch": 0.9211107024641412, "grad_norm": 1.188771518438103, "learning_rate": 1.6232032926778218e-07, "loss": 0.5515, "step": 30054 }, { "epoch": 0.9211413509868824, "grad_norm": 1.2576643904504035, "learning_rate": 1.6219491696280122e-07, "loss": 0.5465, "step": 30055 }, { "epoch": 0.9211719995096236, "grad_norm": 1.3761451434305236, "learning_rate": 1.6206955232595245e-07, "loss": 0.5701, "step": 30056 }, { "epoch": 0.9212026480323648, "grad_norm": 0.4496012009485757, "learning_rate": 1.6194423535846936e-07, "loss": 0.4037, "step": 30057 }, { "epoch": 0.921233296555106, "grad_norm": 1.4402090055106695, "learning_rate": 1.6181896606158764e-07, "loss": 0.5267, "step": 30058 }, { "epoch": 0.9212639450778473, "grad_norm": 0.45228298701740555, "learning_rate": 1.616937444365424e-07, "loss": 0.3917, "step": 30059 }, { "epoch": 0.9212945936005884, "grad_norm": 1.2211453454023027, "learning_rate": 1.6156857048456654e-07, "loss": 0.6325, "step": 30060 }, { "epoch": 0.9213252421233297, "grad_norm": 1.4473229681779456, "learning_rate": 1.614434442068924e-07, "loss": 0.61, "step": 30061 }, { "epoch": 0.9213558906460708, "grad_norm": 1.3915296265001058, "learning_rate": 1.6131836560475457e-07, "loss": 0.5344, "step": 30062 }, { "epoch": 0.9213865391688121, "grad_norm": 1.4851640692497596, "learning_rate": 1.611933346793848e-07, "loss": 0.6411, "step": 30063 }, { "epoch": 0.9214171876915532, "grad_norm": 1.2150631185317482, "learning_rate": 1.6106835143201605e-07, "loss": 0.5642, "step": 30064 }, { "epoch": 0.9214478362142945, "grad_norm": 1.2290388313864478, "learning_rate": 1.6094341586387785e-07, "loss": 0.588, "step": 30065 }, { "epoch": 0.9214784847370356, "grad_norm": 0.4410767303901362, "learning_rate": 1.6081852797620257e-07, "loss": 0.3729, "step": 30066 }, { "epoch": 0.9215091332597769, "grad_norm": 1.36984258569883, "learning_rate": 1.6069368777022088e-07, "loss": 0.5779, "step": 30067 }, { "epoch": 0.921539781782518, "grad_norm": 1.4054910632235296, "learning_rate": 1.6056889524716234e-07, "loss": 0.5979, "step": 30068 }, { "epoch": 0.9215704303052593, "grad_norm": 1.352596040563695, "learning_rate": 1.60444150408256e-07, "loss": 0.6099, "step": 30069 }, { "epoch": 0.9216010788280005, "grad_norm": 1.2451441970327366, "learning_rate": 1.6031945325473253e-07, "loss": 0.598, "step": 30070 }, { "epoch": 0.9216317273507417, "grad_norm": 1.1986208815609918, "learning_rate": 1.6019480378781927e-07, "loss": 0.5548, "step": 30071 }, { "epoch": 0.9216623758734829, "grad_norm": 1.3645026310645547, "learning_rate": 1.600702020087458e-07, "loss": 0.6137, "step": 30072 }, { "epoch": 0.9216930243962241, "grad_norm": 1.3494517027335196, "learning_rate": 1.5994564791873835e-07, "loss": 0.6477, "step": 30073 }, { "epoch": 0.9217236729189653, "grad_norm": 1.2440384794230968, "learning_rate": 1.5982114151902428e-07, "loss": 0.6028, "step": 30074 }, { "epoch": 0.9217543214417065, "grad_norm": 1.3460467569481782, "learning_rate": 1.5969668281083207e-07, "loss": 0.5856, "step": 30075 }, { "epoch": 0.9217849699644477, "grad_norm": 1.3487282109713497, "learning_rate": 1.595722717953857e-07, "loss": 0.6831, "step": 30076 }, { "epoch": 0.921815618487189, "grad_norm": 1.2505454970981182, "learning_rate": 1.5944790847391255e-07, "loss": 0.5642, "step": 30077 }, { "epoch": 0.9218462670099301, "grad_norm": 1.2933163361842295, "learning_rate": 1.5932359284763832e-07, "loss": 0.6088, "step": 30078 }, { "epoch": 0.9218769155326714, "grad_norm": 1.1753220851863333, "learning_rate": 1.59199324917787e-07, "loss": 0.5338, "step": 30079 }, { "epoch": 0.9219075640554125, "grad_norm": 1.4241439628607342, "learning_rate": 1.5907510468558264e-07, "loss": 0.6112, "step": 30080 }, { "epoch": 0.9219382125781538, "grad_norm": 1.3931736556988352, "learning_rate": 1.5895093215225089e-07, "loss": 0.5918, "step": 30081 }, { "epoch": 0.9219688611008949, "grad_norm": 1.3069705218416119, "learning_rate": 1.5882680731901356e-07, "loss": 0.5472, "step": 30082 }, { "epoch": 0.9219995096236362, "grad_norm": 1.5193715762773778, "learning_rate": 1.587027301870947e-07, "loss": 0.6442, "step": 30083 }, { "epoch": 0.9220301581463773, "grad_norm": 0.4606945034100393, "learning_rate": 1.5857870075771608e-07, "loss": 0.3857, "step": 30084 }, { "epoch": 0.9220608066691185, "grad_norm": 0.45012214261688427, "learning_rate": 1.5845471903210063e-07, "loss": 0.3797, "step": 30085 }, { "epoch": 0.9220914551918598, "grad_norm": 0.4413548833642669, "learning_rate": 1.583307850114696e-07, "loss": 0.3758, "step": 30086 }, { "epoch": 0.9221221037146009, "grad_norm": 1.1921357580686627, "learning_rate": 1.5820689869704364e-07, "loss": 0.5639, "step": 30087 }, { "epoch": 0.9221527522373422, "grad_norm": 1.4168355074124666, "learning_rate": 1.5808306009004458e-07, "loss": 0.5791, "step": 30088 }, { "epoch": 0.9221834007600833, "grad_norm": 1.344403729414344, "learning_rate": 1.5795926919169201e-07, "loss": 0.6667, "step": 30089 }, { "epoch": 0.9222140492828246, "grad_norm": 1.350241211373378, "learning_rate": 1.5783552600320495e-07, "loss": 0.6317, "step": 30090 }, { "epoch": 0.9222446978055657, "grad_norm": 1.3657965584625584, "learning_rate": 1.5771183052580353e-07, "loss": 0.668, "step": 30091 }, { "epoch": 0.922275346328307, "grad_norm": 1.3289555021852564, "learning_rate": 1.575881827607073e-07, "loss": 0.6211, "step": 30092 }, { "epoch": 0.9223059948510481, "grad_norm": 1.2612640206289436, "learning_rate": 1.5746458270913258e-07, "loss": 0.5677, "step": 30093 }, { "epoch": 0.9223366433737894, "grad_norm": 1.6498566863797828, "learning_rate": 1.5734103037229942e-07, "loss": 0.7218, "step": 30094 }, { "epoch": 0.9223672918965306, "grad_norm": 1.3262710614328574, "learning_rate": 1.5721752575142357e-07, "loss": 0.6242, "step": 30095 }, { "epoch": 0.9223979404192718, "grad_norm": 1.280592654750459, "learning_rate": 1.5709406884772182e-07, "loss": 0.5441, "step": 30096 }, { "epoch": 0.922428588942013, "grad_norm": 1.3064274849799076, "learning_rate": 1.5697065966241266e-07, "loss": 0.5331, "step": 30097 }, { "epoch": 0.9224592374647542, "grad_norm": 1.4297165072524567, "learning_rate": 1.5684729819671008e-07, "loss": 0.5633, "step": 30098 }, { "epoch": 0.9224898859874954, "grad_norm": 1.1740394394874136, "learning_rate": 1.5672398445182978e-07, "loss": 0.506, "step": 30099 }, { "epoch": 0.9225205345102366, "grad_norm": 0.4279081345827363, "learning_rate": 1.5660071842898806e-07, "loss": 0.3931, "step": 30100 }, { "epoch": 0.9225511830329778, "grad_norm": 1.3553999259598268, "learning_rate": 1.5647750012939833e-07, "loss": 0.5458, "step": 30101 }, { "epoch": 0.922581831555719, "grad_norm": 1.1380928973155415, "learning_rate": 1.5635432955427464e-07, "loss": 0.5015, "step": 30102 }, { "epoch": 0.9226124800784602, "grad_norm": 1.3336152177461478, "learning_rate": 1.5623120670483215e-07, "loss": 0.6272, "step": 30103 }, { "epoch": 0.9226431286012015, "grad_norm": 3.237426960095853, "learning_rate": 1.5610813158228156e-07, "loss": 0.6456, "step": 30104 }, { "epoch": 0.9226737771239426, "grad_norm": 0.4397613971803473, "learning_rate": 1.5598510418783796e-07, "loss": 0.3885, "step": 30105 }, { "epoch": 0.9227044256466839, "grad_norm": 1.3820452263161833, "learning_rate": 1.558621245227121e-07, "loss": 0.61, "step": 30106 }, { "epoch": 0.922735074169425, "grad_norm": 1.5530547604278917, "learning_rate": 1.557391925881163e-07, "loss": 0.6399, "step": 30107 }, { "epoch": 0.9227657226921663, "grad_norm": 1.303506583315568, "learning_rate": 1.5561630838526186e-07, "loss": 0.6484, "step": 30108 }, { "epoch": 0.9227963712149074, "grad_norm": 1.4460898223415533, "learning_rate": 1.554934719153589e-07, "loss": 0.7116, "step": 30109 }, { "epoch": 0.9228270197376487, "grad_norm": 1.395782407510173, "learning_rate": 1.553706831796181e-07, "loss": 0.6788, "step": 30110 }, { "epoch": 0.9228576682603898, "grad_norm": 1.2373131518720433, "learning_rate": 1.5524794217925077e-07, "loss": 0.5665, "step": 30111 }, { "epoch": 0.9228883167831311, "grad_norm": 0.4455163190446856, "learning_rate": 1.5512524891546366e-07, "loss": 0.3893, "step": 30112 }, { "epoch": 0.9229189653058723, "grad_norm": 1.2808120049903204, "learning_rate": 1.550026033894675e-07, "loss": 0.633, "step": 30113 }, { "epoch": 0.9229496138286135, "grad_norm": 1.3280989137168202, "learning_rate": 1.548800056024713e-07, "loss": 0.5448, "step": 30114 }, { "epoch": 0.9229802623513547, "grad_norm": 1.3630853595901953, "learning_rate": 1.5475745555568077e-07, "loss": 0.503, "step": 30115 }, { "epoch": 0.9230109108740958, "grad_norm": 1.3163621266573524, "learning_rate": 1.5463495325030554e-07, "loss": 0.6145, "step": 30116 }, { "epoch": 0.9230415593968371, "grad_norm": 1.4462586148161702, "learning_rate": 1.5451249868755126e-07, "loss": 0.6562, "step": 30117 }, { "epoch": 0.9230722079195782, "grad_norm": 1.2402942758623212, "learning_rate": 1.543900918686253e-07, "loss": 0.5952, "step": 30118 }, { "epoch": 0.9231028564423195, "grad_norm": 1.443970585769045, "learning_rate": 1.5426773279473395e-07, "loss": 0.5631, "step": 30119 }, { "epoch": 0.9231335049650606, "grad_norm": 1.4902603835239376, "learning_rate": 1.5414542146708234e-07, "loss": 0.5881, "step": 30120 }, { "epoch": 0.9231641534878019, "grad_norm": 1.2436402213622084, "learning_rate": 1.5402315788687506e-07, "loss": 0.6182, "step": 30121 }, { "epoch": 0.923194802010543, "grad_norm": 1.550643246995233, "learning_rate": 1.5390094205531893e-07, "loss": 0.5771, "step": 30122 }, { "epoch": 0.9232254505332843, "grad_norm": 1.4379012639320385, "learning_rate": 1.5377877397361518e-07, "loss": 0.6168, "step": 30123 }, { "epoch": 0.9232560990560255, "grad_norm": 1.4788645826323714, "learning_rate": 1.536566536429701e-07, "loss": 0.5901, "step": 30124 }, { "epoch": 0.9232867475787667, "grad_norm": 1.3289959850598811, "learning_rate": 1.5353458106458551e-07, "loss": 0.543, "step": 30125 }, { "epoch": 0.9233173961015079, "grad_norm": 1.5306029649574315, "learning_rate": 1.5341255623966488e-07, "loss": 0.6352, "step": 30126 }, { "epoch": 0.9233480446242491, "grad_norm": 1.5489026010422882, "learning_rate": 1.5329057916941114e-07, "loss": 0.5664, "step": 30127 }, { "epoch": 0.9233786931469903, "grad_norm": 1.1787576150137913, "learning_rate": 1.5316864985502445e-07, "loss": 0.6282, "step": 30128 }, { "epoch": 0.9234093416697315, "grad_norm": 1.6245849322517036, "learning_rate": 1.5304676829770716e-07, "loss": 0.6562, "step": 30129 }, { "epoch": 0.9234399901924727, "grad_norm": 1.3709434113277617, "learning_rate": 1.5292493449866053e-07, "loss": 0.6524, "step": 30130 }, { "epoch": 0.923470638715214, "grad_norm": 0.4409509987204604, "learning_rate": 1.5280314845908474e-07, "loss": 0.3922, "step": 30131 }, { "epoch": 0.9235012872379551, "grad_norm": 1.2782400224337591, "learning_rate": 1.5268141018017933e-07, "loss": 0.6054, "step": 30132 }, { "epoch": 0.9235319357606964, "grad_norm": 1.4885703088033566, "learning_rate": 1.5255971966314508e-07, "loss": 0.6229, "step": 30133 }, { "epoch": 0.9235625842834375, "grad_norm": 0.4355204437198802, "learning_rate": 1.5243807690917932e-07, "loss": 0.4, "step": 30134 }, { "epoch": 0.9235932328061788, "grad_norm": 1.2070569540444385, "learning_rate": 1.5231648191948224e-07, "loss": 0.5494, "step": 30135 }, { "epoch": 0.9236238813289199, "grad_norm": 1.262788243539409, "learning_rate": 1.5219493469525004e-07, "loss": 0.6456, "step": 30136 }, { "epoch": 0.9236545298516612, "grad_norm": 0.4348927072200315, "learning_rate": 1.5207343523768237e-07, "loss": 0.3913, "step": 30137 }, { "epoch": 0.9236851783744023, "grad_norm": 1.3567545493562916, "learning_rate": 1.5195198354797547e-07, "loss": 0.6367, "step": 30138 }, { "epoch": 0.9237158268971436, "grad_norm": 0.450120575774065, "learning_rate": 1.5183057962732617e-07, "loss": 0.3886, "step": 30139 }, { "epoch": 0.9237464754198847, "grad_norm": 1.4161618837461178, "learning_rate": 1.5170922347693017e-07, "loss": 0.5367, "step": 30140 }, { "epoch": 0.923777123942626, "grad_norm": 1.2973011199693087, "learning_rate": 1.5158791509798432e-07, "loss": 0.604, "step": 30141 }, { "epoch": 0.9238077724653672, "grad_norm": 1.36931265718902, "learning_rate": 1.5146665449168262e-07, "loss": 0.5799, "step": 30142 }, { "epoch": 0.9238384209881084, "grad_norm": 1.407491536505326, "learning_rate": 1.5134544165922083e-07, "loss": 0.5226, "step": 30143 }, { "epoch": 0.9238690695108496, "grad_norm": 1.4292322265491362, "learning_rate": 1.5122427660179295e-07, "loss": 0.6732, "step": 30144 }, { "epoch": 0.9238997180335908, "grad_norm": 0.4323503227729904, "learning_rate": 1.5110315932059304e-07, "loss": 0.3717, "step": 30145 }, { "epoch": 0.923930366556332, "grad_norm": 0.4282985824342677, "learning_rate": 1.5098208981681462e-07, "loss": 0.3583, "step": 30146 }, { "epoch": 0.9239610150790731, "grad_norm": 0.43416775195867857, "learning_rate": 1.5086106809164947e-07, "loss": 0.3626, "step": 30147 }, { "epoch": 0.9239916636018144, "grad_norm": 1.3617694289140085, "learning_rate": 1.5074009414629165e-07, "loss": 0.666, "step": 30148 }, { "epoch": 0.9240223121245555, "grad_norm": 0.45041415726167094, "learning_rate": 1.5061916798193242e-07, "loss": 0.3856, "step": 30149 }, { "epoch": 0.9240529606472968, "grad_norm": 1.464723419655215, "learning_rate": 1.5049828959976308e-07, "loss": 0.573, "step": 30150 }, { "epoch": 0.924083609170038, "grad_norm": 1.301784099030907, "learning_rate": 1.5037745900097435e-07, "loss": 0.5935, "step": 30151 }, { "epoch": 0.9241142576927792, "grad_norm": 1.2181370827905356, "learning_rate": 1.5025667618675855e-07, "loss": 0.6289, "step": 30152 }, { "epoch": 0.9241449062155204, "grad_norm": 1.2401111563684446, "learning_rate": 1.5013594115830367e-07, "loss": 0.5321, "step": 30153 }, { "epoch": 0.9241755547382616, "grad_norm": 1.3807700028673429, "learning_rate": 1.5001525391679982e-07, "loss": 0.5906, "step": 30154 }, { "epoch": 0.9242062032610028, "grad_norm": 1.325828622414725, "learning_rate": 1.4989461446343723e-07, "loss": 0.5984, "step": 30155 }, { "epoch": 0.924236851783744, "grad_norm": 1.3236306205100745, "learning_rate": 1.4977402279940434e-07, "loss": 0.6442, "step": 30156 }, { "epoch": 0.9242675003064852, "grad_norm": 0.44661133332257846, "learning_rate": 1.496534789258891e-07, "loss": 0.4154, "step": 30157 }, { "epoch": 0.9242981488292265, "grad_norm": 1.5201321833804948, "learning_rate": 1.495329828440778e-07, "loss": 0.6279, "step": 30158 }, { "epoch": 0.9243287973519676, "grad_norm": 0.42185274982282384, "learning_rate": 1.4941253455516002e-07, "loss": 0.4024, "step": 30159 }, { "epoch": 0.9243594458747089, "grad_norm": 1.5519875774590706, "learning_rate": 1.4929213406032205e-07, "loss": 0.5984, "step": 30160 }, { "epoch": 0.92439009439745, "grad_norm": 1.335171388246033, "learning_rate": 1.4917178136074906e-07, "loss": 0.5436, "step": 30161 }, { "epoch": 0.9244207429201913, "grad_norm": 1.460845428856024, "learning_rate": 1.4905147645762785e-07, "loss": 0.5591, "step": 30162 }, { "epoch": 0.9244513914429324, "grad_norm": 1.294977297607334, "learning_rate": 1.489312193521436e-07, "loss": 0.6122, "step": 30163 }, { "epoch": 0.9244820399656737, "grad_norm": 1.440514703029836, "learning_rate": 1.488110100454815e-07, "loss": 0.6505, "step": 30164 }, { "epoch": 0.9245126884884148, "grad_norm": 1.3370344541567019, "learning_rate": 1.4869084853882497e-07, "loss": 0.5627, "step": 30165 }, { "epoch": 0.9245433370111561, "grad_norm": 1.3293479593656274, "learning_rate": 1.4857073483335927e-07, "loss": 0.5968, "step": 30166 }, { "epoch": 0.9245739855338972, "grad_norm": 0.4554068351433659, "learning_rate": 1.4845066893026783e-07, "loss": 0.4009, "step": 30167 }, { "epoch": 0.9246046340566385, "grad_norm": 1.299811466688853, "learning_rate": 1.4833065083073305e-07, "loss": 0.5686, "step": 30168 }, { "epoch": 0.9246352825793797, "grad_norm": 1.467268227690028, "learning_rate": 1.4821068053593734e-07, "loss": 0.6153, "step": 30169 }, { "epoch": 0.9246659311021209, "grad_norm": 1.5781850842338672, "learning_rate": 1.4809075804706363e-07, "loss": 0.5581, "step": 30170 }, { "epoch": 0.9246965796248621, "grad_norm": 1.4045336715103394, "learning_rate": 1.4797088336529264e-07, "loss": 0.6014, "step": 30171 }, { "epoch": 0.9247272281476033, "grad_norm": 1.2804397506546705, "learning_rate": 1.478510564918062e-07, "loss": 0.5849, "step": 30172 }, { "epoch": 0.9247578766703445, "grad_norm": 1.2687495570805796, "learning_rate": 1.4773127742778503e-07, "loss": 0.5802, "step": 30173 }, { "epoch": 0.9247885251930857, "grad_norm": 1.23567744502466, "learning_rate": 1.476115461744082e-07, "loss": 0.5995, "step": 30174 }, { "epoch": 0.9248191737158269, "grad_norm": 1.5539264880689703, "learning_rate": 1.4749186273285755e-07, "loss": 0.7068, "step": 30175 }, { "epoch": 0.9248498222385682, "grad_norm": 0.4105134797492613, "learning_rate": 1.4737222710431098e-07, "loss": 0.3924, "step": 30176 }, { "epoch": 0.9248804707613093, "grad_norm": 1.3450688395539558, "learning_rate": 1.472526392899465e-07, "loss": 0.5363, "step": 30177 }, { "epoch": 0.9249111192840505, "grad_norm": 1.4677927899727328, "learning_rate": 1.471330992909442e-07, "loss": 0.6779, "step": 30178 }, { "epoch": 0.9249417678067917, "grad_norm": 0.4613658083224511, "learning_rate": 1.4701360710848102e-07, "loss": 0.3975, "step": 30179 }, { "epoch": 0.9249724163295329, "grad_norm": 1.2813189061873387, "learning_rate": 1.4689416274373426e-07, "loss": 0.5984, "step": 30180 }, { "epoch": 0.9250030648522741, "grad_norm": 1.2493399323788459, "learning_rate": 1.4677476619788078e-07, "loss": 0.5112, "step": 30181 }, { "epoch": 0.9250337133750153, "grad_norm": 1.399320240772287, "learning_rate": 1.4665541747209743e-07, "loss": 0.659, "step": 30182 }, { "epoch": 0.9250643618977565, "grad_norm": 1.4020436382419235, "learning_rate": 1.465361165675605e-07, "loss": 0.5781, "step": 30183 }, { "epoch": 0.9250950104204977, "grad_norm": 1.2746166859530061, "learning_rate": 1.464168634854446e-07, "loss": 0.5225, "step": 30184 }, { "epoch": 0.925125658943239, "grad_norm": 1.3924243820894389, "learning_rate": 1.4629765822692487e-07, "loss": 0.6196, "step": 30185 }, { "epoch": 0.9251563074659801, "grad_norm": 1.444348374190559, "learning_rate": 1.4617850079317707e-07, "loss": 0.6457, "step": 30186 }, { "epoch": 0.9251869559887214, "grad_norm": 0.4532423284365987, "learning_rate": 1.4605939118537415e-07, "loss": 0.3985, "step": 30187 }, { "epoch": 0.9252176045114625, "grad_norm": 1.4686707371805958, "learning_rate": 1.4594032940468905e-07, "loss": 0.6552, "step": 30188 }, { "epoch": 0.9252482530342038, "grad_norm": 1.2260730374753166, "learning_rate": 1.4582131545229693e-07, "loss": 0.6097, "step": 30189 }, { "epoch": 0.9252789015569449, "grad_norm": 1.4728852516856221, "learning_rate": 1.4570234932936912e-07, "loss": 0.6462, "step": 30190 }, { "epoch": 0.9253095500796862, "grad_norm": 1.3480722634728965, "learning_rate": 1.455834310370785e-07, "loss": 0.5666, "step": 30191 }, { "epoch": 0.9253401986024273, "grad_norm": 1.4203094884053196, "learning_rate": 1.4546456057659532e-07, "loss": 0.6998, "step": 30192 }, { "epoch": 0.9253708471251686, "grad_norm": 1.4170061022577634, "learning_rate": 1.4534573794909246e-07, "loss": 0.6009, "step": 30193 }, { "epoch": 0.9254014956479097, "grad_norm": 0.4496601266705692, "learning_rate": 1.452269631557407e-07, "loss": 0.38, "step": 30194 }, { "epoch": 0.925432144170651, "grad_norm": 1.4452943009362684, "learning_rate": 1.451082361977091e-07, "loss": 0.534, "step": 30195 }, { "epoch": 0.9254627926933922, "grad_norm": 1.4321153609409802, "learning_rate": 1.4498955707616836e-07, "loss": 0.5872, "step": 30196 }, { "epoch": 0.9254934412161334, "grad_norm": 1.4187250759891357, "learning_rate": 1.4487092579228812e-07, "loss": 0.5194, "step": 30197 }, { "epoch": 0.9255240897388746, "grad_norm": 1.3146244948008707, "learning_rate": 1.4475234234723633e-07, "loss": 0.6522, "step": 30198 }, { "epoch": 0.9255547382616158, "grad_norm": 1.4825811599787726, "learning_rate": 1.4463380674218208e-07, "loss": 0.5962, "step": 30199 }, { "epoch": 0.925585386784357, "grad_norm": 1.2421146570344777, "learning_rate": 1.4451531897829384e-07, "loss": 0.6135, "step": 30200 }, { "epoch": 0.9256160353070982, "grad_norm": 1.3833466046136982, "learning_rate": 1.443968790567374e-07, "loss": 0.6338, "step": 30201 }, { "epoch": 0.9256466838298394, "grad_norm": 1.239028466134416, "learning_rate": 1.4427848697868175e-07, "loss": 0.5383, "step": 30202 }, { "epoch": 0.9256773323525807, "grad_norm": 0.4409007798907404, "learning_rate": 1.4416014274529211e-07, "loss": 0.3914, "step": 30203 }, { "epoch": 0.9257079808753218, "grad_norm": 0.40972129025064624, "learning_rate": 1.4404184635773532e-07, "loss": 0.364, "step": 30204 }, { "epoch": 0.9257386293980631, "grad_norm": 1.4426698397011717, "learning_rate": 1.4392359781717658e-07, "loss": 0.5289, "step": 30205 }, { "epoch": 0.9257692779208042, "grad_norm": 1.381004336138394, "learning_rate": 1.4380539712478102e-07, "loss": 0.5963, "step": 30206 }, { "epoch": 0.9257999264435455, "grad_norm": 1.4216293482789564, "learning_rate": 1.4368724428171333e-07, "loss": 0.6184, "step": 30207 }, { "epoch": 0.9258305749662866, "grad_norm": 1.3517008944889104, "learning_rate": 1.4356913928913807e-07, "loss": 0.5427, "step": 30208 }, { "epoch": 0.9258612234890278, "grad_norm": 1.2327303099674278, "learning_rate": 1.4345108214821823e-07, "loss": 0.5206, "step": 30209 }, { "epoch": 0.925891872011769, "grad_norm": 1.3780900746740048, "learning_rate": 1.4333307286011789e-07, "loss": 0.5559, "step": 30210 }, { "epoch": 0.9259225205345102, "grad_norm": 1.4081169937270834, "learning_rate": 1.4321511142599943e-07, "loss": 0.6369, "step": 30211 }, { "epoch": 0.9259531690572514, "grad_norm": 1.346701639929038, "learning_rate": 1.430971978470247e-07, "loss": 0.5735, "step": 30212 }, { "epoch": 0.9259838175799926, "grad_norm": 1.2058087466153662, "learning_rate": 1.4297933212435665e-07, "loss": 0.6146, "step": 30213 }, { "epoch": 0.9260144661027339, "grad_norm": 0.43073394216232275, "learning_rate": 1.428615142591555e-07, "loss": 0.3931, "step": 30214 }, { "epoch": 0.926045114625475, "grad_norm": 1.3570895317108205, "learning_rate": 1.4274374425258196e-07, "loss": 0.6412, "step": 30215 }, { "epoch": 0.9260757631482163, "grad_norm": 1.2176084410693486, "learning_rate": 1.4262602210579846e-07, "loss": 0.6361, "step": 30216 }, { "epoch": 0.9261064116709574, "grad_norm": 1.4879391118563312, "learning_rate": 1.4250834781996237e-07, "loss": 0.5745, "step": 30217 }, { "epoch": 0.9261370601936987, "grad_norm": 1.4351737189177083, "learning_rate": 1.4239072139623499e-07, "loss": 0.6642, "step": 30218 }, { "epoch": 0.9261677087164398, "grad_norm": 1.24009588433135, "learning_rate": 1.4227314283577488e-07, "loss": 0.4953, "step": 30219 }, { "epoch": 0.9261983572391811, "grad_norm": 0.44162003088183427, "learning_rate": 1.4215561213973994e-07, "loss": 0.3844, "step": 30220 }, { "epoch": 0.9262290057619222, "grad_norm": 1.344811921757901, "learning_rate": 1.4203812930928927e-07, "loss": 0.6131, "step": 30221 }, { "epoch": 0.9262596542846635, "grad_norm": 1.267324618283946, "learning_rate": 1.419206943455792e-07, "loss": 0.607, "step": 30222 }, { "epoch": 0.9262903028074047, "grad_norm": 1.4361450757865355, "learning_rate": 1.4180330724976764e-07, "loss": 0.5649, "step": 30223 }, { "epoch": 0.9263209513301459, "grad_norm": 1.3451450948630195, "learning_rate": 1.4168596802301148e-07, "loss": 0.5394, "step": 30224 }, { "epoch": 0.9263515998528871, "grad_norm": 1.6809514864571395, "learning_rate": 1.415686766664659e-07, "loss": 0.6044, "step": 30225 }, { "epoch": 0.9263822483756283, "grad_norm": 1.4315613929477105, "learning_rate": 1.4145143318128773e-07, "loss": 0.6584, "step": 30226 }, { "epoch": 0.9264128968983695, "grad_norm": 1.1271590537183096, "learning_rate": 1.4133423756863164e-07, "loss": 0.5416, "step": 30227 }, { "epoch": 0.9264435454211107, "grad_norm": 1.3351419613189308, "learning_rate": 1.4121708982965278e-07, "loss": 0.6051, "step": 30228 }, { "epoch": 0.9264741939438519, "grad_norm": 1.31661234742828, "learning_rate": 1.410999899655041e-07, "loss": 0.5783, "step": 30229 }, { "epoch": 0.9265048424665931, "grad_norm": 1.3985851664709448, "learning_rate": 1.4098293797734142e-07, "loss": 0.6494, "step": 30230 }, { "epoch": 0.9265354909893343, "grad_norm": 1.4988293682630898, "learning_rate": 1.4086593386631653e-07, "loss": 0.6522, "step": 30231 }, { "epoch": 0.9265661395120756, "grad_norm": 1.314276811035475, "learning_rate": 1.4074897763358354e-07, "loss": 0.6489, "step": 30232 }, { "epoch": 0.9265967880348167, "grad_norm": 1.1911542686944587, "learning_rate": 1.406320692802937e-07, "loss": 0.5439, "step": 30233 }, { "epoch": 0.926627436557558, "grad_norm": 0.4824231707300791, "learning_rate": 1.4051520880759896e-07, "loss": 0.4139, "step": 30234 }, { "epoch": 0.9266580850802991, "grad_norm": 0.4410558742466929, "learning_rate": 1.403983962166522e-07, "loss": 0.3974, "step": 30235 }, { "epoch": 0.9266887336030404, "grad_norm": 1.1362939526491915, "learning_rate": 1.4028163150860252e-07, "loss": 0.5464, "step": 30236 }, { "epoch": 0.9267193821257815, "grad_norm": 1.3816350743126233, "learning_rate": 1.401649146846018e-07, "loss": 0.6064, "step": 30237 }, { "epoch": 0.9267500306485228, "grad_norm": 1.4305846960132398, "learning_rate": 1.4004824574579967e-07, "loss": 0.6442, "step": 30238 }, { "epoch": 0.9267806791712639, "grad_norm": 1.3279236660630147, "learning_rate": 1.399316246933452e-07, "loss": 0.5591, "step": 30239 }, { "epoch": 0.9268113276940051, "grad_norm": 1.2517941614545387, "learning_rate": 1.3981505152838803e-07, "loss": 0.5949, "step": 30240 }, { "epoch": 0.9268419762167464, "grad_norm": 1.2745467479177115, "learning_rate": 1.3969852625207726e-07, "loss": 0.6245, "step": 30241 }, { "epoch": 0.9268726247394875, "grad_norm": 1.3699569020193343, "learning_rate": 1.395820488655597e-07, "loss": 0.6104, "step": 30242 }, { "epoch": 0.9269032732622288, "grad_norm": 1.6400098370032103, "learning_rate": 1.3946561936998448e-07, "loss": 0.5791, "step": 30243 }, { "epoch": 0.9269339217849699, "grad_norm": 2.603039853703667, "learning_rate": 1.3934923776649734e-07, "loss": 0.6291, "step": 30244 }, { "epoch": 0.9269645703077112, "grad_norm": 1.3635714970538197, "learning_rate": 1.3923290405624678e-07, "loss": 0.6215, "step": 30245 }, { "epoch": 0.9269952188304523, "grad_norm": 1.3825732618676978, "learning_rate": 1.3911661824037803e-07, "loss": 0.6151, "step": 30246 }, { "epoch": 0.9270258673531936, "grad_norm": 1.3610579677107468, "learning_rate": 1.3900038032003627e-07, "loss": 0.6224, "step": 30247 }, { "epoch": 0.9270565158759347, "grad_norm": 1.388263250681805, "learning_rate": 1.388841902963678e-07, "loss": 0.546, "step": 30248 }, { "epoch": 0.927087164398676, "grad_norm": 1.3099606795158247, "learning_rate": 1.3876804817051727e-07, "loss": 0.6089, "step": 30249 }, { "epoch": 0.9271178129214172, "grad_norm": 1.3647702482313628, "learning_rate": 1.3865195394362875e-07, "loss": 0.6241, "step": 30250 }, { "epoch": 0.9271484614441584, "grad_norm": 1.4481169446623454, "learning_rate": 1.3853590761684631e-07, "loss": 0.5665, "step": 30251 }, { "epoch": 0.9271791099668996, "grad_norm": 1.344999574058551, "learning_rate": 1.3841990919131354e-07, "loss": 0.5752, "step": 30252 }, { "epoch": 0.9272097584896408, "grad_norm": 1.3721973056741632, "learning_rate": 1.3830395866817336e-07, "loss": 0.6842, "step": 30253 }, { "epoch": 0.927240407012382, "grad_norm": 1.437858338001911, "learning_rate": 1.3818805604856877e-07, "loss": 0.6029, "step": 30254 }, { "epoch": 0.9272710555351232, "grad_norm": 1.2707245197871813, "learning_rate": 1.3807220133363996e-07, "loss": 0.5336, "step": 30255 }, { "epoch": 0.9273017040578644, "grad_norm": 1.3049469991385008, "learning_rate": 1.3795639452453047e-07, "loss": 0.53, "step": 30256 }, { "epoch": 0.9273323525806056, "grad_norm": 1.3049690206388993, "learning_rate": 1.3784063562238104e-07, "loss": 0.5735, "step": 30257 }, { "epoch": 0.9273630011033468, "grad_norm": 1.2605348473521678, "learning_rate": 1.377249246283313e-07, "loss": 0.5725, "step": 30258 }, { "epoch": 0.9273936496260881, "grad_norm": 1.526685269569094, "learning_rate": 1.376092615435215e-07, "loss": 0.5882, "step": 30259 }, { "epoch": 0.9274242981488292, "grad_norm": 1.2479921799668388, "learning_rate": 1.374936463690929e-07, "loss": 0.6182, "step": 30260 }, { "epoch": 0.9274549466715705, "grad_norm": 2.201860932472354, "learning_rate": 1.373780791061824e-07, "loss": 0.5811, "step": 30261 }, { "epoch": 0.9274855951943116, "grad_norm": 1.4007613597535482, "learning_rate": 1.3726255975593018e-07, "loss": 0.6299, "step": 30262 }, { "epoch": 0.9275162437170529, "grad_norm": 0.42685832733714485, "learning_rate": 1.371470883194742e-07, "loss": 0.3847, "step": 30263 }, { "epoch": 0.927546892239794, "grad_norm": 1.3795129000726232, "learning_rate": 1.370316647979525e-07, "loss": 0.5891, "step": 30264 }, { "epoch": 0.9275775407625353, "grad_norm": 1.3101364264707085, "learning_rate": 1.3691628919250245e-07, "loss": 0.5518, "step": 30265 }, { "epoch": 0.9276081892852764, "grad_norm": 1.5313043292283446, "learning_rate": 1.3680096150425925e-07, "loss": 0.6013, "step": 30266 }, { "epoch": 0.9276388378080177, "grad_norm": 1.251173657122435, "learning_rate": 1.366856817343609e-07, "loss": 0.601, "step": 30267 }, { "epoch": 0.9276694863307589, "grad_norm": 1.4382301349936637, "learning_rate": 1.3657044988394376e-07, "loss": 0.6195, "step": 30268 }, { "epoch": 0.9277001348535001, "grad_norm": 1.2912210607269896, "learning_rate": 1.3645526595414126e-07, "loss": 0.6007, "step": 30269 }, { "epoch": 0.9277307833762413, "grad_norm": 1.38893704633908, "learning_rate": 1.363401299460898e-07, "loss": 0.6323, "step": 30270 }, { "epoch": 0.9277614318989824, "grad_norm": 1.4807245624177716, "learning_rate": 1.362250418609229e-07, "loss": 0.5873, "step": 30271 }, { "epoch": 0.9277920804217237, "grad_norm": 0.435366303266923, "learning_rate": 1.361100016997763e-07, "loss": 0.3702, "step": 30272 }, { "epoch": 0.9278227289444648, "grad_norm": 1.5040415023344622, "learning_rate": 1.3599500946378185e-07, "loss": 0.652, "step": 30273 }, { "epoch": 0.9278533774672061, "grad_norm": 1.231074795387686, "learning_rate": 1.3588006515407203e-07, "loss": 0.5368, "step": 30274 }, { "epoch": 0.9278840259899472, "grad_norm": 1.799873179796098, "learning_rate": 1.3576516877178204e-07, "loss": 0.597, "step": 30275 }, { "epoch": 0.9279146745126885, "grad_norm": 1.4336060937809618, "learning_rate": 1.3565032031804205e-07, "loss": 0.5771, "step": 30276 }, { "epoch": 0.9279453230354296, "grad_norm": 1.3346994791662736, "learning_rate": 1.3553551979398339e-07, "loss": 0.6133, "step": 30277 }, { "epoch": 0.9279759715581709, "grad_norm": 1.609304693477678, "learning_rate": 1.3542076720073792e-07, "loss": 0.6194, "step": 30278 }, { "epoch": 0.9280066200809121, "grad_norm": 1.3944465078787502, "learning_rate": 1.3530606253943645e-07, "loss": 0.5874, "step": 30279 }, { "epoch": 0.9280372686036533, "grad_norm": 1.390141822693661, "learning_rate": 1.3519140581120914e-07, "loss": 0.6327, "step": 30280 }, { "epoch": 0.9280679171263945, "grad_norm": 1.2413341807531155, "learning_rate": 1.3507679701718568e-07, "loss": 0.5417, "step": 30281 }, { "epoch": 0.9280985656491357, "grad_norm": 1.3406472042964728, "learning_rate": 1.3496223615849513e-07, "loss": 0.4704, "step": 30282 }, { "epoch": 0.9281292141718769, "grad_norm": 1.2017171267013567, "learning_rate": 1.348477232362666e-07, "loss": 0.5142, "step": 30283 }, { "epoch": 0.9281598626946181, "grad_norm": 1.4917710515927038, "learning_rate": 1.3473325825162864e-07, "loss": 0.5749, "step": 30284 }, { "epoch": 0.9281905112173593, "grad_norm": 0.4266785913557315, "learning_rate": 1.3461884120570756e-07, "loss": 0.3686, "step": 30285 }, { "epoch": 0.9282211597401006, "grad_norm": 1.3540964292511368, "learning_rate": 1.3450447209963303e-07, "loss": 0.6323, "step": 30286 }, { "epoch": 0.9282518082628417, "grad_norm": 1.4738724607371154, "learning_rate": 1.3439015093453078e-07, "loss": 0.6225, "step": 30287 }, { "epoch": 0.928282456785583, "grad_norm": 1.3978800672409752, "learning_rate": 1.342758777115266e-07, "loss": 0.5113, "step": 30288 }, { "epoch": 0.9283131053083241, "grad_norm": 1.2385882528403207, "learning_rate": 1.341616524317474e-07, "loss": 0.6115, "step": 30289 }, { "epoch": 0.9283437538310654, "grad_norm": 1.22672200019601, "learning_rate": 1.3404747509631833e-07, "loss": 0.5491, "step": 30290 }, { "epoch": 0.9283744023538065, "grad_norm": 1.3816747485232481, "learning_rate": 1.3393334570636517e-07, "loss": 0.5732, "step": 30291 }, { "epoch": 0.9284050508765478, "grad_norm": 1.4079598265537008, "learning_rate": 1.3381926426301095e-07, "loss": 0.6238, "step": 30292 }, { "epoch": 0.9284356993992889, "grad_norm": 1.3805392681031308, "learning_rate": 1.3370523076738083e-07, "loss": 0.586, "step": 30293 }, { "epoch": 0.9284663479220302, "grad_norm": 1.4802324212960936, "learning_rate": 1.335912452205984e-07, "loss": 0.6463, "step": 30294 }, { "epoch": 0.9284969964447713, "grad_norm": 1.2400835865978708, "learning_rate": 1.3347730762378664e-07, "loss": 0.5834, "step": 30295 }, { "epoch": 0.9285276449675126, "grad_norm": 1.5239360227974152, "learning_rate": 1.333634179780674e-07, "loss": 0.5884, "step": 30296 }, { "epoch": 0.9285582934902538, "grad_norm": 0.41738719704875143, "learning_rate": 1.3324957628456424e-07, "loss": 0.3785, "step": 30297 }, { "epoch": 0.928588942012995, "grad_norm": 1.323406493585677, "learning_rate": 1.3313578254439796e-07, "loss": 0.5768, "step": 30298 }, { "epoch": 0.9286195905357362, "grad_norm": 1.4309042378123238, "learning_rate": 1.330220367586904e-07, "loss": 0.5907, "step": 30299 }, { "epoch": 0.9286502390584774, "grad_norm": 1.3980344977899795, "learning_rate": 1.329083389285618e-07, "loss": 0.623, "step": 30300 }, { "epoch": 0.9286808875812186, "grad_norm": 1.3882258221104342, "learning_rate": 1.3279468905513237e-07, "loss": 0.6277, "step": 30301 }, { "epoch": 0.9287115361039597, "grad_norm": 2.2271083793191617, "learning_rate": 1.326810871395229e-07, "loss": 0.6171, "step": 30302 }, { "epoch": 0.928742184626701, "grad_norm": 1.4101881849608224, "learning_rate": 1.325675331828513e-07, "loss": 0.6668, "step": 30303 }, { "epoch": 0.9287728331494421, "grad_norm": 1.4488782203866208, "learning_rate": 1.3245402718623678e-07, "loss": 0.5404, "step": 30304 }, { "epoch": 0.9288034816721834, "grad_norm": 0.4625698192536183, "learning_rate": 1.323405691507995e-07, "loss": 0.4091, "step": 30305 }, { "epoch": 0.9288341301949246, "grad_norm": 1.3706872754044612, "learning_rate": 1.3222715907765471e-07, "loss": 0.5712, "step": 30306 }, { "epoch": 0.9288647787176658, "grad_norm": 1.1802801103418674, "learning_rate": 1.321137969679226e-07, "loss": 0.5152, "step": 30307 }, { "epoch": 0.928895427240407, "grad_norm": 1.3679042165231792, "learning_rate": 1.3200048282271727e-07, "loss": 0.6203, "step": 30308 }, { "epoch": 0.9289260757631482, "grad_norm": 1.3471030330708615, "learning_rate": 1.318872166431573e-07, "loss": 0.5977, "step": 30309 }, { "epoch": 0.9289567242858894, "grad_norm": 1.5606267430378011, "learning_rate": 1.3177399843035898e-07, "loss": 0.5619, "step": 30310 }, { "epoch": 0.9289873728086306, "grad_norm": 1.2450592363259803, "learning_rate": 1.316608281854359e-07, "loss": 0.5668, "step": 30311 }, { "epoch": 0.9290180213313718, "grad_norm": 0.43437593465325786, "learning_rate": 1.3154770590950438e-07, "loss": 0.3868, "step": 30312 }, { "epoch": 0.929048669854113, "grad_norm": 1.2208236276539022, "learning_rate": 1.3143463160367964e-07, "loss": 0.509, "step": 30313 }, { "epoch": 0.9290793183768542, "grad_norm": 0.4681821124660042, "learning_rate": 1.3132160526907467e-07, "loss": 0.3976, "step": 30314 }, { "epoch": 0.9291099668995955, "grad_norm": 0.4857817349839512, "learning_rate": 1.3120862690680358e-07, "loss": 0.4013, "step": 30315 }, { "epoch": 0.9291406154223366, "grad_norm": 1.3045677821609338, "learning_rate": 1.3109569651797992e-07, "loss": 0.5863, "step": 30316 }, { "epoch": 0.9291712639450779, "grad_norm": 1.4132523034280078, "learning_rate": 1.3098281410371616e-07, "loss": 0.5623, "step": 30317 }, { "epoch": 0.929201912467819, "grad_norm": 1.365577899000232, "learning_rate": 1.3086997966512472e-07, "loss": 0.6719, "step": 30318 }, { "epoch": 0.9292325609905603, "grad_norm": 1.1761784625114535, "learning_rate": 1.3075719320331636e-07, "loss": 0.5881, "step": 30319 }, { "epoch": 0.9292632095133014, "grad_norm": 1.3373098252847224, "learning_rate": 1.3064445471940358e-07, "loss": 0.5235, "step": 30320 }, { "epoch": 0.9292938580360427, "grad_norm": 1.4391263720175318, "learning_rate": 1.305317642144971e-07, "loss": 0.5921, "step": 30321 }, { "epoch": 0.9293245065587838, "grad_norm": 1.1763229944770655, "learning_rate": 1.3041912168970715e-07, "loss": 0.5193, "step": 30322 }, { "epoch": 0.9293551550815251, "grad_norm": 1.3924458772945936, "learning_rate": 1.303065271461429e-07, "loss": 0.5152, "step": 30323 }, { "epoch": 0.9293858036042663, "grad_norm": 1.6178471691270282, "learning_rate": 1.3019398058491507e-07, "loss": 0.6089, "step": 30324 }, { "epoch": 0.9294164521270075, "grad_norm": 1.6375692299603195, "learning_rate": 1.3008148200713166e-07, "loss": 0.6204, "step": 30325 }, { "epoch": 0.9294471006497487, "grad_norm": 1.14920476289433, "learning_rate": 1.2996903141390127e-07, "loss": 0.5607, "step": 30326 }, { "epoch": 0.9294777491724899, "grad_norm": 1.2813324288125394, "learning_rate": 1.2985662880633243e-07, "loss": 0.5662, "step": 30327 }, { "epoch": 0.9295083976952311, "grad_norm": 1.3157797375842082, "learning_rate": 1.2974427418553205e-07, "loss": 0.5571, "step": 30328 }, { "epoch": 0.9295390462179723, "grad_norm": 1.2704442334582862, "learning_rate": 1.296319675526081e-07, "loss": 0.6621, "step": 30329 }, { "epoch": 0.9295696947407135, "grad_norm": 0.44865221365386815, "learning_rate": 1.2951970890866527e-07, "loss": 0.3698, "step": 30330 }, { "epoch": 0.9296003432634548, "grad_norm": 1.2958914435316538, "learning_rate": 1.294074982548116e-07, "loss": 0.5219, "step": 30331 }, { "epoch": 0.9296309917861959, "grad_norm": 1.299897204262264, "learning_rate": 1.2929533559215223e-07, "loss": 0.5516, "step": 30332 }, { "epoch": 0.929661640308937, "grad_norm": 1.3035351952395757, "learning_rate": 1.2918322092179192e-07, "loss": 0.6233, "step": 30333 }, { "epoch": 0.9296922888316783, "grad_norm": 1.3254309617033992, "learning_rate": 1.290711542448353e-07, "loss": 0.5171, "step": 30334 }, { "epoch": 0.9297229373544195, "grad_norm": 1.5089647715500982, "learning_rate": 1.289591355623876e-07, "loss": 0.5576, "step": 30335 }, { "epoch": 0.9297535858771607, "grad_norm": 1.254351297180728, "learning_rate": 1.2884716487555128e-07, "loss": 0.5858, "step": 30336 }, { "epoch": 0.9297842343999019, "grad_norm": 1.720091227995536, "learning_rate": 1.2873524218542988e-07, "loss": 0.6291, "step": 30337 }, { "epoch": 0.9298148829226431, "grad_norm": 1.2406764312047796, "learning_rate": 1.2862336749312753e-07, "loss": 0.6372, "step": 30338 }, { "epoch": 0.9298455314453843, "grad_norm": 1.3596985818907936, "learning_rate": 1.2851154079974448e-07, "loss": 0.6846, "step": 30339 }, { "epoch": 0.9298761799681255, "grad_norm": 0.44586488681124575, "learning_rate": 1.2839976210638482e-07, "loss": 0.3833, "step": 30340 }, { "epoch": 0.9299068284908667, "grad_norm": 1.2292978148218763, "learning_rate": 1.282880314141477e-07, "loss": 0.555, "step": 30341 }, { "epoch": 0.929937477013608, "grad_norm": 1.6721667511933214, "learning_rate": 1.2817634872413555e-07, "loss": 0.6533, "step": 30342 }, { "epoch": 0.9299681255363491, "grad_norm": 0.44964692010228297, "learning_rate": 1.2806471403744857e-07, "loss": 0.4, "step": 30343 }, { "epoch": 0.9299987740590904, "grad_norm": 1.577220231771646, "learning_rate": 1.2795312735518595e-07, "loss": 0.6322, "step": 30344 }, { "epoch": 0.9300294225818315, "grad_norm": 1.389564785204445, "learning_rate": 1.2784158867844788e-07, "loss": 0.4855, "step": 30345 }, { "epoch": 0.9300600711045728, "grad_norm": 1.3411335870230388, "learning_rate": 1.2773009800833346e-07, "loss": 0.6173, "step": 30346 }, { "epoch": 0.9300907196273139, "grad_norm": 1.2300431985186637, "learning_rate": 1.2761865534594077e-07, "loss": 0.5222, "step": 30347 }, { "epoch": 0.9301213681500552, "grad_norm": 1.2882404211931904, "learning_rate": 1.2750726069236774e-07, "loss": 0.5516, "step": 30348 }, { "epoch": 0.9301520166727963, "grad_norm": 0.46718865595119374, "learning_rate": 1.27395914048713e-07, "loss": 0.4, "step": 30349 }, { "epoch": 0.9301826651955376, "grad_norm": 0.4485939197676016, "learning_rate": 1.2728461541607285e-07, "loss": 0.4108, "step": 30350 }, { "epoch": 0.9302133137182788, "grad_norm": 1.3046578227624426, "learning_rate": 1.2717336479554422e-07, "loss": 0.5545, "step": 30351 }, { "epoch": 0.93024396224102, "grad_norm": 0.45517197994379593, "learning_rate": 1.2706216218822343e-07, "loss": 0.3832, "step": 30352 }, { "epoch": 0.9302746107637612, "grad_norm": 1.393327063615557, "learning_rate": 1.269510075952052e-07, "loss": 0.5913, "step": 30353 }, { "epoch": 0.9303052592865024, "grad_norm": 0.4325423335490728, "learning_rate": 1.2683990101758636e-07, "loss": 0.3809, "step": 30354 }, { "epoch": 0.9303359078092436, "grad_norm": 1.29505278293586, "learning_rate": 1.2672884245646e-07, "loss": 0.5941, "step": 30355 }, { "epoch": 0.9303665563319848, "grad_norm": 1.4384438230746333, "learning_rate": 1.266178319129213e-07, "loss": 0.7147, "step": 30356 }, { "epoch": 0.930397204854726, "grad_norm": 1.38853757765427, "learning_rate": 1.26506869388065e-07, "loss": 0.6047, "step": 30357 }, { "epoch": 0.9304278533774673, "grad_norm": 0.4621246993238706, "learning_rate": 1.263959548829824e-07, "loss": 0.4063, "step": 30358 }, { "epoch": 0.9304585019002084, "grad_norm": 1.3352060125096008, "learning_rate": 1.2628508839876818e-07, "loss": 0.5769, "step": 30359 }, { "epoch": 0.9304891504229497, "grad_norm": 1.5115442904640704, "learning_rate": 1.2617426993651372e-07, "loss": 0.5973, "step": 30360 }, { "epoch": 0.9305197989456908, "grad_norm": 1.52255865611858, "learning_rate": 1.260634994973109e-07, "loss": 0.5682, "step": 30361 }, { "epoch": 0.9305504474684321, "grad_norm": 1.3913656888508523, "learning_rate": 1.259527770822516e-07, "loss": 0.5707, "step": 30362 }, { "epoch": 0.9305810959911732, "grad_norm": 1.50732719732629, "learning_rate": 1.258421026924267e-07, "loss": 0.4941, "step": 30363 }, { "epoch": 0.9306117445139144, "grad_norm": 1.1929936442896165, "learning_rate": 1.2573147632892635e-07, "loss": 0.5332, "step": 30364 }, { "epoch": 0.9306423930366556, "grad_norm": 0.4403532664999145, "learning_rate": 1.2562089799284195e-07, "loss": 0.3973, "step": 30365 }, { "epoch": 0.9306730415593968, "grad_norm": 1.2472393497590184, "learning_rate": 1.2551036768526093e-07, "loss": 0.554, "step": 30366 }, { "epoch": 0.930703690082138, "grad_norm": 1.5119563962237696, "learning_rate": 1.2539988540727354e-07, "loss": 0.5843, "step": 30367 }, { "epoch": 0.9307343386048792, "grad_norm": 1.3054330482380592, "learning_rate": 1.2528945115996892e-07, "loss": 0.5505, "step": 30368 }, { "epoch": 0.9307649871276205, "grad_norm": 1.540980412383767, "learning_rate": 1.2517906494443399e-07, "loss": 0.5562, "step": 30369 }, { "epoch": 0.9307956356503616, "grad_norm": 1.260257087850475, "learning_rate": 1.2506872676175786e-07, "loss": 0.513, "step": 30370 }, { "epoch": 0.9308262841731029, "grad_norm": 1.2133855771873634, "learning_rate": 1.2495843661302577e-07, "loss": 0.6192, "step": 30371 }, { "epoch": 0.930856932695844, "grad_norm": 1.4318380554310184, "learning_rate": 1.248481944993263e-07, "loss": 0.6066, "step": 30372 }, { "epoch": 0.9308875812185853, "grad_norm": 0.42995888669094995, "learning_rate": 1.2473800042174465e-07, "loss": 0.3909, "step": 30373 }, { "epoch": 0.9309182297413264, "grad_norm": 1.3106216879000296, "learning_rate": 1.246278543813667e-07, "loss": 0.5252, "step": 30374 }, { "epoch": 0.9309488782640677, "grad_norm": 1.376016265709694, "learning_rate": 1.245177563792782e-07, "loss": 0.6431, "step": 30375 }, { "epoch": 0.9309795267868088, "grad_norm": 1.3611488968578214, "learning_rate": 1.2440770641656385e-07, "loss": 0.5836, "step": 30376 }, { "epoch": 0.9310101753095501, "grad_norm": 0.45835025216144976, "learning_rate": 1.2429770449430723e-07, "loss": 0.391, "step": 30377 }, { "epoch": 0.9310408238322913, "grad_norm": 1.3835665125620151, "learning_rate": 1.2418775061359302e-07, "loss": 0.5905, "step": 30378 }, { "epoch": 0.9310714723550325, "grad_norm": 1.560595878974308, "learning_rate": 1.2407784477550477e-07, "loss": 0.5961, "step": 30379 }, { "epoch": 0.9311021208777737, "grad_norm": 1.583532028061345, "learning_rate": 1.23967986981125e-07, "loss": 0.5625, "step": 30380 }, { "epoch": 0.9311327694005149, "grad_norm": 1.3134670817961038, "learning_rate": 1.238581772315367e-07, "loss": 0.6221, "step": 30381 }, { "epoch": 0.9311634179232561, "grad_norm": 1.4814355330143008, "learning_rate": 1.2374841552782014e-07, "loss": 0.6023, "step": 30382 }, { "epoch": 0.9311940664459973, "grad_norm": 0.4302944219365178, "learning_rate": 1.2363870187105886e-07, "loss": 0.3638, "step": 30383 }, { "epoch": 0.9312247149687385, "grad_norm": 0.43935666889397573, "learning_rate": 1.235290362623337e-07, "loss": 0.4002, "step": 30384 }, { "epoch": 0.9312553634914797, "grad_norm": 1.352015907067239, "learning_rate": 1.234194187027238e-07, "loss": 0.6503, "step": 30385 }, { "epoch": 0.9312860120142209, "grad_norm": 0.4429966675851945, "learning_rate": 1.233098491933099e-07, "loss": 0.3912, "step": 30386 }, { "epoch": 0.9313166605369622, "grad_norm": 1.2706146452155167, "learning_rate": 1.2320032773517233e-07, "loss": 0.5092, "step": 30387 }, { "epoch": 0.9313473090597033, "grad_norm": 0.44692276899551525, "learning_rate": 1.2309085432938962e-07, "loss": 0.3742, "step": 30388 }, { "epoch": 0.9313779575824446, "grad_norm": 1.3330364875627012, "learning_rate": 1.2298142897704035e-07, "loss": 0.5187, "step": 30389 }, { "epoch": 0.9314086061051857, "grad_norm": 1.3709402734807412, "learning_rate": 1.2287205167920313e-07, "loss": 0.5395, "step": 30390 }, { "epoch": 0.931439254627927, "grad_norm": 0.46149612349849184, "learning_rate": 1.2276272243695542e-07, "loss": 0.4175, "step": 30391 }, { "epoch": 0.9314699031506681, "grad_norm": 1.4047272836901208, "learning_rate": 1.2265344125137467e-07, "loss": 0.6001, "step": 30392 }, { "epoch": 0.9315005516734094, "grad_norm": 1.171550502484577, "learning_rate": 1.2254420812353674e-07, "loss": 0.5482, "step": 30393 }, { "epoch": 0.9315312001961505, "grad_norm": 0.4495876656775597, "learning_rate": 1.224350230545196e-07, "loss": 0.3907, "step": 30394 }, { "epoch": 0.9315618487188917, "grad_norm": 1.3202751663789942, "learning_rate": 1.2232588604539796e-07, "loss": 0.5802, "step": 30395 }, { "epoch": 0.931592497241633, "grad_norm": 1.3289304869678809, "learning_rate": 1.222167970972471e-07, "loss": 0.6208, "step": 30396 }, { "epoch": 0.9316231457643741, "grad_norm": 0.4359212680312149, "learning_rate": 1.2210775621114224e-07, "loss": 0.3684, "step": 30397 }, { "epoch": 0.9316537942871154, "grad_norm": 1.4485995633467894, "learning_rate": 1.2199876338815753e-07, "loss": 0.5402, "step": 30398 }, { "epoch": 0.9316844428098565, "grad_norm": 1.3880031665907944, "learning_rate": 1.2188981862936765e-07, "loss": 0.5449, "step": 30399 }, { "epoch": 0.9317150913325978, "grad_norm": 1.2884635345020445, "learning_rate": 1.2178092193584457e-07, "loss": 0.6278, "step": 30400 }, { "epoch": 0.9317457398553389, "grad_norm": 1.3238510171471596, "learning_rate": 1.2167207330866294e-07, "loss": 0.6254, "step": 30401 }, { "epoch": 0.9317763883780802, "grad_norm": 0.43672860074113307, "learning_rate": 1.2156327274889467e-07, "loss": 0.3711, "step": 30402 }, { "epoch": 0.9318070369008213, "grad_norm": 1.3666992410197218, "learning_rate": 1.2145452025761173e-07, "loss": 0.6138, "step": 30403 }, { "epoch": 0.9318376854235626, "grad_norm": 1.4294421709368883, "learning_rate": 1.213458158358849e-07, "loss": 0.5543, "step": 30404 }, { "epoch": 0.9318683339463038, "grad_norm": 1.4431407825071905, "learning_rate": 1.2123715948478608e-07, "loss": 0.553, "step": 30405 }, { "epoch": 0.931898982469045, "grad_norm": 1.2151348686505596, "learning_rate": 1.2112855120538613e-07, "loss": 0.538, "step": 30406 }, { "epoch": 0.9319296309917862, "grad_norm": 1.43108438935017, "learning_rate": 1.2101999099875529e-07, "loss": 0.564, "step": 30407 }, { "epoch": 0.9319602795145274, "grad_norm": 1.4288048655618568, "learning_rate": 1.209114788659621e-07, "loss": 0.6128, "step": 30408 }, { "epoch": 0.9319909280372686, "grad_norm": 1.443023829933125, "learning_rate": 1.2080301480807687e-07, "loss": 0.6143, "step": 30409 }, { "epoch": 0.9320215765600098, "grad_norm": 0.45775433713439556, "learning_rate": 1.2069459882616818e-07, "loss": 0.3762, "step": 30410 }, { "epoch": 0.932052225082751, "grad_norm": 1.42885154053452, "learning_rate": 1.2058623092130406e-07, "loss": 0.6205, "step": 30411 }, { "epoch": 0.9320828736054922, "grad_norm": 1.3745904559381419, "learning_rate": 1.2047791109455087e-07, "loss": 0.5645, "step": 30412 }, { "epoch": 0.9321135221282334, "grad_norm": 1.378759930735771, "learning_rate": 1.2036963934697887e-07, "loss": 0.5691, "step": 30413 }, { "epoch": 0.9321441706509747, "grad_norm": 1.3601227298512695, "learning_rate": 1.2026141567965332e-07, "loss": 0.6044, "step": 30414 }, { "epoch": 0.9321748191737158, "grad_norm": 1.3564406318365088, "learning_rate": 1.201532400936395e-07, "loss": 0.6587, "step": 30415 }, { "epoch": 0.9322054676964571, "grad_norm": 1.4207879658329545, "learning_rate": 1.2004511259000485e-07, "loss": 0.5945, "step": 30416 }, { "epoch": 0.9322361162191982, "grad_norm": 1.4599527766839093, "learning_rate": 1.1993703316981408e-07, "loss": 0.6684, "step": 30417 }, { "epoch": 0.9322667647419395, "grad_norm": 1.3175815783299116, "learning_rate": 1.1982900183413304e-07, "loss": 0.5818, "step": 30418 }, { "epoch": 0.9322974132646806, "grad_norm": 1.206654008913038, "learning_rate": 1.1972101858402475e-07, "loss": 0.5732, "step": 30419 }, { "epoch": 0.9323280617874219, "grad_norm": 1.3832979031850496, "learning_rate": 1.1961308342055444e-07, "loss": 0.6956, "step": 30420 }, { "epoch": 0.932358710310163, "grad_norm": 1.2764468741912094, "learning_rate": 1.195051963447852e-07, "loss": 0.6477, "step": 30421 }, { "epoch": 0.9323893588329043, "grad_norm": 1.1889235037089352, "learning_rate": 1.1939735735778e-07, "loss": 0.5421, "step": 30422 }, { "epoch": 0.9324200073556455, "grad_norm": 1.3299803780979387, "learning_rate": 1.192895664606003e-07, "loss": 0.5157, "step": 30423 }, { "epoch": 0.9324506558783867, "grad_norm": 1.4251980578307868, "learning_rate": 1.1918182365431075e-07, "loss": 0.6014, "step": 30424 }, { "epoch": 0.9324813044011279, "grad_norm": 0.4773198351255791, "learning_rate": 1.190741289399705e-07, "loss": 0.4002, "step": 30425 }, { "epoch": 0.932511952923869, "grad_norm": 1.2608857078440592, "learning_rate": 1.1896648231864205e-07, "loss": 0.5823, "step": 30426 }, { "epoch": 0.9325426014466103, "grad_norm": 0.459248468786245, "learning_rate": 1.1885888379138565e-07, "loss": 0.3929, "step": 30427 }, { "epoch": 0.9325732499693514, "grad_norm": 1.3381056478688114, "learning_rate": 1.1875133335926159e-07, "loss": 0.5881, "step": 30428 }, { "epoch": 0.9326038984920927, "grad_norm": 1.6629243950688017, "learning_rate": 1.186438310233301e-07, "loss": 0.501, "step": 30429 }, { "epoch": 0.9326345470148338, "grad_norm": 1.5689455389729048, "learning_rate": 1.1853637678464925e-07, "loss": 0.5812, "step": 30430 }, { "epoch": 0.9326651955375751, "grad_norm": 0.45094635070560396, "learning_rate": 1.1842897064427816e-07, "loss": 0.374, "step": 30431 }, { "epoch": 0.9326958440603162, "grad_norm": 1.4368024400947514, "learning_rate": 1.1832161260327602e-07, "loss": 0.5566, "step": 30432 }, { "epoch": 0.9327264925830575, "grad_norm": 1.3105178591372395, "learning_rate": 1.1821430266269973e-07, "loss": 0.6521, "step": 30433 }, { "epoch": 0.9327571411057987, "grad_norm": 0.4431653498712349, "learning_rate": 1.1810704082360735e-07, "loss": 0.3801, "step": 30434 }, { "epoch": 0.9327877896285399, "grad_norm": 1.354192921136615, "learning_rate": 1.1799982708705526e-07, "loss": 0.5578, "step": 30435 }, { "epoch": 0.9328184381512811, "grad_norm": 1.186696603027941, "learning_rate": 1.1789266145409984e-07, "loss": 0.5483, "step": 30436 }, { "epoch": 0.9328490866740223, "grad_norm": 1.3833742794287258, "learning_rate": 1.1778554392579745e-07, "loss": 0.5389, "step": 30437 }, { "epoch": 0.9328797351967635, "grad_norm": 1.2894767927570951, "learning_rate": 1.176784745032028e-07, "loss": 0.6454, "step": 30438 }, { "epoch": 0.9329103837195047, "grad_norm": 1.2717518340118958, "learning_rate": 1.1757145318737172e-07, "loss": 0.5452, "step": 30439 }, { "epoch": 0.9329410322422459, "grad_norm": 1.2971587865784528, "learning_rate": 1.174644799793584e-07, "loss": 0.6104, "step": 30440 }, { "epoch": 0.9329716807649872, "grad_norm": 0.43614179010013127, "learning_rate": 1.1735755488021638e-07, "loss": 0.3931, "step": 30441 }, { "epoch": 0.9330023292877283, "grad_norm": 1.3338030169420863, "learning_rate": 1.1725067789099986e-07, "loss": 0.6205, "step": 30442 }, { "epoch": 0.9330329778104696, "grad_norm": 1.4434428956822853, "learning_rate": 1.1714384901276188e-07, "loss": 0.6135, "step": 30443 }, { "epoch": 0.9330636263332107, "grad_norm": 1.1992303823497412, "learning_rate": 1.1703706824655492e-07, "loss": 0.502, "step": 30444 }, { "epoch": 0.933094274855952, "grad_norm": 1.4386121183350469, "learning_rate": 1.1693033559343037e-07, "loss": 0.6531, "step": 30445 }, { "epoch": 0.9331249233786931, "grad_norm": 1.4099084473210755, "learning_rate": 1.1682365105444182e-07, "loss": 0.5469, "step": 30446 }, { "epoch": 0.9331555719014344, "grad_norm": 1.3532312074787125, "learning_rate": 1.1671701463063845e-07, "loss": 0.6184, "step": 30447 }, { "epoch": 0.9331862204241755, "grad_norm": 1.4989627623298167, "learning_rate": 1.1661042632307218e-07, "loss": 0.6321, "step": 30448 }, { "epoch": 0.9332168689469168, "grad_norm": 0.43745477547068534, "learning_rate": 1.1650388613279218e-07, "loss": 0.38, "step": 30449 }, { "epoch": 0.933247517469658, "grad_norm": 1.300337844896703, "learning_rate": 1.1639739406084927e-07, "loss": 0.6339, "step": 30450 }, { "epoch": 0.9332781659923992, "grad_norm": 1.2975543627211203, "learning_rate": 1.1629095010829262e-07, "loss": 0.5508, "step": 30451 }, { "epoch": 0.9333088145151404, "grad_norm": 1.1742468300698998, "learning_rate": 1.1618455427617026e-07, "loss": 0.5125, "step": 30452 }, { "epoch": 0.9333394630378816, "grad_norm": 1.2779585804525098, "learning_rate": 1.1607820656553138e-07, "loss": 0.5428, "step": 30453 }, { "epoch": 0.9333701115606228, "grad_norm": 1.3672894556415305, "learning_rate": 1.1597190697742344e-07, "loss": 0.5607, "step": 30454 }, { "epoch": 0.933400760083364, "grad_norm": 1.4737472379202992, "learning_rate": 1.1586565551289397e-07, "loss": 0.6338, "step": 30455 }, { "epoch": 0.9334314086061052, "grad_norm": 0.4586644841059018, "learning_rate": 1.1575945217299045e-07, "loss": 0.3889, "step": 30456 }, { "epoch": 0.9334620571288463, "grad_norm": 1.5930240472832298, "learning_rate": 1.156532969587576e-07, "loss": 0.6475, "step": 30457 }, { "epoch": 0.9334927056515876, "grad_norm": 1.3785071324600102, "learning_rate": 1.1554718987124292e-07, "loss": 0.6451, "step": 30458 }, { "epoch": 0.9335233541743287, "grad_norm": 0.45564237952248454, "learning_rate": 1.1544113091149222e-07, "loss": 0.3909, "step": 30459 }, { "epoch": 0.93355400269707, "grad_norm": 1.321955909393759, "learning_rate": 1.1533512008054915e-07, "loss": 0.6285, "step": 30460 }, { "epoch": 0.9335846512198112, "grad_norm": 0.43039164923453005, "learning_rate": 1.1522915737945895e-07, "loss": 0.3984, "step": 30461 }, { "epoch": 0.9336152997425524, "grad_norm": 1.3157349844752444, "learning_rate": 1.1512324280926635e-07, "loss": 0.5704, "step": 30462 }, { "epoch": 0.9336459482652936, "grad_norm": 1.2983162827652712, "learning_rate": 1.1501737637101329e-07, "loss": 0.6192, "step": 30463 }, { "epoch": 0.9336765967880348, "grad_norm": 1.3928861994566584, "learning_rate": 1.149115580657445e-07, "loss": 0.6071, "step": 30464 }, { "epoch": 0.933707245310776, "grad_norm": 0.4820444532214326, "learning_rate": 1.1480578789450192e-07, "loss": 0.3928, "step": 30465 }, { "epoch": 0.9337378938335172, "grad_norm": 1.3256476318825425, "learning_rate": 1.147000658583275e-07, "loss": 0.61, "step": 30466 }, { "epoch": 0.9337685423562584, "grad_norm": 2.1244610351626494, "learning_rate": 1.1459439195826427e-07, "loss": 0.6419, "step": 30467 }, { "epoch": 0.9337991908789997, "grad_norm": 1.1719630315241845, "learning_rate": 1.1448876619535143e-07, "loss": 0.5707, "step": 30468 }, { "epoch": 0.9338298394017408, "grad_norm": 1.4232865462592839, "learning_rate": 1.1438318857063091e-07, "loss": 0.5971, "step": 30469 }, { "epoch": 0.9338604879244821, "grad_norm": 1.4789673120481273, "learning_rate": 1.1427765908514355e-07, "loss": 0.6048, "step": 30470 }, { "epoch": 0.9338911364472232, "grad_norm": 1.2100749672420548, "learning_rate": 1.1417217773992795e-07, "loss": 0.634, "step": 30471 }, { "epoch": 0.9339217849699645, "grad_norm": 1.511314404352276, "learning_rate": 1.1406674453602385e-07, "loss": 0.693, "step": 30472 }, { "epoch": 0.9339524334927056, "grad_norm": 0.4563332625626126, "learning_rate": 1.1396135947447096e-07, "loss": 0.3994, "step": 30473 }, { "epoch": 0.9339830820154469, "grad_norm": 1.2445503435943714, "learning_rate": 1.1385602255630624e-07, "loss": 0.5702, "step": 30474 }, { "epoch": 0.934013730538188, "grad_norm": 1.2488864812007878, "learning_rate": 1.137507337825683e-07, "loss": 0.5952, "step": 30475 }, { "epoch": 0.9340443790609293, "grad_norm": 1.3840073959755494, "learning_rate": 1.1364549315429519e-07, "loss": 0.6048, "step": 30476 }, { "epoch": 0.9340750275836704, "grad_norm": 1.2582541270621794, "learning_rate": 1.1354030067252275e-07, "loss": 0.6382, "step": 30477 }, { "epoch": 0.9341056761064117, "grad_norm": 1.555194521093418, "learning_rate": 1.134351563382885e-07, "loss": 0.5572, "step": 30478 }, { "epoch": 0.9341363246291529, "grad_norm": 1.4479409180462932, "learning_rate": 1.1333006015262659e-07, "loss": 0.5949, "step": 30479 }, { "epoch": 0.9341669731518941, "grad_norm": 1.233318806578626, "learning_rate": 1.1322501211657566e-07, "loss": 0.5355, "step": 30480 }, { "epoch": 0.9341976216746353, "grad_norm": 1.2410825768020504, "learning_rate": 1.131200122311682e-07, "loss": 0.4902, "step": 30481 }, { "epoch": 0.9342282701973765, "grad_norm": 1.361377635384814, "learning_rate": 1.1301506049743949e-07, "loss": 0.6405, "step": 30482 }, { "epoch": 0.9342589187201177, "grad_norm": 0.45567307709680965, "learning_rate": 1.129101569164237e-07, "loss": 0.396, "step": 30483 }, { "epoch": 0.9342895672428589, "grad_norm": 1.1875131036636706, "learning_rate": 1.1280530148915503e-07, "loss": 0.6039, "step": 30484 }, { "epoch": 0.9343202157656001, "grad_norm": 1.3559994268628286, "learning_rate": 1.1270049421666596e-07, "loss": 0.6029, "step": 30485 }, { "epoch": 0.9343508642883414, "grad_norm": 1.2794074588950846, "learning_rate": 1.12595735099989e-07, "loss": 0.5939, "step": 30486 }, { "epoch": 0.9343815128110825, "grad_norm": 2.5852350628006415, "learning_rate": 1.1249102414015722e-07, "loss": 0.6864, "step": 30487 }, { "epoch": 0.9344121613338237, "grad_norm": 0.4309185815708173, "learning_rate": 1.1238636133820202e-07, "loss": 0.3803, "step": 30488 }, { "epoch": 0.9344428098565649, "grad_norm": 1.465890058062867, "learning_rate": 1.122817466951548e-07, "loss": 0.5349, "step": 30489 }, { "epoch": 0.9344734583793061, "grad_norm": 1.33290182303341, "learning_rate": 1.1217718021204471e-07, "loss": 0.6805, "step": 30490 }, { "epoch": 0.9345041069020473, "grad_norm": 0.4270492537454393, "learning_rate": 1.1207266188990485e-07, "loss": 0.3924, "step": 30491 }, { "epoch": 0.9345347554247885, "grad_norm": 1.21716746907278, "learning_rate": 1.1196819172976325e-07, "loss": 0.6038, "step": 30492 }, { "epoch": 0.9345654039475297, "grad_norm": 1.2839228918738863, "learning_rate": 1.1186376973264967e-07, "loss": 0.6088, "step": 30493 }, { "epoch": 0.9345960524702709, "grad_norm": 1.37343913292661, "learning_rate": 1.1175939589959328e-07, "loss": 0.5751, "step": 30494 }, { "epoch": 0.9346267009930121, "grad_norm": 1.3677528412402036, "learning_rate": 1.1165507023162214e-07, "loss": 0.6115, "step": 30495 }, { "epoch": 0.9346573495157533, "grad_norm": 1.3712322826627816, "learning_rate": 1.1155079272976432e-07, "loss": 0.5866, "step": 30496 }, { "epoch": 0.9346879980384946, "grad_norm": 0.44567976908683166, "learning_rate": 1.1144656339504678e-07, "loss": 0.41, "step": 30497 }, { "epoch": 0.9347186465612357, "grad_norm": 1.247710356569453, "learning_rate": 1.1134238222849758e-07, "loss": 0.5248, "step": 30498 }, { "epoch": 0.934749295083977, "grad_norm": 1.2976026049405904, "learning_rate": 1.1123824923114257e-07, "loss": 0.5167, "step": 30499 }, { "epoch": 0.9347799436067181, "grad_norm": 1.469175872252538, "learning_rate": 1.1113416440400815e-07, "loss": 0.6105, "step": 30500 }, { "epoch": 0.9348105921294594, "grad_norm": 1.4069121711576218, "learning_rate": 1.1103012774811961e-07, "loss": 0.607, "step": 30501 }, { "epoch": 0.9348412406522005, "grad_norm": 1.2805730787961858, "learning_rate": 1.1092613926450169e-07, "loss": 0.5276, "step": 30502 }, { "epoch": 0.9348718891749418, "grad_norm": 1.4433121461810774, "learning_rate": 1.1082219895418022e-07, "loss": 0.6359, "step": 30503 }, { "epoch": 0.934902537697683, "grad_norm": 1.3770910785140131, "learning_rate": 1.1071830681817775e-07, "loss": 0.5407, "step": 30504 }, { "epoch": 0.9349331862204242, "grad_norm": 0.4472886892849539, "learning_rate": 1.1061446285751897e-07, "loss": 0.391, "step": 30505 }, { "epoch": 0.9349638347431654, "grad_norm": 0.4678256984749384, "learning_rate": 1.1051066707322699e-07, "loss": 0.3987, "step": 30506 }, { "epoch": 0.9349944832659066, "grad_norm": 1.380572044573409, "learning_rate": 1.1040691946632487e-07, "loss": 0.6038, "step": 30507 }, { "epoch": 0.9350251317886478, "grad_norm": 1.491373514024837, "learning_rate": 1.1030322003783456e-07, "loss": 0.578, "step": 30508 }, { "epoch": 0.935055780311389, "grad_norm": 0.4551189148675508, "learning_rate": 1.1019956878877635e-07, "loss": 0.3932, "step": 30509 }, { "epoch": 0.9350864288341302, "grad_norm": 0.4585238256365826, "learning_rate": 1.1009596572017445e-07, "loss": 0.3931, "step": 30510 }, { "epoch": 0.9351170773568714, "grad_norm": 1.2502483557200341, "learning_rate": 1.0999241083304746e-07, "loss": 0.5859, "step": 30511 }, { "epoch": 0.9351477258796126, "grad_norm": 1.3308288952210336, "learning_rate": 1.0988890412841624e-07, "loss": 0.5968, "step": 30512 }, { "epoch": 0.9351783744023539, "grad_norm": 1.2745053418600143, "learning_rate": 1.0978544560730109e-07, "loss": 0.5365, "step": 30513 }, { "epoch": 0.935209022925095, "grad_norm": 1.4112764434932967, "learning_rate": 1.0968203527072119e-07, "loss": 0.6592, "step": 30514 }, { "epoch": 0.9352396714478363, "grad_norm": 1.2375245461483875, "learning_rate": 1.0957867311969516e-07, "loss": 0.6287, "step": 30515 }, { "epoch": 0.9352703199705774, "grad_norm": 1.3437699290498908, "learning_rate": 1.0947535915524166e-07, "loss": 0.5588, "step": 30516 }, { "epoch": 0.9353009684933187, "grad_norm": 1.456131709633483, "learning_rate": 1.0937209337837872e-07, "loss": 0.6445, "step": 30517 }, { "epoch": 0.9353316170160598, "grad_norm": 1.3406123383313244, "learning_rate": 1.0926887579012446e-07, "loss": 0.604, "step": 30518 }, { "epoch": 0.935362265538801, "grad_norm": 1.2641151555729444, "learning_rate": 1.0916570639149526e-07, "loss": 0.6519, "step": 30519 }, { "epoch": 0.9353929140615422, "grad_norm": 0.46417910685457864, "learning_rate": 1.0906258518350698e-07, "loss": 0.3915, "step": 30520 }, { "epoch": 0.9354235625842834, "grad_norm": 1.2561942438955924, "learning_rate": 1.0895951216717715e-07, "loss": 0.5492, "step": 30521 }, { "epoch": 0.9354542111070246, "grad_norm": 1.3420370837447162, "learning_rate": 1.0885648734352105e-07, "loss": 0.5959, "step": 30522 }, { "epoch": 0.9354848596297658, "grad_norm": 1.3596133873048875, "learning_rate": 1.0875351071355234e-07, "loss": 0.6321, "step": 30523 }, { "epoch": 0.9355155081525071, "grad_norm": 1.3018472310114162, "learning_rate": 1.086505822782874e-07, "loss": 0.5597, "step": 30524 }, { "epoch": 0.9355461566752482, "grad_norm": 1.3542484183978316, "learning_rate": 1.0854770203873932e-07, "loss": 0.6127, "step": 30525 }, { "epoch": 0.9355768051979895, "grad_norm": 1.5173581781383894, "learning_rate": 1.0844486999592285e-07, "loss": 0.6707, "step": 30526 }, { "epoch": 0.9356074537207306, "grad_norm": 1.3340388840705393, "learning_rate": 1.0834208615084996e-07, "loss": 0.6172, "step": 30527 }, { "epoch": 0.9356381022434719, "grad_norm": 1.2614654365700382, "learning_rate": 1.0823935050453427e-07, "loss": 0.5524, "step": 30528 }, { "epoch": 0.935668750766213, "grad_norm": 1.3437618398139146, "learning_rate": 1.081366630579883e-07, "loss": 0.6655, "step": 30529 }, { "epoch": 0.9356993992889543, "grad_norm": 1.5243335995892453, "learning_rate": 1.080340238122235e-07, "loss": 0.7303, "step": 30530 }, { "epoch": 0.9357300478116954, "grad_norm": 1.458023537850623, "learning_rate": 1.0793143276825068e-07, "loss": 0.6044, "step": 30531 }, { "epoch": 0.9357606963344367, "grad_norm": 1.2003475588283221, "learning_rate": 1.0782888992708129e-07, "loss": 0.635, "step": 30532 }, { "epoch": 0.9357913448571779, "grad_norm": 0.4328347162421286, "learning_rate": 1.0772639528972562e-07, "loss": 0.3979, "step": 30533 }, { "epoch": 0.9358219933799191, "grad_norm": 0.4381136006642053, "learning_rate": 1.076239488571934e-07, "loss": 0.3794, "step": 30534 }, { "epoch": 0.9358526419026603, "grad_norm": 1.450316856809233, "learning_rate": 1.0752155063049441e-07, "loss": 0.5915, "step": 30535 }, { "epoch": 0.9358832904254015, "grad_norm": 1.1716685278560872, "learning_rate": 1.0741920061063671e-07, "loss": 0.5649, "step": 30536 }, { "epoch": 0.9359139389481427, "grad_norm": 1.6976985609384354, "learning_rate": 1.0731689879863061e-07, "loss": 0.5487, "step": 30537 }, { "epoch": 0.9359445874708839, "grad_norm": 0.44219034950957437, "learning_rate": 1.0721464519548198e-07, "loss": 0.3778, "step": 30538 }, { "epoch": 0.9359752359936251, "grad_norm": 1.2190452029925294, "learning_rate": 1.0711243980219943e-07, "loss": 0.5772, "step": 30539 }, { "epoch": 0.9360058845163663, "grad_norm": 1.3783727686569318, "learning_rate": 1.0701028261979052e-07, "loss": 0.6477, "step": 30540 }, { "epoch": 0.9360365330391075, "grad_norm": 1.4876969657886376, "learning_rate": 1.0690817364926109e-07, "loss": 0.6349, "step": 30541 }, { "epoch": 0.9360671815618488, "grad_norm": 1.3779274284462009, "learning_rate": 1.0680611289161647e-07, "loss": 0.5657, "step": 30542 }, { "epoch": 0.9360978300845899, "grad_norm": 1.3787835342242127, "learning_rate": 1.0670410034786416e-07, "loss": 0.6093, "step": 30543 }, { "epoch": 0.9361284786073312, "grad_norm": 1.50764276546536, "learning_rate": 1.0660213601900782e-07, "loss": 0.6406, "step": 30544 }, { "epoch": 0.9361591271300723, "grad_norm": 1.3382754862206123, "learning_rate": 1.065002199060533e-07, "loss": 0.6179, "step": 30545 }, { "epoch": 0.9361897756528136, "grad_norm": 1.303395138538911, "learning_rate": 1.0639835201000315e-07, "loss": 0.6161, "step": 30546 }, { "epoch": 0.9362204241755547, "grad_norm": 1.2884942761839222, "learning_rate": 1.0629653233186265e-07, "loss": 0.5789, "step": 30547 }, { "epoch": 0.936251072698296, "grad_norm": 1.3460179280433369, "learning_rate": 1.0619476087263491e-07, "loss": 0.5688, "step": 30548 }, { "epoch": 0.9362817212210371, "grad_norm": 1.2864354435016427, "learning_rate": 1.060930376333219e-07, "loss": 0.5782, "step": 30549 }, { "epoch": 0.9363123697437783, "grad_norm": 1.2872459228244526, "learning_rate": 1.0599136261492615e-07, "loss": 0.6367, "step": 30550 }, { "epoch": 0.9363430182665196, "grad_norm": 1.5098386542359956, "learning_rate": 1.0588973581845019e-07, "loss": 0.5617, "step": 30551 }, { "epoch": 0.9363736667892607, "grad_norm": 0.4654217840681583, "learning_rate": 1.0578815724489489e-07, "loss": 0.3857, "step": 30552 }, { "epoch": 0.936404315312002, "grad_norm": 1.3286479369601865, "learning_rate": 1.056866268952611e-07, "loss": 0.6444, "step": 30553 }, { "epoch": 0.9364349638347431, "grad_norm": 1.4467814799251513, "learning_rate": 1.055851447705486e-07, "loss": 0.5904, "step": 30554 }, { "epoch": 0.9364656123574844, "grad_norm": 1.346638350939959, "learning_rate": 1.0548371087175824e-07, "loss": 0.6238, "step": 30555 }, { "epoch": 0.9364962608802255, "grad_norm": 1.1739636555109847, "learning_rate": 1.0538232519988978e-07, "loss": 0.5345, "step": 30556 }, { "epoch": 0.9365269094029668, "grad_norm": 1.3598963716956352, "learning_rate": 1.0528098775594131e-07, "loss": 0.5848, "step": 30557 }, { "epoch": 0.9365575579257079, "grad_norm": 1.358492611689575, "learning_rate": 1.0517969854091092e-07, "loss": 0.6067, "step": 30558 }, { "epoch": 0.9365882064484492, "grad_norm": 1.4575906311083744, "learning_rate": 1.0507845755579838e-07, "loss": 0.6536, "step": 30559 }, { "epoch": 0.9366188549711904, "grad_norm": 1.28751697867931, "learning_rate": 1.0497726480159954e-07, "loss": 0.5607, "step": 30560 }, { "epoch": 0.9366495034939316, "grad_norm": 0.45048241508472103, "learning_rate": 1.0487612027931249e-07, "loss": 0.3721, "step": 30561 }, { "epoch": 0.9366801520166728, "grad_norm": 1.4475278188009062, "learning_rate": 1.0477502398993367e-07, "loss": 0.6156, "step": 30562 }, { "epoch": 0.936710800539414, "grad_norm": 1.223696905992939, "learning_rate": 1.0467397593445838e-07, "loss": 0.5487, "step": 30563 }, { "epoch": 0.9367414490621552, "grad_norm": 0.4538470043476345, "learning_rate": 1.0457297611388362e-07, "loss": 0.3731, "step": 30564 }, { "epoch": 0.9367720975848964, "grad_norm": 1.309439108857238, "learning_rate": 1.0447202452920357e-07, "loss": 0.5796, "step": 30565 }, { "epoch": 0.9368027461076376, "grad_norm": 1.6165969218166956, "learning_rate": 1.0437112118141302e-07, "loss": 0.5212, "step": 30566 }, { "epoch": 0.9368333946303788, "grad_norm": 1.4162507909675133, "learning_rate": 1.042702660715067e-07, "loss": 0.5639, "step": 30567 }, { "epoch": 0.93686404315312, "grad_norm": 1.243286582379419, "learning_rate": 1.0416945920047771e-07, "loss": 0.5338, "step": 30568 }, { "epoch": 0.9368946916758613, "grad_norm": 1.2943872232747418, "learning_rate": 1.0406870056932028e-07, "loss": 0.5985, "step": 30569 }, { "epoch": 0.9369253401986024, "grad_norm": 1.366049948359239, "learning_rate": 1.0396799017902637e-07, "loss": 0.5963, "step": 30570 }, { "epoch": 0.9369559887213437, "grad_norm": 0.4845153249985738, "learning_rate": 1.0386732803058852e-07, "loss": 0.4026, "step": 30571 }, { "epoch": 0.9369866372440848, "grad_norm": 1.3341188375385336, "learning_rate": 1.0376671412499817e-07, "loss": 0.6777, "step": 30572 }, { "epoch": 0.9370172857668261, "grad_norm": 1.3380295089631091, "learning_rate": 1.0366614846324786e-07, "loss": 0.6326, "step": 30573 }, { "epoch": 0.9370479342895672, "grad_norm": 0.46216160113522126, "learning_rate": 1.0356563104632733e-07, "loss": 0.4142, "step": 30574 }, { "epoch": 0.9370785828123085, "grad_norm": 0.43692637204561036, "learning_rate": 1.0346516187522804e-07, "loss": 0.3829, "step": 30575 }, { "epoch": 0.9371092313350496, "grad_norm": 1.1446986288686498, "learning_rate": 1.0336474095093862e-07, "loss": 0.5787, "step": 30576 }, { "epoch": 0.9371398798577909, "grad_norm": 1.4203996789882345, "learning_rate": 1.0326436827444941e-07, "loss": 0.6211, "step": 30577 }, { "epoch": 0.937170528380532, "grad_norm": 1.467662360386607, "learning_rate": 1.0316404384674961e-07, "loss": 0.5821, "step": 30578 }, { "epoch": 0.9372011769032733, "grad_norm": 1.3139055836406925, "learning_rate": 1.030637676688273e-07, "loss": 0.6132, "step": 30579 }, { "epoch": 0.9372318254260145, "grad_norm": 1.4170756306596792, "learning_rate": 1.0296353974167006e-07, "loss": 0.6078, "step": 30580 }, { "epoch": 0.9372624739487556, "grad_norm": 0.44245635924291565, "learning_rate": 1.0286336006626707e-07, "loss": 0.3869, "step": 30581 }, { "epoch": 0.9372931224714969, "grad_norm": 0.4381267500231859, "learning_rate": 1.0276322864360366e-07, "loss": 0.3939, "step": 30582 }, { "epoch": 0.937323770994238, "grad_norm": 1.372469001198796, "learning_rate": 1.0266314547466683e-07, "loss": 0.6065, "step": 30583 }, { "epoch": 0.9373544195169793, "grad_norm": 1.5692569468216857, "learning_rate": 1.025631105604441e-07, "loss": 0.503, "step": 30584 }, { "epoch": 0.9373850680397204, "grad_norm": 1.4062275845443928, "learning_rate": 1.0246312390191915e-07, "loss": 0.5805, "step": 30585 }, { "epoch": 0.9374157165624617, "grad_norm": 1.3808122160928242, "learning_rate": 1.0236318550007896e-07, "loss": 0.5745, "step": 30586 }, { "epoch": 0.9374463650852028, "grad_norm": 0.4383903422482862, "learning_rate": 1.0226329535590607e-07, "loss": 0.3622, "step": 30587 }, { "epoch": 0.9374770136079441, "grad_norm": 1.2878034674319618, "learning_rate": 1.0216345347038748e-07, "loss": 0.5984, "step": 30588 }, { "epoch": 0.9375076621306853, "grad_norm": 1.3069629183088773, "learning_rate": 1.0206365984450516e-07, "loss": 0.6302, "step": 30589 }, { "epoch": 0.9375383106534265, "grad_norm": 1.3394694376744114, "learning_rate": 1.0196391447924282e-07, "loss": 0.6237, "step": 30590 }, { "epoch": 0.9375689591761677, "grad_norm": 1.475184100327828, "learning_rate": 1.0186421737558294e-07, "loss": 0.8103, "step": 30591 }, { "epoch": 0.9375996076989089, "grad_norm": 0.4382069700225807, "learning_rate": 1.0176456853450812e-07, "loss": 0.3949, "step": 30592 }, { "epoch": 0.9376302562216501, "grad_norm": 0.45027983190185134, "learning_rate": 1.0166496795700031e-07, "loss": 0.4174, "step": 30593 }, { "epoch": 0.9376609047443913, "grad_norm": 1.2799413392970305, "learning_rate": 1.0156541564404099e-07, "loss": 0.5587, "step": 30594 }, { "epoch": 0.9376915532671325, "grad_norm": 1.5125318519763715, "learning_rate": 1.0146591159661157e-07, "loss": 0.7354, "step": 30595 }, { "epoch": 0.9377222017898738, "grad_norm": 1.4110830692581504, "learning_rate": 1.0136645581569071e-07, "loss": 0.6372, "step": 30596 }, { "epoch": 0.9377528503126149, "grad_norm": 1.1704371102210882, "learning_rate": 1.0126704830226042e-07, "loss": 0.5728, "step": 30597 }, { "epoch": 0.9377834988353562, "grad_norm": 1.2692385186584922, "learning_rate": 1.011676890572988e-07, "loss": 0.6071, "step": 30598 }, { "epoch": 0.9378141473580973, "grad_norm": 0.4332514679820983, "learning_rate": 1.0106837808178561e-07, "loss": 0.4009, "step": 30599 }, { "epoch": 0.9378447958808386, "grad_norm": 1.3314112060915884, "learning_rate": 1.0096911537669951e-07, "loss": 0.5261, "step": 30600 }, { "epoch": 0.9378754444035797, "grad_norm": 1.4370058362783507, "learning_rate": 1.0086990094301751e-07, "loss": 0.5461, "step": 30601 }, { "epoch": 0.937906092926321, "grad_norm": 1.5058972580184407, "learning_rate": 1.0077073478171773e-07, "loss": 0.6306, "step": 30602 }, { "epoch": 0.9379367414490621, "grad_norm": 1.2217101286395777, "learning_rate": 1.0067161689377825e-07, "loss": 0.5696, "step": 30603 }, { "epoch": 0.9379673899718034, "grad_norm": 0.4676020699092105, "learning_rate": 1.005725472801744e-07, "loss": 0.3942, "step": 30604 }, { "epoch": 0.9379980384945446, "grad_norm": 1.2299909540768081, "learning_rate": 1.004735259418832e-07, "loss": 0.5299, "step": 30605 }, { "epoch": 0.9380286870172858, "grad_norm": 1.3257585054362173, "learning_rate": 1.0037455287987885e-07, "loss": 0.6423, "step": 30606 }, { "epoch": 0.938059335540027, "grad_norm": 1.2005984012346402, "learning_rate": 1.0027562809513836e-07, "loss": 0.583, "step": 30607 }, { "epoch": 0.9380899840627682, "grad_norm": 1.2809391716933392, "learning_rate": 1.0017675158863649e-07, "loss": 0.5815, "step": 30608 }, { "epoch": 0.9381206325855094, "grad_norm": 1.3084417382734832, "learning_rate": 1.0007792336134581e-07, "loss": 0.5951, "step": 30609 }, { "epoch": 0.9381512811082506, "grad_norm": 1.5313894674191204, "learning_rate": 9.997914341424109e-08, "loss": 0.6738, "step": 30610 }, { "epoch": 0.9381819296309918, "grad_norm": 1.22505091741942, "learning_rate": 9.9880411748296e-08, "loss": 0.5616, "step": 30611 }, { "epoch": 0.938212578153733, "grad_norm": 1.249391342070499, "learning_rate": 9.978172836448252e-08, "loss": 0.613, "step": 30612 }, { "epoch": 0.9382432266764742, "grad_norm": 1.2858368154254183, "learning_rate": 9.968309326377379e-08, "loss": 0.6317, "step": 30613 }, { "epoch": 0.9382738751992153, "grad_norm": 1.41715468798958, "learning_rate": 9.958450644714068e-08, "loss": 0.6378, "step": 30614 }, { "epoch": 0.9383045237219566, "grad_norm": 1.2188466270937535, "learning_rate": 9.948596791555632e-08, "loss": 0.5761, "step": 30615 }, { "epoch": 0.9383351722446978, "grad_norm": 1.3320599901385282, "learning_rate": 9.938747766999046e-08, "loss": 0.7021, "step": 30616 }, { "epoch": 0.938365820767439, "grad_norm": 1.4455325724312027, "learning_rate": 9.928903571141235e-08, "loss": 0.4758, "step": 30617 }, { "epoch": 0.9383964692901802, "grad_norm": 1.2440829274804588, "learning_rate": 9.919064204079454e-08, "loss": 0.5368, "step": 30618 }, { "epoch": 0.9384271178129214, "grad_norm": 1.3482288807713092, "learning_rate": 9.909229665910514e-08, "loss": 0.5318, "step": 30619 }, { "epoch": 0.9384577663356626, "grad_norm": 1.2386128621399553, "learning_rate": 9.899399956731281e-08, "loss": 0.5432, "step": 30620 }, { "epoch": 0.9384884148584038, "grad_norm": 1.2919555938049234, "learning_rate": 9.88957507663868e-08, "loss": 0.6235, "step": 30621 }, { "epoch": 0.938519063381145, "grad_norm": 1.7885925535947833, "learning_rate": 9.879755025729521e-08, "loss": 0.7099, "step": 30622 }, { "epoch": 0.9385497119038863, "grad_norm": 1.222889486337851, "learning_rate": 9.869939804100504e-08, "loss": 0.5333, "step": 30623 }, { "epoch": 0.9385803604266274, "grad_norm": 1.3198741638846039, "learning_rate": 9.860129411848385e-08, "loss": 0.5066, "step": 30624 }, { "epoch": 0.9386110089493687, "grad_norm": 1.3305985650738326, "learning_rate": 9.850323849069809e-08, "loss": 0.6435, "step": 30625 }, { "epoch": 0.9386416574721098, "grad_norm": 1.402092501665025, "learning_rate": 9.840523115861423e-08, "loss": 0.5852, "step": 30626 }, { "epoch": 0.9386723059948511, "grad_norm": 1.4279211181704645, "learning_rate": 9.830727212319813e-08, "loss": 0.5836, "step": 30627 }, { "epoch": 0.9387029545175922, "grad_norm": 1.2147390423636057, "learning_rate": 9.820936138541348e-08, "loss": 0.5422, "step": 30628 }, { "epoch": 0.9387336030403335, "grad_norm": 1.332303525854739, "learning_rate": 9.811149894622729e-08, "loss": 0.669, "step": 30629 }, { "epoch": 0.9387642515630746, "grad_norm": 1.2535917148512383, "learning_rate": 9.801368480660267e-08, "loss": 0.6014, "step": 30630 }, { "epoch": 0.9387949000858159, "grad_norm": 0.45001864470994, "learning_rate": 9.791591896750275e-08, "loss": 0.3927, "step": 30631 }, { "epoch": 0.938825548608557, "grad_norm": 0.4299972128572065, "learning_rate": 9.781820142989173e-08, "loss": 0.4029, "step": 30632 }, { "epoch": 0.9388561971312983, "grad_norm": 0.4472342798728987, "learning_rate": 9.772053219473221e-08, "loss": 0.3915, "step": 30633 }, { "epoch": 0.9388868456540395, "grad_norm": 1.564942884423229, "learning_rate": 9.762291126298673e-08, "loss": 0.5821, "step": 30634 }, { "epoch": 0.9389174941767807, "grad_norm": 1.3083800494557185, "learning_rate": 9.752533863561676e-08, "loss": 0.5372, "step": 30635 }, { "epoch": 0.9389481426995219, "grad_norm": 1.2896374638256196, "learning_rate": 9.742781431358428e-08, "loss": 0.6112, "step": 30636 }, { "epoch": 0.9389787912222631, "grad_norm": 1.3837851892901216, "learning_rate": 9.733033829785021e-08, "loss": 0.5473, "step": 30637 }, { "epoch": 0.9390094397450043, "grad_norm": 1.2116098489540463, "learning_rate": 9.723291058937434e-08, "loss": 0.5015, "step": 30638 }, { "epoch": 0.9390400882677455, "grad_norm": 0.4526109578021519, "learning_rate": 9.713553118911645e-08, "loss": 0.3908, "step": 30639 }, { "epoch": 0.9390707367904867, "grad_norm": 1.6687216250560988, "learning_rate": 9.7038200098038e-08, "loss": 0.5645, "step": 30640 }, { "epoch": 0.939101385313228, "grad_norm": 1.2775084501782648, "learning_rate": 9.694091731709542e-08, "loss": 0.6173, "step": 30641 }, { "epoch": 0.9391320338359691, "grad_norm": 1.5548234867602102, "learning_rate": 9.68436828472491e-08, "loss": 0.6766, "step": 30642 }, { "epoch": 0.9391626823587104, "grad_norm": 1.421644342330651, "learning_rate": 9.674649668945657e-08, "loss": 0.5547, "step": 30643 }, { "epoch": 0.9391933308814515, "grad_norm": 1.301183205635562, "learning_rate": 9.664935884467486e-08, "loss": 0.5697, "step": 30644 }, { "epoch": 0.9392239794041927, "grad_norm": 1.4511131400975505, "learning_rate": 9.655226931386208e-08, "loss": 0.5785, "step": 30645 }, { "epoch": 0.9392546279269339, "grad_norm": 1.2943662803970495, "learning_rate": 9.645522809797414e-08, "loss": 0.6208, "step": 30646 }, { "epoch": 0.9392852764496751, "grad_norm": 1.425556479524605, "learning_rate": 9.63582351979675e-08, "loss": 0.6249, "step": 30647 }, { "epoch": 0.9393159249724163, "grad_norm": 1.4376659521355022, "learning_rate": 9.626129061479806e-08, "loss": 0.6146, "step": 30648 }, { "epoch": 0.9393465734951575, "grad_norm": 1.3836124273646384, "learning_rate": 9.61643943494206e-08, "loss": 0.6375, "step": 30649 }, { "epoch": 0.9393772220178987, "grad_norm": 1.337901789023769, "learning_rate": 9.606754640278993e-08, "loss": 0.5169, "step": 30650 }, { "epoch": 0.9394078705406399, "grad_norm": 1.3744573509441929, "learning_rate": 9.597074677586027e-08, "loss": 0.5972, "step": 30651 }, { "epoch": 0.9394385190633812, "grad_norm": 1.491711311309432, "learning_rate": 9.587399546958586e-08, "loss": 0.6334, "step": 30652 }, { "epoch": 0.9394691675861223, "grad_norm": 1.3667724403192363, "learning_rate": 9.577729248491985e-08, "loss": 0.5885, "step": 30653 }, { "epoch": 0.9394998161088636, "grad_norm": 0.4545760538020283, "learning_rate": 9.568063782281478e-08, "loss": 0.4042, "step": 30654 }, { "epoch": 0.9395304646316047, "grad_norm": 1.4499246507715589, "learning_rate": 9.558403148422324e-08, "loss": 0.6001, "step": 30655 }, { "epoch": 0.939561113154346, "grad_norm": 1.424815047858155, "learning_rate": 9.548747347009779e-08, "loss": 0.6956, "step": 30656 }, { "epoch": 0.9395917616770871, "grad_norm": 1.496183953474945, "learning_rate": 9.539096378138879e-08, "loss": 0.617, "step": 30657 }, { "epoch": 0.9396224101998284, "grad_norm": 1.3406196954927743, "learning_rate": 9.529450241904659e-08, "loss": 0.622, "step": 30658 }, { "epoch": 0.9396530587225695, "grad_norm": 0.4512665682722846, "learning_rate": 9.51980893840232e-08, "loss": 0.3906, "step": 30659 }, { "epoch": 0.9396837072453108, "grad_norm": 1.4622586648968832, "learning_rate": 9.510172467726786e-08, "loss": 0.5788, "step": 30660 }, { "epoch": 0.939714355768052, "grad_norm": 1.5183054016118083, "learning_rate": 9.500540829973093e-08, "loss": 0.559, "step": 30661 }, { "epoch": 0.9397450042907932, "grad_norm": 1.4287709469422003, "learning_rate": 9.490914025235998e-08, "loss": 0.6735, "step": 30662 }, { "epoch": 0.9397756528135344, "grad_norm": 0.43630545250709285, "learning_rate": 9.481292053610424e-08, "loss": 0.3987, "step": 30663 }, { "epoch": 0.9398063013362756, "grad_norm": 0.423458313620374, "learning_rate": 9.471674915191242e-08, "loss": 0.3827, "step": 30664 }, { "epoch": 0.9398369498590168, "grad_norm": 0.47455604216912506, "learning_rate": 9.462062610073098e-08, "loss": 0.3953, "step": 30665 }, { "epoch": 0.939867598381758, "grad_norm": 1.4707541619794733, "learning_rate": 9.452455138350746e-08, "loss": 0.6219, "step": 30666 }, { "epoch": 0.9398982469044992, "grad_norm": 0.43026188419806655, "learning_rate": 9.442852500118949e-08, "loss": 0.3673, "step": 30667 }, { "epoch": 0.9399288954272405, "grad_norm": 1.4329160115313908, "learning_rate": 9.433254695472183e-08, "loss": 0.6267, "step": 30668 }, { "epoch": 0.9399595439499816, "grad_norm": 1.3112064556246383, "learning_rate": 9.42366172450504e-08, "loss": 0.6829, "step": 30669 }, { "epoch": 0.9399901924727229, "grad_norm": 1.444171808764594, "learning_rate": 9.414073587312167e-08, "loss": 0.6559, "step": 30670 }, { "epoch": 0.940020840995464, "grad_norm": 0.4631869088165539, "learning_rate": 9.404490283987877e-08, "loss": 0.3913, "step": 30671 }, { "epoch": 0.9400514895182053, "grad_norm": 0.4507430348924655, "learning_rate": 9.394911814626706e-08, "loss": 0.394, "step": 30672 }, { "epoch": 0.9400821380409464, "grad_norm": 1.3354045480932177, "learning_rate": 9.385338179322967e-08, "loss": 0.6558, "step": 30673 }, { "epoch": 0.9401127865636877, "grad_norm": 1.3997489348060215, "learning_rate": 9.375769378171029e-08, "loss": 0.5681, "step": 30674 }, { "epoch": 0.9401434350864288, "grad_norm": 1.4431380038395039, "learning_rate": 9.366205411265205e-08, "loss": 0.6861, "step": 30675 }, { "epoch": 0.94017408360917, "grad_norm": 1.4205365100837655, "learning_rate": 9.356646278699643e-08, "loss": 0.5779, "step": 30676 }, { "epoch": 0.9402047321319112, "grad_norm": 0.45948709126761467, "learning_rate": 9.347091980568601e-08, "loss": 0.3762, "step": 30677 }, { "epoch": 0.9402353806546524, "grad_norm": 1.5708769158911402, "learning_rate": 9.337542516966224e-08, "loss": 0.6372, "step": 30678 }, { "epoch": 0.9402660291773937, "grad_norm": 0.4633665768592169, "learning_rate": 9.32799788798655e-08, "loss": 0.3903, "step": 30679 }, { "epoch": 0.9402966777001348, "grad_norm": 1.4561423300378074, "learning_rate": 9.318458093723614e-08, "loss": 0.5019, "step": 30680 }, { "epoch": 0.9403273262228761, "grad_norm": 1.4356637600410942, "learning_rate": 9.308923134271563e-08, "loss": 0.634, "step": 30681 }, { "epoch": 0.9403579747456172, "grad_norm": 1.299245644801961, "learning_rate": 9.299393009724156e-08, "loss": 0.6588, "step": 30682 }, { "epoch": 0.9403886232683585, "grad_norm": 1.3686637645891062, "learning_rate": 9.289867720175427e-08, "loss": 0.6162, "step": 30683 }, { "epoch": 0.9404192717910996, "grad_norm": 1.330603932494322, "learning_rate": 9.280347265719192e-08, "loss": 0.5999, "step": 30684 }, { "epoch": 0.9404499203138409, "grad_norm": 1.2536010001280504, "learning_rate": 9.270831646449207e-08, "loss": 0.5289, "step": 30685 }, { "epoch": 0.940480568836582, "grad_norm": 1.5681790988071826, "learning_rate": 9.261320862459288e-08, "loss": 0.5546, "step": 30686 }, { "epoch": 0.9405112173593233, "grad_norm": 1.3231217514400255, "learning_rate": 9.251814913843138e-08, "loss": 0.5608, "step": 30687 }, { "epoch": 0.9405418658820645, "grad_norm": 1.35862366733308, "learning_rate": 9.242313800694402e-08, "loss": 0.5834, "step": 30688 }, { "epoch": 0.9405725144048057, "grad_norm": 1.4851617274013107, "learning_rate": 9.232817523106785e-08, "loss": 0.6611, "step": 30689 }, { "epoch": 0.9406031629275469, "grad_norm": 1.4734501734587186, "learning_rate": 9.223326081173712e-08, "loss": 0.6396, "step": 30690 }, { "epoch": 0.9406338114502881, "grad_norm": 1.479078227370736, "learning_rate": 9.213839474988829e-08, "loss": 0.7544, "step": 30691 }, { "epoch": 0.9406644599730293, "grad_norm": 1.3046197092939813, "learning_rate": 9.204357704645562e-08, "loss": 0.5925, "step": 30692 }, { "epoch": 0.9406951084957705, "grad_norm": 1.2637033961126345, "learning_rate": 9.19488077023728e-08, "loss": 0.5549, "step": 30693 }, { "epoch": 0.9407257570185117, "grad_norm": 0.4615766890319006, "learning_rate": 9.185408671857465e-08, "loss": 0.3914, "step": 30694 }, { "epoch": 0.940756405541253, "grad_norm": 1.2497019070989313, "learning_rate": 9.175941409599376e-08, "loss": 0.5341, "step": 30695 }, { "epoch": 0.9407870540639941, "grad_norm": 1.3236206632362812, "learning_rate": 9.16647898355627e-08, "loss": 0.4843, "step": 30696 }, { "epoch": 0.9408177025867354, "grad_norm": 1.2594080891249289, "learning_rate": 9.15702139382152e-08, "loss": 0.5549, "step": 30697 }, { "epoch": 0.9408483511094765, "grad_norm": 1.467321976057214, "learning_rate": 9.147568640488158e-08, "loss": 0.5818, "step": 30698 }, { "epoch": 0.9408789996322178, "grad_norm": 1.6846797125184987, "learning_rate": 9.13812072364939e-08, "loss": 0.6637, "step": 30699 }, { "epoch": 0.9409096481549589, "grad_norm": 1.2865576023049896, "learning_rate": 9.128677643398365e-08, "loss": 0.559, "step": 30700 }, { "epoch": 0.9409402966777002, "grad_norm": 1.2890662611073629, "learning_rate": 9.119239399828062e-08, "loss": 0.5843, "step": 30701 }, { "epoch": 0.9409709452004413, "grad_norm": 1.3156552389819427, "learning_rate": 9.109805993031517e-08, "loss": 0.5503, "step": 30702 }, { "epoch": 0.9410015937231826, "grad_norm": 1.5875580360483632, "learning_rate": 9.100377423101603e-08, "loss": 0.6129, "step": 30703 }, { "epoch": 0.9410322422459237, "grad_norm": 1.3421672187118967, "learning_rate": 9.090953690131243e-08, "loss": 0.5284, "step": 30704 }, { "epoch": 0.941062890768665, "grad_norm": 1.384341174306185, "learning_rate": 9.08153479421342e-08, "loss": 0.6621, "step": 30705 }, { "epoch": 0.9410935392914062, "grad_norm": 1.257294701852771, "learning_rate": 9.072120735440726e-08, "loss": 0.5203, "step": 30706 }, { "epoch": 0.9411241878141473, "grad_norm": 1.324488648683984, "learning_rate": 9.062711513906087e-08, "loss": 0.5922, "step": 30707 }, { "epoch": 0.9411548363368886, "grad_norm": 1.229662453859541, "learning_rate": 9.053307129702204e-08, "loss": 0.5085, "step": 30708 }, { "epoch": 0.9411854848596297, "grad_norm": 0.4290740777287859, "learning_rate": 9.043907582921673e-08, "loss": 0.3691, "step": 30709 }, { "epoch": 0.941216133382371, "grad_norm": 1.3688026632001196, "learning_rate": 9.034512873657086e-08, "loss": 0.5988, "step": 30710 }, { "epoch": 0.9412467819051121, "grad_norm": 1.2822919138851843, "learning_rate": 9.025123002001146e-08, "loss": 0.4903, "step": 30711 }, { "epoch": 0.9412774304278534, "grad_norm": 1.3083429500796497, "learning_rate": 9.015737968046223e-08, "loss": 0.5864, "step": 30712 }, { "epoch": 0.9413080789505945, "grad_norm": 0.4430595464605222, "learning_rate": 9.00635777188491e-08, "loss": 0.3874, "step": 30713 }, { "epoch": 0.9413387274733358, "grad_norm": 1.6082834938964745, "learning_rate": 8.996982413609523e-08, "loss": 0.5914, "step": 30714 }, { "epoch": 0.941369375996077, "grad_norm": 1.3351661336268494, "learning_rate": 8.987611893312542e-08, "loss": 0.509, "step": 30715 }, { "epoch": 0.9414000245188182, "grad_norm": 1.3935005088918546, "learning_rate": 8.978246211086228e-08, "loss": 0.6361, "step": 30716 }, { "epoch": 0.9414306730415594, "grad_norm": 1.34784699482007, "learning_rate": 8.968885367022895e-08, "loss": 0.6235, "step": 30717 }, { "epoch": 0.9414613215643006, "grad_norm": 1.2007665397300396, "learning_rate": 8.959529361214748e-08, "loss": 0.5597, "step": 30718 }, { "epoch": 0.9414919700870418, "grad_norm": 1.3543594692760745, "learning_rate": 8.950178193754045e-08, "loss": 0.6762, "step": 30719 }, { "epoch": 0.941522618609783, "grad_norm": 1.315562558541686, "learning_rate": 8.94083186473288e-08, "loss": 0.6376, "step": 30720 }, { "epoch": 0.9415532671325242, "grad_norm": 0.45220395239977884, "learning_rate": 8.93149037424329e-08, "loss": 0.4026, "step": 30721 }, { "epoch": 0.9415839156552654, "grad_norm": 1.2926661599475056, "learning_rate": 8.922153722377425e-08, "loss": 0.6401, "step": 30722 }, { "epoch": 0.9416145641780066, "grad_norm": 1.3339310677635594, "learning_rate": 8.912821909227154e-08, "loss": 0.6118, "step": 30723 }, { "epoch": 0.9416452127007479, "grad_norm": 1.2577017889698439, "learning_rate": 8.903494934884572e-08, "loss": 0.5629, "step": 30724 }, { "epoch": 0.941675861223489, "grad_norm": 1.2892032703339662, "learning_rate": 8.894172799441436e-08, "loss": 0.6031, "step": 30725 }, { "epoch": 0.9417065097462303, "grad_norm": 1.260533019320198, "learning_rate": 8.884855502989732e-08, "loss": 0.4796, "step": 30726 }, { "epoch": 0.9417371582689714, "grad_norm": 1.557593435756215, "learning_rate": 8.875543045621216e-08, "loss": 0.5964, "step": 30727 }, { "epoch": 0.9417678067917127, "grad_norm": 0.4256888231763202, "learning_rate": 8.866235427427594e-08, "loss": 0.3792, "step": 30728 }, { "epoch": 0.9417984553144538, "grad_norm": 1.4764284084884665, "learning_rate": 8.856932648500571e-08, "loss": 0.6277, "step": 30729 }, { "epoch": 0.9418291038371951, "grad_norm": 1.2900679841121674, "learning_rate": 8.847634708931963e-08, "loss": 0.6646, "step": 30730 }, { "epoch": 0.9418597523599362, "grad_norm": 0.4362159989107107, "learning_rate": 8.838341608813194e-08, "loss": 0.3813, "step": 30731 }, { "epoch": 0.9418904008826775, "grad_norm": 1.2218792029626502, "learning_rate": 8.829053348235917e-08, "loss": 0.5093, "step": 30732 }, { "epoch": 0.9419210494054187, "grad_norm": 1.454863095830228, "learning_rate": 8.819769927291666e-08, "loss": 0.586, "step": 30733 }, { "epoch": 0.9419516979281599, "grad_norm": 1.447523993722484, "learning_rate": 8.810491346071926e-08, "loss": 0.5816, "step": 30734 }, { "epoch": 0.9419823464509011, "grad_norm": 1.3729418927278132, "learning_rate": 8.801217604668121e-08, "loss": 0.6139, "step": 30735 }, { "epoch": 0.9420129949736423, "grad_norm": 1.4966341415359823, "learning_rate": 8.79194870317146e-08, "loss": 0.5885, "step": 30736 }, { "epoch": 0.9420436434963835, "grad_norm": 0.42258656731707056, "learning_rate": 8.782684641673533e-08, "loss": 0.3956, "step": 30737 }, { "epoch": 0.9420742920191246, "grad_norm": 1.186238080541182, "learning_rate": 8.773425420265491e-08, "loss": 0.5294, "step": 30738 }, { "epoch": 0.9421049405418659, "grad_norm": 1.4076837836939906, "learning_rate": 8.764171039038538e-08, "loss": 0.6092, "step": 30739 }, { "epoch": 0.942135589064607, "grad_norm": 1.3520837682187004, "learning_rate": 8.754921498083879e-08, "loss": 0.5748, "step": 30740 }, { "epoch": 0.9421662375873483, "grad_norm": 1.2543151274738362, "learning_rate": 8.745676797492664e-08, "loss": 0.6265, "step": 30741 }, { "epoch": 0.9421968861100894, "grad_norm": 1.292652742155918, "learning_rate": 8.73643693735604e-08, "loss": 0.7076, "step": 30742 }, { "epoch": 0.9422275346328307, "grad_norm": 1.3922878547302018, "learning_rate": 8.727201917764938e-08, "loss": 0.6118, "step": 30743 }, { "epoch": 0.9422581831555719, "grad_norm": 1.4299511172745567, "learning_rate": 8.717971738810448e-08, "loss": 0.6028, "step": 30744 }, { "epoch": 0.9422888316783131, "grad_norm": 1.1701963933975192, "learning_rate": 8.7087464005835e-08, "loss": 0.6127, "step": 30745 }, { "epoch": 0.9423194802010543, "grad_norm": 0.44852589565705697, "learning_rate": 8.699525903175022e-08, "loss": 0.4052, "step": 30746 }, { "epoch": 0.9423501287237955, "grad_norm": 0.4574110603813544, "learning_rate": 8.690310246675715e-08, "loss": 0.4253, "step": 30747 }, { "epoch": 0.9423807772465367, "grad_norm": 1.484781214025592, "learning_rate": 8.68109943117651e-08, "loss": 0.666, "step": 30748 }, { "epoch": 0.9424114257692779, "grad_norm": 1.4175070938451162, "learning_rate": 8.671893456768166e-08, "loss": 0.638, "step": 30749 }, { "epoch": 0.9424420742920191, "grad_norm": 2.2463698115971247, "learning_rate": 8.662692323541388e-08, "loss": 0.6441, "step": 30750 }, { "epoch": 0.9424727228147604, "grad_norm": 1.3379431482363608, "learning_rate": 8.653496031586772e-08, "loss": 0.6392, "step": 30751 }, { "epoch": 0.9425033713375015, "grad_norm": 1.3120542450374704, "learning_rate": 8.644304580994967e-08, "loss": 0.5763, "step": 30752 }, { "epoch": 0.9425340198602428, "grad_norm": 1.2064276168565518, "learning_rate": 8.635117971856622e-08, "loss": 0.4414, "step": 30753 }, { "epoch": 0.9425646683829839, "grad_norm": 0.45492350579846835, "learning_rate": 8.625936204262165e-08, "loss": 0.3782, "step": 30754 }, { "epoch": 0.9425953169057252, "grad_norm": 1.404736869396158, "learning_rate": 8.616759278301967e-08, "loss": 0.6328, "step": 30755 }, { "epoch": 0.9426259654284663, "grad_norm": 1.5014091233148834, "learning_rate": 8.60758719406668e-08, "loss": 0.6091, "step": 30756 }, { "epoch": 0.9426566139512076, "grad_norm": 1.405555830333022, "learning_rate": 8.598419951646564e-08, "loss": 0.6671, "step": 30757 }, { "epoch": 0.9426872624739487, "grad_norm": 1.3044662014102364, "learning_rate": 8.58925755113188e-08, "loss": 0.5547, "step": 30758 }, { "epoch": 0.94271791099669, "grad_norm": 1.5594525636283127, "learning_rate": 8.580099992613001e-08, "loss": 0.5609, "step": 30759 }, { "epoch": 0.9427485595194312, "grad_norm": 1.2449357168244712, "learning_rate": 8.570947276180131e-08, "loss": 0.5806, "step": 30760 }, { "epoch": 0.9427792080421724, "grad_norm": 1.3118535150699249, "learning_rate": 8.561799401923477e-08, "loss": 0.5102, "step": 30761 }, { "epoch": 0.9428098565649136, "grad_norm": 1.202045720641895, "learning_rate": 8.552656369933132e-08, "loss": 0.6009, "step": 30762 }, { "epoch": 0.9428405050876548, "grad_norm": 1.2602187484523784, "learning_rate": 8.543518180299192e-08, "loss": 0.5041, "step": 30763 }, { "epoch": 0.942871153610396, "grad_norm": 1.2747234606868683, "learning_rate": 8.534384833111809e-08, "loss": 0.5234, "step": 30764 }, { "epoch": 0.9429018021331372, "grad_norm": 1.2593143365293795, "learning_rate": 8.525256328460796e-08, "loss": 0.6323, "step": 30765 }, { "epoch": 0.9429324506558784, "grad_norm": 1.3040669881991245, "learning_rate": 8.51613266643614e-08, "loss": 0.5601, "step": 30766 }, { "epoch": 0.9429630991786196, "grad_norm": 1.2535737630884658, "learning_rate": 8.507013847127876e-08, "loss": 0.6163, "step": 30767 }, { "epoch": 0.9429937477013608, "grad_norm": 1.3182476292440555, "learning_rate": 8.497899870625715e-08, "loss": 0.6276, "step": 30768 }, { "epoch": 0.943024396224102, "grad_norm": 1.4137724458389358, "learning_rate": 8.488790737019525e-08, "loss": 0.6349, "step": 30769 }, { "epoch": 0.9430550447468432, "grad_norm": 0.45171569562437724, "learning_rate": 8.47968644639896e-08, "loss": 0.4064, "step": 30770 }, { "epoch": 0.9430856932695844, "grad_norm": 1.1502990005503204, "learning_rate": 8.470586998853891e-08, "loss": 0.5222, "step": 30771 }, { "epoch": 0.9431163417923256, "grad_norm": 1.3852764353259306, "learning_rate": 8.461492394473859e-08, "loss": 0.5888, "step": 30772 }, { "epoch": 0.9431469903150668, "grad_norm": 1.383560654033979, "learning_rate": 8.452402633348511e-08, "loss": 0.5447, "step": 30773 }, { "epoch": 0.943177638837808, "grad_norm": 1.4054305636132336, "learning_rate": 8.44331771556739e-08, "loss": 0.6525, "step": 30774 }, { "epoch": 0.9432082873605492, "grad_norm": 0.4592219188034083, "learning_rate": 8.434237641220088e-08, "loss": 0.4137, "step": 30775 }, { "epoch": 0.9432389358832904, "grad_norm": 1.4145556144914355, "learning_rate": 8.425162410395981e-08, "loss": 0.7105, "step": 30776 }, { "epoch": 0.9432695844060316, "grad_norm": 0.4527067833907836, "learning_rate": 8.416092023184441e-08, "loss": 0.3775, "step": 30777 }, { "epoch": 0.9433002329287729, "grad_norm": 0.45844698906834996, "learning_rate": 8.407026479675063e-08, "loss": 0.3845, "step": 30778 }, { "epoch": 0.943330881451514, "grad_norm": 1.3018643678432007, "learning_rate": 8.39796577995694e-08, "loss": 0.5401, "step": 30779 }, { "epoch": 0.9433615299742553, "grad_norm": 2.191678310781532, "learning_rate": 8.388909924119503e-08, "loss": 0.6359, "step": 30780 }, { "epoch": 0.9433921784969964, "grad_norm": 1.3396828765288047, "learning_rate": 8.379858912251848e-08, "loss": 0.6373, "step": 30781 }, { "epoch": 0.9434228270197377, "grad_norm": 0.4189727421096302, "learning_rate": 8.37081274444329e-08, "loss": 0.3508, "step": 30782 }, { "epoch": 0.9434534755424788, "grad_norm": 1.3117225142138782, "learning_rate": 8.361771420782871e-08, "loss": 0.5321, "step": 30783 }, { "epoch": 0.9434841240652201, "grad_norm": 1.3415792762886676, "learning_rate": 8.352734941359741e-08, "loss": 0.4993, "step": 30784 }, { "epoch": 0.9435147725879612, "grad_norm": 1.3284680445436237, "learning_rate": 8.343703306262829e-08, "loss": 0.5442, "step": 30785 }, { "epoch": 0.9435454211107025, "grad_norm": 1.3925960493163367, "learning_rate": 8.334676515581286e-08, "loss": 0.5482, "step": 30786 }, { "epoch": 0.9435760696334436, "grad_norm": 1.4524815078356388, "learning_rate": 8.325654569403985e-08, "loss": 0.647, "step": 30787 }, { "epoch": 0.9436067181561849, "grad_norm": 1.1526307669029503, "learning_rate": 8.316637467819744e-08, "loss": 0.5684, "step": 30788 }, { "epoch": 0.9436373666789261, "grad_norm": 1.333406748916172, "learning_rate": 8.307625210917548e-08, "loss": 0.6392, "step": 30789 }, { "epoch": 0.9436680152016673, "grad_norm": 1.3989335529437348, "learning_rate": 8.298617798786047e-08, "loss": 0.6717, "step": 30790 }, { "epoch": 0.9436986637244085, "grad_norm": 1.5004380204896421, "learning_rate": 8.289615231514115e-08, "loss": 0.5615, "step": 30791 }, { "epoch": 0.9437293122471497, "grad_norm": 0.461706840124089, "learning_rate": 8.280617509190403e-08, "loss": 0.4044, "step": 30792 }, { "epoch": 0.9437599607698909, "grad_norm": 1.4174102665311978, "learning_rate": 8.271624631903564e-08, "loss": 0.6258, "step": 30793 }, { "epoch": 0.9437906092926321, "grad_norm": 1.214534093218474, "learning_rate": 8.262636599742301e-08, "loss": 0.527, "step": 30794 }, { "epoch": 0.9438212578153733, "grad_norm": 1.3929320454555028, "learning_rate": 8.253653412794994e-08, "loss": 0.6278, "step": 30795 }, { "epoch": 0.9438519063381146, "grad_norm": 1.7415373099734073, "learning_rate": 8.244675071150287e-08, "loss": 0.658, "step": 30796 }, { "epoch": 0.9438825548608557, "grad_norm": 1.4206390356534848, "learning_rate": 8.23570157489667e-08, "loss": 0.6076, "step": 30797 }, { "epoch": 0.943913203383597, "grad_norm": 1.307865918039181, "learning_rate": 8.22673292412246e-08, "loss": 0.5699, "step": 30798 }, { "epoch": 0.9439438519063381, "grad_norm": 1.3891670361811497, "learning_rate": 8.217769118916085e-08, "loss": 0.5443, "step": 30799 }, { "epoch": 0.9439745004290793, "grad_norm": 1.5506769741021775, "learning_rate": 8.208810159365865e-08, "loss": 0.645, "step": 30800 }, { "epoch": 0.9440051489518205, "grad_norm": 1.4877583767763476, "learning_rate": 8.199856045560062e-08, "loss": 0.6605, "step": 30801 }, { "epoch": 0.9440357974745617, "grad_norm": 1.407551585084675, "learning_rate": 8.190906777586938e-08, "loss": 0.5251, "step": 30802 }, { "epoch": 0.9440664459973029, "grad_norm": 1.214540865352696, "learning_rate": 8.18196235553459e-08, "loss": 0.4641, "step": 30803 }, { "epoch": 0.9440970945200441, "grad_norm": 1.4214388568971452, "learning_rate": 8.173022779491224e-08, "loss": 0.6345, "step": 30804 }, { "epoch": 0.9441277430427854, "grad_norm": 1.3824746746572827, "learning_rate": 8.164088049544938e-08, "loss": 0.6196, "step": 30805 }, { "epoch": 0.9441583915655265, "grad_norm": 0.44788069383111917, "learning_rate": 8.155158165783661e-08, "loss": 0.397, "step": 30806 }, { "epoch": 0.9441890400882678, "grad_norm": 1.3703349190412046, "learning_rate": 8.146233128295489e-08, "loss": 0.6294, "step": 30807 }, { "epoch": 0.9442196886110089, "grad_norm": 1.3788842128881085, "learning_rate": 8.137312937168407e-08, "loss": 0.5904, "step": 30808 }, { "epoch": 0.9442503371337502, "grad_norm": 1.3974021635704807, "learning_rate": 8.128397592490123e-08, "loss": 0.6576, "step": 30809 }, { "epoch": 0.9442809856564913, "grad_norm": 1.2823730421406498, "learning_rate": 8.119487094348677e-08, "loss": 0.6513, "step": 30810 }, { "epoch": 0.9443116341792326, "grad_norm": 1.3373129382754227, "learning_rate": 8.110581442831666e-08, "loss": 0.595, "step": 30811 }, { "epoch": 0.9443422827019737, "grad_norm": 1.3998712281768695, "learning_rate": 8.10168063802702e-08, "loss": 0.5557, "step": 30812 }, { "epoch": 0.944372931224715, "grad_norm": 1.4835123075829204, "learning_rate": 8.092784680022391e-08, "loss": 0.6834, "step": 30813 }, { "epoch": 0.9444035797474561, "grad_norm": 1.276604523220435, "learning_rate": 8.083893568905376e-08, "loss": 0.5724, "step": 30814 }, { "epoch": 0.9444342282701974, "grad_norm": 1.3398995578229318, "learning_rate": 8.075007304763626e-08, "loss": 0.5425, "step": 30815 }, { "epoch": 0.9444648767929386, "grad_norm": 0.4494673496203345, "learning_rate": 8.066125887684739e-08, "loss": 0.4023, "step": 30816 }, { "epoch": 0.9444955253156798, "grad_norm": 1.3398206750811368, "learning_rate": 8.057249317756089e-08, "loss": 0.5742, "step": 30817 }, { "epoch": 0.944526173838421, "grad_norm": 1.567318579304312, "learning_rate": 8.048377595065271e-08, "loss": 0.6713, "step": 30818 }, { "epoch": 0.9445568223611622, "grad_norm": 1.3579495163705777, "learning_rate": 8.039510719699717e-08, "loss": 0.5567, "step": 30819 }, { "epoch": 0.9445874708839034, "grad_norm": 1.3843602546004576, "learning_rate": 8.03064869174669e-08, "loss": 0.6159, "step": 30820 }, { "epoch": 0.9446181194066446, "grad_norm": 1.2441646215281765, "learning_rate": 8.021791511293564e-08, "loss": 0.5833, "step": 30821 }, { "epoch": 0.9446487679293858, "grad_norm": 1.3373227040446198, "learning_rate": 8.012939178427547e-08, "loss": 0.5635, "step": 30822 }, { "epoch": 0.944679416452127, "grad_norm": 0.4498440810016506, "learning_rate": 8.004091693236016e-08, "loss": 0.3822, "step": 30823 }, { "epoch": 0.9447100649748682, "grad_norm": 1.4496291560435624, "learning_rate": 7.995249055806009e-08, "loss": 0.6296, "step": 30824 }, { "epoch": 0.9447407134976095, "grad_norm": 1.3378814396521552, "learning_rate": 7.986411266224681e-08, "loss": 0.5775, "step": 30825 }, { "epoch": 0.9447713620203506, "grad_norm": 1.4697974383469161, "learning_rate": 7.977578324579127e-08, "loss": 0.5713, "step": 30826 }, { "epoch": 0.9448020105430919, "grad_norm": 0.45840343660522953, "learning_rate": 7.968750230956445e-08, "loss": 0.3894, "step": 30827 }, { "epoch": 0.944832659065833, "grad_norm": 1.2498425661477919, "learning_rate": 7.959926985443511e-08, "loss": 0.5817, "step": 30828 }, { "epoch": 0.9448633075885743, "grad_norm": 1.3181617738317353, "learning_rate": 7.95110858812731e-08, "loss": 0.6112, "step": 30829 }, { "epoch": 0.9448939561113154, "grad_norm": 1.300216078915294, "learning_rate": 7.942295039094771e-08, "loss": 0.5205, "step": 30830 }, { "epoch": 0.9449246046340566, "grad_norm": 0.4387702584346655, "learning_rate": 7.933486338432661e-08, "loss": 0.3895, "step": 30831 }, { "epoch": 0.9449552531567978, "grad_norm": 1.4432990429549444, "learning_rate": 7.924682486227797e-08, "loss": 0.5943, "step": 30832 }, { "epoch": 0.944985901679539, "grad_norm": 1.4386723113916082, "learning_rate": 7.915883482566943e-08, "loss": 0.6312, "step": 30833 }, { "epoch": 0.9450165502022803, "grad_norm": 1.34305874424685, "learning_rate": 7.90708932753681e-08, "loss": 0.6143, "step": 30834 }, { "epoch": 0.9450471987250214, "grad_norm": 1.382731504326266, "learning_rate": 7.898300021224048e-08, "loss": 0.6382, "step": 30835 }, { "epoch": 0.9450778472477627, "grad_norm": 1.2674812346896827, "learning_rate": 7.889515563715256e-08, "loss": 0.5527, "step": 30836 }, { "epoch": 0.9451084957705038, "grad_norm": 1.234025782454282, "learning_rate": 7.880735955096918e-08, "loss": 0.571, "step": 30837 }, { "epoch": 0.9451391442932451, "grad_norm": 1.6237063242085847, "learning_rate": 7.871961195455635e-08, "loss": 0.6064, "step": 30838 }, { "epoch": 0.9451697928159862, "grad_norm": 1.386878364613536, "learning_rate": 7.863191284877836e-08, "loss": 0.6078, "step": 30839 }, { "epoch": 0.9452004413387275, "grad_norm": 1.4042551445879685, "learning_rate": 7.854426223449951e-08, "loss": 0.4827, "step": 30840 }, { "epoch": 0.9452310898614686, "grad_norm": 1.2700426729031864, "learning_rate": 7.845666011258247e-08, "loss": 0.55, "step": 30841 }, { "epoch": 0.9452617383842099, "grad_norm": 2.6990210041963523, "learning_rate": 7.836910648389206e-08, "loss": 0.6183, "step": 30842 }, { "epoch": 0.945292386906951, "grad_norm": 1.2115842445521863, "learning_rate": 7.828160134929041e-08, "loss": 0.5269, "step": 30843 }, { "epoch": 0.9453230354296923, "grad_norm": 0.4519438006010981, "learning_rate": 7.819414470963848e-08, "loss": 0.3972, "step": 30844 }, { "epoch": 0.9453536839524335, "grad_norm": 1.2778080767010858, "learning_rate": 7.810673656579947e-08, "loss": 0.611, "step": 30845 }, { "epoch": 0.9453843324751747, "grad_norm": 1.2311064783763084, "learning_rate": 7.801937691863381e-08, "loss": 0.5074, "step": 30846 }, { "epoch": 0.9454149809979159, "grad_norm": 1.5512483004987625, "learning_rate": 7.793206576900247e-08, "loss": 0.6802, "step": 30847 }, { "epoch": 0.9454456295206571, "grad_norm": 1.2110571663331722, "learning_rate": 7.784480311776588e-08, "loss": 0.5297, "step": 30848 }, { "epoch": 0.9454762780433983, "grad_norm": 1.2700307612928252, "learning_rate": 7.775758896578445e-08, "loss": 0.6215, "step": 30849 }, { "epoch": 0.9455069265661395, "grad_norm": 1.487401431183343, "learning_rate": 7.767042331391638e-08, "loss": 0.5842, "step": 30850 }, { "epoch": 0.9455375750888807, "grad_norm": 1.3477237502309427, "learning_rate": 7.758330616302156e-08, "loss": 0.6159, "step": 30851 }, { "epoch": 0.945568223611622, "grad_norm": 1.3500873586393654, "learning_rate": 7.749623751395707e-08, "loss": 0.5262, "step": 30852 }, { "epoch": 0.9455988721343631, "grad_norm": 1.5601683856479103, "learning_rate": 7.740921736758222e-08, "loss": 0.5961, "step": 30853 }, { "epoch": 0.9456295206571044, "grad_norm": 1.3594323719414516, "learning_rate": 7.732224572475355e-08, "loss": 0.6125, "step": 30854 }, { "epoch": 0.9456601691798455, "grad_norm": 1.562443215222299, "learning_rate": 7.72353225863287e-08, "loss": 0.575, "step": 30855 }, { "epoch": 0.9456908177025868, "grad_norm": 1.2320902818703432, "learning_rate": 7.714844795316312e-08, "loss": 0.5796, "step": 30856 }, { "epoch": 0.9457214662253279, "grad_norm": 1.5835771074077383, "learning_rate": 7.706162182611387e-08, "loss": 0.6591, "step": 30857 }, { "epoch": 0.9457521147480692, "grad_norm": 1.3191375837727535, "learning_rate": 7.697484420603584e-08, "loss": 0.6116, "step": 30858 }, { "epoch": 0.9457827632708103, "grad_norm": 1.4087537436713156, "learning_rate": 7.688811509378447e-08, "loss": 0.6389, "step": 30859 }, { "epoch": 0.9458134117935516, "grad_norm": 1.3164742824100564, "learning_rate": 7.680143449021404e-08, "loss": 0.5459, "step": 30860 }, { "epoch": 0.9458440603162928, "grad_norm": 1.454887414244298, "learning_rate": 7.67148023961789e-08, "loss": 0.6777, "step": 30861 }, { "epoch": 0.9458747088390339, "grad_norm": 0.46975292895819765, "learning_rate": 7.662821881253279e-08, "loss": 0.4075, "step": 30862 }, { "epoch": 0.9459053573617752, "grad_norm": 1.2836410195190155, "learning_rate": 7.654168374012782e-08, "loss": 0.5121, "step": 30863 }, { "epoch": 0.9459360058845163, "grad_norm": 0.44144832422111613, "learning_rate": 7.645519717981775e-08, "loss": 0.3963, "step": 30864 }, { "epoch": 0.9459666544072576, "grad_norm": 1.4492854108914497, "learning_rate": 7.636875913245467e-08, "loss": 0.5429, "step": 30865 }, { "epoch": 0.9459973029299987, "grad_norm": 1.3782335583977776, "learning_rate": 7.628236959888902e-08, "loss": 0.5265, "step": 30866 }, { "epoch": 0.94602795145274, "grad_norm": 1.3411686253827053, "learning_rate": 7.619602857997344e-08, "loss": 0.5383, "step": 30867 }, { "epoch": 0.9460585999754811, "grad_norm": 1.3658419194119895, "learning_rate": 7.610973607655836e-08, "loss": 0.6175, "step": 30868 }, { "epoch": 0.9460892484982224, "grad_norm": 1.525120909648295, "learning_rate": 7.602349208949422e-08, "loss": 0.5907, "step": 30869 }, { "epoch": 0.9461198970209636, "grad_norm": 1.482411300392181, "learning_rate": 7.593729661962979e-08, "loss": 0.6949, "step": 30870 }, { "epoch": 0.9461505455437048, "grad_norm": 1.3955337472960512, "learning_rate": 7.585114966781493e-08, "loss": 0.5229, "step": 30871 }, { "epoch": 0.946181194066446, "grad_norm": 1.395719773520306, "learning_rate": 7.576505123489952e-08, "loss": 0.5797, "step": 30872 }, { "epoch": 0.9462118425891872, "grad_norm": 1.3050431884325504, "learning_rate": 7.567900132173067e-08, "loss": 0.6484, "step": 30873 }, { "epoch": 0.9462424911119284, "grad_norm": 0.45265400498815983, "learning_rate": 7.559299992915602e-08, "loss": 0.4033, "step": 30874 }, { "epoch": 0.9462731396346696, "grad_norm": 1.316449871663201, "learning_rate": 7.550704705802381e-08, "loss": 0.6297, "step": 30875 }, { "epoch": 0.9463037881574108, "grad_norm": 0.4422100972716101, "learning_rate": 7.542114270918111e-08, "loss": 0.3831, "step": 30876 }, { "epoch": 0.946334436680152, "grad_norm": 1.653944023350783, "learning_rate": 7.533528688347336e-08, "loss": 0.6197, "step": 30877 }, { "epoch": 0.9463650852028932, "grad_norm": 1.2326069516986655, "learning_rate": 7.524947958174655e-08, "loss": 0.5105, "step": 30878 }, { "epoch": 0.9463957337256345, "grad_norm": 1.371167084305023, "learning_rate": 7.516372080484724e-08, "loss": 0.5535, "step": 30879 }, { "epoch": 0.9464263822483756, "grad_norm": 1.3880086768019309, "learning_rate": 7.50780105536203e-08, "loss": 0.5655, "step": 30880 }, { "epoch": 0.9464570307711169, "grad_norm": 1.4890211740588002, "learning_rate": 7.499234882890949e-08, "loss": 0.5843, "step": 30881 }, { "epoch": 0.946487679293858, "grad_norm": 1.46335089408064, "learning_rate": 7.49067356315586e-08, "loss": 0.5997, "step": 30882 }, { "epoch": 0.9465183278165993, "grad_norm": 1.5660787362146749, "learning_rate": 7.482117096241248e-08, "loss": 0.5412, "step": 30883 }, { "epoch": 0.9465489763393404, "grad_norm": 0.43496395385608105, "learning_rate": 7.473565482231382e-08, "loss": 0.3851, "step": 30884 }, { "epoch": 0.9465796248620817, "grad_norm": 0.4410683269342313, "learning_rate": 7.465018721210416e-08, "loss": 0.372, "step": 30885 }, { "epoch": 0.9466102733848228, "grad_norm": 1.3981895699893292, "learning_rate": 7.45647681326267e-08, "loss": 0.5864, "step": 30886 }, { "epoch": 0.9466409219075641, "grad_norm": 1.3212517742471972, "learning_rate": 7.447939758472245e-08, "loss": 0.5779, "step": 30887 }, { "epoch": 0.9466715704303053, "grad_norm": 0.4725752100383277, "learning_rate": 7.43940755692335e-08, "loss": 0.4157, "step": 30888 }, { "epoch": 0.9467022189530465, "grad_norm": 1.3778233708956567, "learning_rate": 7.430880208699975e-08, "loss": 0.5654, "step": 30889 }, { "epoch": 0.9467328674757877, "grad_norm": 1.3611806514252258, "learning_rate": 7.422357713886163e-08, "loss": 0.5519, "step": 30890 }, { "epoch": 0.9467635159985289, "grad_norm": 1.2701156644781677, "learning_rate": 7.413840072565959e-08, "loss": 0.5313, "step": 30891 }, { "epoch": 0.9467941645212701, "grad_norm": 1.6768243348549234, "learning_rate": 7.405327284823128e-08, "loss": 0.5934, "step": 30892 }, { "epoch": 0.9468248130440112, "grad_norm": 1.252553632230827, "learning_rate": 7.396819350741657e-08, "loss": 0.6075, "step": 30893 }, { "epoch": 0.9468554615667525, "grad_norm": 1.296133719678182, "learning_rate": 7.388316270405427e-08, "loss": 0.5676, "step": 30894 }, { "epoch": 0.9468861100894936, "grad_norm": 1.4011876261425187, "learning_rate": 7.37981804389809e-08, "loss": 0.5753, "step": 30895 }, { "epoch": 0.9469167586122349, "grad_norm": 1.2955228444582445, "learning_rate": 7.37132467130347e-08, "loss": 0.495, "step": 30896 }, { "epoch": 0.946947407134976, "grad_norm": 1.446641893203518, "learning_rate": 7.362836152705221e-08, "loss": 0.591, "step": 30897 }, { "epoch": 0.9469780556577173, "grad_norm": 1.383789432246649, "learning_rate": 7.354352488187e-08, "loss": 0.6786, "step": 30898 }, { "epoch": 0.9470087041804585, "grad_norm": 1.223361325653428, "learning_rate": 7.34587367783246e-08, "loss": 0.5613, "step": 30899 }, { "epoch": 0.9470393527031997, "grad_norm": 1.4322840108485646, "learning_rate": 7.33739972172498e-08, "loss": 0.6335, "step": 30900 }, { "epoch": 0.9470700012259409, "grad_norm": 0.46260281311193663, "learning_rate": 7.32893061994816e-08, "loss": 0.4159, "step": 30901 }, { "epoch": 0.9471006497486821, "grad_norm": 1.427459317703494, "learning_rate": 7.320466372585544e-08, "loss": 0.5666, "step": 30902 }, { "epoch": 0.9471312982714233, "grad_norm": 1.296926765384674, "learning_rate": 7.312006979720344e-08, "loss": 0.5432, "step": 30903 }, { "epoch": 0.9471619467941645, "grad_norm": 1.3593236710533378, "learning_rate": 7.30355244143599e-08, "loss": 0.6043, "step": 30904 }, { "epoch": 0.9471925953169057, "grad_norm": 1.3071754746203847, "learning_rate": 7.295102757815864e-08, "loss": 0.5141, "step": 30905 }, { "epoch": 0.947223243839647, "grad_norm": 1.5117222621312727, "learning_rate": 7.286657928943064e-08, "loss": 0.5927, "step": 30906 }, { "epoch": 0.9472538923623881, "grad_norm": 1.2721575071469804, "learning_rate": 7.278217954900968e-08, "loss": 0.55, "step": 30907 }, { "epoch": 0.9472845408851294, "grad_norm": 1.2802049324975138, "learning_rate": 7.269782835772621e-08, "loss": 0.6843, "step": 30908 }, { "epoch": 0.9473151894078705, "grad_norm": 1.399637949212086, "learning_rate": 7.261352571641179e-08, "loss": 0.5948, "step": 30909 }, { "epoch": 0.9473458379306118, "grad_norm": 1.3262909190504593, "learning_rate": 7.252927162589684e-08, "loss": 0.6075, "step": 30910 }, { "epoch": 0.9473764864533529, "grad_norm": 1.209374676737108, "learning_rate": 7.244506608701186e-08, "loss": 0.6069, "step": 30911 }, { "epoch": 0.9474071349760942, "grad_norm": 1.353093018044314, "learning_rate": 7.236090910058668e-08, "loss": 0.5581, "step": 30912 }, { "epoch": 0.9474377834988353, "grad_norm": 1.294448336397343, "learning_rate": 7.227680066745013e-08, "loss": 0.5717, "step": 30913 }, { "epoch": 0.9474684320215766, "grad_norm": 1.264673394297993, "learning_rate": 7.219274078843097e-08, "loss": 0.4829, "step": 30914 }, { "epoch": 0.9474990805443178, "grad_norm": 1.3809653195604572, "learning_rate": 7.210872946435743e-08, "loss": 0.6302, "step": 30915 }, { "epoch": 0.947529729067059, "grad_norm": 1.50021091460763, "learning_rate": 7.202476669605774e-08, "loss": 0.5982, "step": 30916 }, { "epoch": 0.9475603775898002, "grad_norm": 0.4520181813124699, "learning_rate": 7.194085248435844e-08, "loss": 0.3704, "step": 30917 }, { "epoch": 0.9475910261125414, "grad_norm": 1.4188951399122052, "learning_rate": 7.18569868300878e-08, "loss": 0.6247, "step": 30918 }, { "epoch": 0.9476216746352826, "grad_norm": 1.3265178520150271, "learning_rate": 7.177316973407011e-08, "loss": 0.5655, "step": 30919 }, { "epoch": 0.9476523231580238, "grad_norm": 1.4224169433152256, "learning_rate": 7.168940119713252e-08, "loss": 0.58, "step": 30920 }, { "epoch": 0.947682971680765, "grad_norm": 1.3528673389704977, "learning_rate": 7.160568122010103e-08, "loss": 0.5603, "step": 30921 }, { "epoch": 0.9477136202035062, "grad_norm": 1.4658631707516676, "learning_rate": 7.152200980379887e-08, "loss": 0.5473, "step": 30922 }, { "epoch": 0.9477442687262474, "grad_norm": 1.3969343987723941, "learning_rate": 7.143838694905148e-08, "loss": 0.5875, "step": 30923 }, { "epoch": 0.9477749172489885, "grad_norm": 0.44448922847402506, "learning_rate": 7.13548126566832e-08, "loss": 0.3817, "step": 30924 }, { "epoch": 0.9478055657717298, "grad_norm": 1.4202877561164595, "learning_rate": 7.127128692751617e-08, "loss": 0.5911, "step": 30925 }, { "epoch": 0.947836214294471, "grad_norm": 1.3939575460033475, "learning_rate": 7.118780976237471e-08, "loss": 0.6072, "step": 30926 }, { "epoch": 0.9478668628172122, "grad_norm": 1.2048566838290866, "learning_rate": 7.110438116208096e-08, "loss": 0.5471, "step": 30927 }, { "epoch": 0.9478975113399534, "grad_norm": 0.44457973054655797, "learning_rate": 7.102100112745702e-08, "loss": 0.4027, "step": 30928 }, { "epoch": 0.9479281598626946, "grad_norm": 1.3186139429913313, "learning_rate": 7.093766965932392e-08, "loss": 0.5425, "step": 30929 }, { "epoch": 0.9479588083854358, "grad_norm": 1.5028598360871104, "learning_rate": 7.08543867585032e-08, "loss": 0.5778, "step": 30930 }, { "epoch": 0.947989456908177, "grad_norm": 1.1842134553003492, "learning_rate": 7.077115242581534e-08, "loss": 0.6697, "step": 30931 }, { "epoch": 0.9480201054309182, "grad_norm": 0.42846198696209603, "learning_rate": 7.068796666208078e-08, "loss": 0.3867, "step": 30932 }, { "epoch": 0.9480507539536595, "grad_norm": 1.1881530685922645, "learning_rate": 7.060482946811831e-08, "loss": 0.549, "step": 30933 }, { "epoch": 0.9480814024764006, "grad_norm": 1.2373264967492297, "learning_rate": 7.052174084474784e-08, "loss": 0.5973, "step": 30934 }, { "epoch": 0.9481120509991419, "grad_norm": 1.3307990448932439, "learning_rate": 7.043870079278869e-08, "loss": 0.4832, "step": 30935 }, { "epoch": 0.948142699521883, "grad_norm": 1.3635069229894308, "learning_rate": 7.035570931305746e-08, "loss": 0.6509, "step": 30936 }, { "epoch": 0.9481733480446243, "grad_norm": 1.4897428460160222, "learning_rate": 7.027276640637293e-08, "loss": 0.628, "step": 30937 }, { "epoch": 0.9482039965673654, "grad_norm": 1.3477260879907953, "learning_rate": 7.018987207355276e-08, "loss": 0.5818, "step": 30938 }, { "epoch": 0.9482346450901067, "grad_norm": 1.3709478126857657, "learning_rate": 7.010702631541245e-08, "loss": 0.588, "step": 30939 }, { "epoch": 0.9482652936128478, "grad_norm": 1.3233661486216592, "learning_rate": 7.002422913276907e-08, "loss": 0.6143, "step": 30940 }, { "epoch": 0.9482959421355891, "grad_norm": 1.608934327939718, "learning_rate": 6.994148052643868e-08, "loss": 0.595, "step": 30941 }, { "epoch": 0.9483265906583302, "grad_norm": 1.3178120206948294, "learning_rate": 6.98587804972356e-08, "loss": 0.6143, "step": 30942 }, { "epoch": 0.9483572391810715, "grad_norm": 0.42161878265131586, "learning_rate": 6.977612904597586e-08, "loss": 0.3838, "step": 30943 }, { "epoch": 0.9483878877038127, "grad_norm": 1.3537038706022129, "learning_rate": 6.969352617347325e-08, "loss": 0.6473, "step": 30944 }, { "epoch": 0.9484185362265539, "grad_norm": 1.3488185582152756, "learning_rate": 6.961097188054211e-08, "loss": 0.5884, "step": 30945 }, { "epoch": 0.9484491847492951, "grad_norm": 1.4435067698200057, "learning_rate": 6.952846616799569e-08, "loss": 0.6696, "step": 30946 }, { "epoch": 0.9484798332720363, "grad_norm": 1.399428797104713, "learning_rate": 6.944600903664612e-08, "loss": 0.622, "step": 30947 }, { "epoch": 0.9485104817947775, "grad_norm": 1.5435445969776713, "learning_rate": 6.936360048730718e-08, "loss": 0.5633, "step": 30948 }, { "epoch": 0.9485411303175187, "grad_norm": 0.44432733694750576, "learning_rate": 6.928124052078933e-08, "loss": 0.3899, "step": 30949 }, { "epoch": 0.9485717788402599, "grad_norm": 1.5980717194023089, "learning_rate": 6.919892913790582e-08, "loss": 0.5675, "step": 30950 }, { "epoch": 0.9486024273630012, "grad_norm": 1.2975949436569998, "learning_rate": 6.911666633946712e-08, "loss": 0.5325, "step": 30951 }, { "epoch": 0.9486330758857423, "grad_norm": 1.1523524811061403, "learning_rate": 6.903445212628257e-08, "loss": 0.6107, "step": 30952 }, { "epoch": 0.9486637244084836, "grad_norm": 1.4611755121810137, "learning_rate": 6.895228649916374e-08, "loss": 0.4695, "step": 30953 }, { "epoch": 0.9486943729312247, "grad_norm": 1.3194426593482815, "learning_rate": 6.887016945892e-08, "loss": 0.6508, "step": 30954 }, { "epoch": 0.9487250214539659, "grad_norm": 1.5659568126018366, "learning_rate": 6.878810100635958e-08, "loss": 0.5965, "step": 30955 }, { "epoch": 0.9487556699767071, "grad_norm": 1.361570425443625, "learning_rate": 6.870608114229183e-08, "loss": 0.556, "step": 30956 }, { "epoch": 0.9487863184994483, "grad_norm": 1.9252863571134002, "learning_rate": 6.8624109867525e-08, "loss": 0.6401, "step": 30957 }, { "epoch": 0.9488169670221895, "grad_norm": 1.4761358985542188, "learning_rate": 6.854218718286676e-08, "loss": 0.612, "step": 30958 }, { "epoch": 0.9488476155449307, "grad_norm": 1.4597083166777405, "learning_rate": 6.846031308912371e-08, "loss": 0.5858, "step": 30959 }, { "epoch": 0.948878264067672, "grad_norm": 1.28297225970469, "learning_rate": 6.837848758710241e-08, "loss": 0.5711, "step": 30960 }, { "epoch": 0.9489089125904131, "grad_norm": 1.2890725233662994, "learning_rate": 6.829671067761112e-08, "loss": 0.6237, "step": 30961 }, { "epoch": 0.9489395611131544, "grad_norm": 1.2108881780031637, "learning_rate": 6.821498236145363e-08, "loss": 0.5224, "step": 30962 }, { "epoch": 0.9489702096358955, "grad_norm": 0.43634179919431276, "learning_rate": 6.81333026394354e-08, "loss": 0.3924, "step": 30963 }, { "epoch": 0.9490008581586368, "grad_norm": 1.3791968030235364, "learning_rate": 6.805167151236137e-08, "loss": 0.5761, "step": 30964 }, { "epoch": 0.9490315066813779, "grad_norm": 0.43174960098972703, "learning_rate": 6.797008898103697e-08, "loss": 0.3912, "step": 30965 }, { "epoch": 0.9490621552041192, "grad_norm": 1.3152189530258125, "learning_rate": 6.788855504626435e-08, "loss": 0.5392, "step": 30966 }, { "epoch": 0.9490928037268603, "grad_norm": 1.3187387996584583, "learning_rate": 6.780706970884788e-08, "loss": 0.6069, "step": 30967 }, { "epoch": 0.9491234522496016, "grad_norm": 1.2175134419941698, "learning_rate": 6.772563296959079e-08, "loss": 0.5579, "step": 30968 }, { "epoch": 0.9491541007723427, "grad_norm": 1.3132409723906233, "learning_rate": 6.764424482929465e-08, "loss": 0.6387, "step": 30969 }, { "epoch": 0.949184749295084, "grad_norm": 1.4145292262699038, "learning_rate": 6.75629052887622e-08, "loss": 0.597, "step": 30970 }, { "epoch": 0.9492153978178252, "grad_norm": 1.3559462489551928, "learning_rate": 6.748161434879386e-08, "loss": 0.6129, "step": 30971 }, { "epoch": 0.9492460463405664, "grad_norm": 0.462203809633993, "learning_rate": 6.740037201019179e-08, "loss": 0.3948, "step": 30972 }, { "epoch": 0.9492766948633076, "grad_norm": 1.4287052810270866, "learning_rate": 6.731917827375589e-08, "loss": 0.5886, "step": 30973 }, { "epoch": 0.9493073433860488, "grad_norm": 1.3335264211608688, "learning_rate": 6.723803314028554e-08, "loss": 0.6, "step": 30974 }, { "epoch": 0.94933799190879, "grad_norm": 1.2842649154317531, "learning_rate": 6.71569366105812e-08, "loss": 0.6542, "step": 30975 }, { "epoch": 0.9493686404315312, "grad_norm": 1.4915341013911123, "learning_rate": 6.707588868544168e-08, "loss": 0.6075, "step": 30976 }, { "epoch": 0.9493992889542724, "grad_norm": 1.4165532263823097, "learning_rate": 6.699488936566634e-08, "loss": 0.6544, "step": 30977 }, { "epoch": 0.9494299374770137, "grad_norm": 1.4327174360450843, "learning_rate": 6.691393865205176e-08, "loss": 0.6484, "step": 30978 }, { "epoch": 0.9494605859997548, "grad_norm": 1.480189523607967, "learning_rate": 6.683303654539619e-08, "loss": 0.6187, "step": 30979 }, { "epoch": 0.9494912345224961, "grad_norm": 1.4004864737370348, "learning_rate": 6.675218304649733e-08, "loss": 0.5749, "step": 30980 }, { "epoch": 0.9495218830452372, "grad_norm": 1.2358955718752924, "learning_rate": 6.667137815615176e-08, "loss": 0.5375, "step": 30981 }, { "epoch": 0.9495525315679785, "grad_norm": 1.2255329403757875, "learning_rate": 6.659062187515498e-08, "loss": 0.6391, "step": 30982 }, { "epoch": 0.9495831800907196, "grad_norm": 1.6619478449130987, "learning_rate": 6.650991420430241e-08, "loss": 0.7277, "step": 30983 }, { "epoch": 0.9496138286134609, "grad_norm": 1.3829417108701374, "learning_rate": 6.642925514439125e-08, "loss": 0.6123, "step": 30984 }, { "epoch": 0.949644477136202, "grad_norm": 1.2608887253060113, "learning_rate": 6.634864469621361e-08, "loss": 0.5539, "step": 30985 }, { "epoch": 0.9496751256589432, "grad_norm": 1.3789371292659898, "learning_rate": 6.626808286056607e-08, "loss": 0.6035, "step": 30986 }, { "epoch": 0.9497057741816844, "grad_norm": 1.317775966054238, "learning_rate": 6.61875696382408e-08, "loss": 0.6815, "step": 30987 }, { "epoch": 0.9497364227044256, "grad_norm": 1.2142968020597062, "learning_rate": 6.610710503003214e-08, "loss": 0.611, "step": 30988 }, { "epoch": 0.9497670712271669, "grad_norm": 1.2147788781490878, "learning_rate": 6.602668903673226e-08, "loss": 0.5096, "step": 30989 }, { "epoch": 0.949797719749908, "grad_norm": 0.4553921808086755, "learning_rate": 6.59463216591344e-08, "loss": 0.3788, "step": 30990 }, { "epoch": 0.9498283682726493, "grad_norm": 1.3158434884924215, "learning_rate": 6.586600289802958e-08, "loss": 0.5312, "step": 30991 }, { "epoch": 0.9498590167953904, "grad_norm": 1.2586766116016168, "learning_rate": 6.578573275420941e-08, "loss": 0.558, "step": 30992 }, { "epoch": 0.9498896653181317, "grad_norm": 1.3298596412784809, "learning_rate": 6.570551122846491e-08, "loss": 0.5288, "step": 30993 }, { "epoch": 0.9499203138408728, "grad_norm": 1.2650108436311407, "learning_rate": 6.562533832158657e-08, "loss": 0.4834, "step": 30994 }, { "epoch": 0.9499509623636141, "grad_norm": 1.3231996407874818, "learning_rate": 6.554521403436376e-08, "loss": 0.6502, "step": 30995 }, { "epoch": 0.9499816108863552, "grad_norm": 1.4351321953272622, "learning_rate": 6.54651383675875e-08, "loss": 0.6506, "step": 30996 }, { "epoch": 0.9500122594090965, "grad_norm": 1.390671996334715, "learning_rate": 6.538511132204495e-08, "loss": 0.4891, "step": 30997 }, { "epoch": 0.9500429079318377, "grad_norm": 1.3654294844992665, "learning_rate": 6.530513289852603e-08, "loss": 0.5871, "step": 30998 }, { "epoch": 0.9500735564545789, "grad_norm": 1.5286788239102622, "learning_rate": 6.522520309781788e-08, "loss": 0.6611, "step": 30999 }, { "epoch": 0.9501042049773201, "grad_norm": 1.2630593411783633, "learning_rate": 6.514532192070876e-08, "loss": 0.6033, "step": 31000 }, { "epoch": 0.9501348535000613, "grad_norm": 0.4585725574777806, "learning_rate": 6.506548936798474e-08, "loss": 0.3862, "step": 31001 }, { "epoch": 0.9501655020228025, "grad_norm": 1.284055748333401, "learning_rate": 6.498570544043348e-08, "loss": 0.6409, "step": 31002 }, { "epoch": 0.9501961505455437, "grad_norm": 1.3618839424928044, "learning_rate": 6.490597013884103e-08, "loss": 0.6587, "step": 31003 }, { "epoch": 0.9502267990682849, "grad_norm": 1.3762921007257236, "learning_rate": 6.482628346399289e-08, "loss": 0.6104, "step": 31004 }, { "epoch": 0.9502574475910261, "grad_norm": 1.3525931231929973, "learning_rate": 6.474664541667341e-08, "loss": 0.623, "step": 31005 }, { "epoch": 0.9502880961137673, "grad_norm": 1.2953406314074398, "learning_rate": 6.466705599766809e-08, "loss": 0.6444, "step": 31006 }, { "epoch": 0.9503187446365086, "grad_norm": 1.3914407863661775, "learning_rate": 6.45875152077613e-08, "loss": 0.6453, "step": 31007 }, { "epoch": 0.9503493931592497, "grad_norm": 1.4059336849145714, "learning_rate": 6.450802304773629e-08, "loss": 0.6419, "step": 31008 }, { "epoch": 0.950380041681991, "grad_norm": 1.2671068280754068, "learning_rate": 6.442857951837689e-08, "loss": 0.5679, "step": 31009 }, { "epoch": 0.9504106902047321, "grad_norm": 1.4139966648403457, "learning_rate": 6.434918462046525e-08, "loss": 0.6775, "step": 31010 }, { "epoch": 0.9504413387274734, "grad_norm": 0.4538379784854887, "learning_rate": 6.426983835478462e-08, "loss": 0.4002, "step": 31011 }, { "epoch": 0.9504719872502145, "grad_norm": 1.2244532627355793, "learning_rate": 6.419054072211494e-08, "loss": 0.6, "step": 31012 }, { "epoch": 0.9505026357729558, "grad_norm": 1.4405713795269945, "learning_rate": 6.411129172323949e-08, "loss": 0.6741, "step": 31013 }, { "epoch": 0.950533284295697, "grad_norm": 1.380203858839591, "learning_rate": 6.403209135893818e-08, "loss": 0.4833, "step": 31014 }, { "epoch": 0.9505639328184382, "grad_norm": 1.3818188332654395, "learning_rate": 6.39529396299915e-08, "loss": 0.627, "step": 31015 }, { "epoch": 0.9505945813411794, "grad_norm": 0.4400460888168732, "learning_rate": 6.387383653717938e-08, "loss": 0.3807, "step": 31016 }, { "epoch": 0.9506252298639205, "grad_norm": 1.3890838473924105, "learning_rate": 6.379478208128176e-08, "loss": 0.6148, "step": 31017 }, { "epoch": 0.9506558783866618, "grad_norm": 1.29208448427465, "learning_rate": 6.37157762630769e-08, "loss": 0.6084, "step": 31018 }, { "epoch": 0.9506865269094029, "grad_norm": 1.439869089652473, "learning_rate": 6.363681908334307e-08, "loss": 0.6073, "step": 31019 }, { "epoch": 0.9507171754321442, "grad_norm": 1.4682045409319728, "learning_rate": 6.355791054285908e-08, "loss": 0.7321, "step": 31020 }, { "epoch": 0.9507478239548853, "grad_norm": 1.2006868337223093, "learning_rate": 6.347905064240211e-08, "loss": 0.5106, "step": 31021 }, { "epoch": 0.9507784724776266, "grad_norm": 1.1360988063692983, "learning_rate": 6.340023938274931e-08, "loss": 0.4472, "step": 31022 }, { "epoch": 0.9508091210003677, "grad_norm": 1.400084743851937, "learning_rate": 6.332147676467671e-08, "loss": 0.5918, "step": 31023 }, { "epoch": 0.950839769523109, "grad_norm": 1.3741655462470204, "learning_rate": 6.324276278896091e-08, "loss": 0.6946, "step": 31024 }, { "epoch": 0.9508704180458502, "grad_norm": 1.2441127791944202, "learning_rate": 6.316409745637686e-08, "loss": 0.5334, "step": 31025 }, { "epoch": 0.9509010665685914, "grad_norm": 0.44861319984010123, "learning_rate": 6.30854807677006e-08, "loss": 0.3863, "step": 31026 }, { "epoch": 0.9509317150913326, "grad_norm": 1.3776873006220633, "learning_rate": 6.300691272370596e-08, "loss": 0.5612, "step": 31027 }, { "epoch": 0.9509623636140738, "grad_norm": 1.3737525520565366, "learning_rate": 6.292839332516731e-08, "loss": 0.586, "step": 31028 }, { "epoch": 0.950993012136815, "grad_norm": 1.3661158859927243, "learning_rate": 6.284992257285904e-08, "loss": 0.6266, "step": 31029 }, { "epoch": 0.9510236606595562, "grad_norm": 1.4147810002888772, "learning_rate": 6.277150046755276e-08, "loss": 0.5943, "step": 31030 }, { "epoch": 0.9510543091822974, "grad_norm": 1.3147381317577653, "learning_rate": 6.269312701002284e-08, "loss": 0.6675, "step": 31031 }, { "epoch": 0.9510849577050386, "grad_norm": 1.4785508134775576, "learning_rate": 6.261480220104088e-08, "loss": 0.5518, "step": 31032 }, { "epoch": 0.9511156062277798, "grad_norm": 1.3301025823635582, "learning_rate": 6.253652604137794e-08, "loss": 0.5848, "step": 31033 }, { "epoch": 0.9511462547505211, "grad_norm": 1.38081912675967, "learning_rate": 6.245829853180618e-08, "loss": 0.6713, "step": 31034 }, { "epoch": 0.9511769032732622, "grad_norm": 1.40207531176067, "learning_rate": 6.23801196730961e-08, "loss": 0.5771, "step": 31035 }, { "epoch": 0.9512075517960035, "grad_norm": 1.3248036249581043, "learning_rate": 6.230198946601818e-08, "loss": 0.5931, "step": 31036 }, { "epoch": 0.9512382003187446, "grad_norm": 1.3472629844419635, "learning_rate": 6.222390791134236e-08, "loss": 0.602, "step": 31037 }, { "epoch": 0.9512688488414859, "grad_norm": 0.4530392615381174, "learning_rate": 6.214587500983748e-08, "loss": 0.3857, "step": 31038 }, { "epoch": 0.951299497364227, "grad_norm": 1.6146720707253637, "learning_rate": 6.206789076227238e-08, "loss": 0.5977, "step": 31039 }, { "epoch": 0.9513301458869683, "grad_norm": 1.4928871331732332, "learning_rate": 6.198995516941642e-08, "loss": 0.5477, "step": 31040 }, { "epoch": 0.9513607944097094, "grad_norm": 1.485598970910669, "learning_rate": 6.191206823203622e-08, "loss": 0.5917, "step": 31041 }, { "epoch": 0.9513914429324507, "grad_norm": 1.4982020946703207, "learning_rate": 6.183422995090005e-08, "loss": 0.5453, "step": 31042 }, { "epoch": 0.9514220914551919, "grad_norm": 1.3234504780442935, "learning_rate": 6.175644032677508e-08, "loss": 0.5963, "step": 31043 }, { "epoch": 0.9514527399779331, "grad_norm": 1.31621425525495, "learning_rate": 6.167869936042737e-08, "loss": 0.5856, "step": 31044 }, { "epoch": 0.9514833885006743, "grad_norm": 1.214588965642296, "learning_rate": 6.160100705262295e-08, "loss": 0.6245, "step": 31045 }, { "epoch": 0.9515140370234155, "grad_norm": 1.4457808167612618, "learning_rate": 6.152336340412679e-08, "loss": 0.6057, "step": 31046 }, { "epoch": 0.9515446855461567, "grad_norm": 0.44176321068781554, "learning_rate": 6.144576841570494e-08, "loss": 0.376, "step": 31047 }, { "epoch": 0.9515753340688978, "grad_norm": 1.9067440745318005, "learning_rate": 6.136822208812121e-08, "loss": 0.5619, "step": 31048 }, { "epoch": 0.9516059825916391, "grad_norm": 1.4874881067252834, "learning_rate": 6.129072442214057e-08, "loss": 0.7128, "step": 31049 }, { "epoch": 0.9516366311143802, "grad_norm": 1.369279121576374, "learning_rate": 6.121327541852517e-08, "loss": 0.6166, "step": 31050 }, { "epoch": 0.9516672796371215, "grad_norm": 1.3225446588671843, "learning_rate": 6.113587507803997e-08, "loss": 0.5384, "step": 31051 }, { "epoch": 0.9516979281598626, "grad_norm": 1.4065637305299798, "learning_rate": 6.105852340144602e-08, "loss": 0.6252, "step": 31052 }, { "epoch": 0.9517285766826039, "grad_norm": 1.4709602296586048, "learning_rate": 6.098122038950605e-08, "loss": 0.5542, "step": 31053 }, { "epoch": 0.9517592252053451, "grad_norm": 1.402380991734985, "learning_rate": 6.090396604298276e-08, "loss": 0.5549, "step": 31054 }, { "epoch": 0.9517898737280863, "grad_norm": 1.4096920969473974, "learning_rate": 6.082676036263558e-08, "loss": 0.601, "step": 31055 }, { "epoch": 0.9518205222508275, "grad_norm": 1.3303216464381904, "learning_rate": 6.074960334922609e-08, "loss": 0.5842, "step": 31056 }, { "epoch": 0.9518511707735687, "grad_norm": 1.2843651261612512, "learning_rate": 6.06724950035148e-08, "loss": 0.5041, "step": 31057 }, { "epoch": 0.9518818192963099, "grad_norm": 1.2687843439661555, "learning_rate": 6.059543532626111e-08, "loss": 0.5781, "step": 31058 }, { "epoch": 0.9519124678190511, "grad_norm": 1.249213369840252, "learning_rate": 6.051842431822442e-08, "loss": 0.5526, "step": 31059 }, { "epoch": 0.9519431163417923, "grad_norm": 1.3607453741411366, "learning_rate": 6.044146198016299e-08, "loss": 0.6005, "step": 31060 }, { "epoch": 0.9519737648645336, "grad_norm": 1.3225656904323506, "learning_rate": 6.036454831283623e-08, "loss": 0.5712, "step": 31061 }, { "epoch": 0.9520044133872747, "grad_norm": 0.4458750513396376, "learning_rate": 6.02876833170013e-08, "loss": 0.394, "step": 31062 }, { "epoch": 0.952035061910016, "grad_norm": 1.330317923597252, "learning_rate": 6.021086699341594e-08, "loss": 0.5689, "step": 31063 }, { "epoch": 0.9520657104327571, "grad_norm": 1.6380328036033458, "learning_rate": 6.01340993428362e-08, "loss": 0.6755, "step": 31064 }, { "epoch": 0.9520963589554984, "grad_norm": 1.4440625027779397, "learning_rate": 6.005738036601982e-08, "loss": 0.5346, "step": 31065 }, { "epoch": 0.9521270074782395, "grad_norm": 1.4789161986243178, "learning_rate": 5.998071006372175e-08, "loss": 0.6175, "step": 31066 }, { "epoch": 0.9521576560009808, "grad_norm": 1.4583691866877675, "learning_rate": 5.990408843669803e-08, "loss": 0.6376, "step": 31067 }, { "epoch": 0.9521883045237219, "grad_norm": 1.3813477754833352, "learning_rate": 5.982751548570253e-08, "loss": 0.6497, "step": 31068 }, { "epoch": 0.9522189530464632, "grad_norm": 0.4587188214287032, "learning_rate": 5.975099121149074e-08, "loss": 0.3854, "step": 31069 }, { "epoch": 0.9522496015692044, "grad_norm": 1.2196581580592682, "learning_rate": 5.967451561481708e-08, "loss": 0.5585, "step": 31070 }, { "epoch": 0.9522802500919456, "grad_norm": 1.3473953276547739, "learning_rate": 5.959808869643369e-08, "loss": 0.6031, "step": 31071 }, { "epoch": 0.9523108986146868, "grad_norm": 1.6565748672701646, "learning_rate": 5.952171045709443e-08, "loss": 0.5864, "step": 31072 }, { "epoch": 0.952341547137428, "grad_norm": 0.4491251201876375, "learning_rate": 5.944538089755258e-08, "loss": 0.3888, "step": 31073 }, { "epoch": 0.9523721956601692, "grad_norm": 1.301490906028536, "learning_rate": 5.936910001855867e-08, "loss": 0.6963, "step": 31074 }, { "epoch": 0.9524028441829104, "grad_norm": 1.3483987088524925, "learning_rate": 5.929286782086541e-08, "loss": 0.6777, "step": 31075 }, { "epoch": 0.9524334927056516, "grad_norm": 1.478688408235775, "learning_rate": 5.921668430522387e-08, "loss": 0.5555, "step": 31076 }, { "epoch": 0.9524641412283928, "grad_norm": 1.2944474864974123, "learning_rate": 5.914054947238457e-08, "loss": 0.6008, "step": 31077 }, { "epoch": 0.952494789751134, "grad_norm": 0.4385717114189952, "learning_rate": 5.906446332309745e-08, "loss": 0.3808, "step": 31078 }, { "epoch": 0.9525254382738751, "grad_norm": 0.45015035251001767, "learning_rate": 5.898842585811193e-08, "loss": 0.4022, "step": 31079 }, { "epoch": 0.9525560867966164, "grad_norm": 1.342450345997603, "learning_rate": 5.8912437078177953e-08, "loss": 0.5937, "step": 31080 }, { "epoch": 0.9525867353193576, "grad_norm": 1.358301467267918, "learning_rate": 5.883649698404437e-08, "loss": 0.6454, "step": 31081 }, { "epoch": 0.9526173838420988, "grad_norm": 1.780156655755498, "learning_rate": 5.876060557645835e-08, "loss": 0.6215, "step": 31082 }, { "epoch": 0.95264803236484, "grad_norm": 1.4705507408202667, "learning_rate": 5.8684762856168756e-08, "loss": 0.5734, "step": 31083 }, { "epoch": 0.9526786808875812, "grad_norm": 1.6640679098050282, "learning_rate": 5.8608968823922754e-08, "loss": 0.5745, "step": 31084 }, { "epoch": 0.9527093294103224, "grad_norm": 1.391025597659071, "learning_rate": 5.8533223480466417e-08, "loss": 0.6228, "step": 31085 }, { "epoch": 0.9527399779330636, "grad_norm": 1.4581656677237707, "learning_rate": 5.845752682654693e-08, "loss": 0.5835, "step": 31086 }, { "epoch": 0.9527706264558048, "grad_norm": 1.3778513244234967, "learning_rate": 5.838187886290925e-08, "loss": 0.6331, "step": 31087 }, { "epoch": 0.952801274978546, "grad_norm": 1.3497738557331378, "learning_rate": 5.8306279590299444e-08, "loss": 0.57, "step": 31088 }, { "epoch": 0.9528319235012872, "grad_norm": 1.8257248160024158, "learning_rate": 5.823072900946303e-08, "loss": 0.6087, "step": 31089 }, { "epoch": 0.9528625720240285, "grad_norm": 1.6154905749036648, "learning_rate": 5.815522712114274e-08, "loss": 0.7335, "step": 31090 }, { "epoch": 0.9528932205467696, "grad_norm": 1.3947616669635485, "learning_rate": 5.8079773926083546e-08, "loss": 0.6339, "step": 31091 }, { "epoch": 0.9529238690695109, "grad_norm": 1.2760466829342991, "learning_rate": 5.80043694250293e-08, "loss": 0.5498, "step": 31092 }, { "epoch": 0.952954517592252, "grad_norm": 1.2024425869758584, "learning_rate": 5.792901361872216e-08, "loss": 0.4818, "step": 31093 }, { "epoch": 0.9529851661149933, "grad_norm": 1.4584781051530156, "learning_rate": 5.7853706507904337e-08, "loss": 0.5894, "step": 31094 }, { "epoch": 0.9530158146377344, "grad_norm": 1.3960010988094995, "learning_rate": 5.7778448093319115e-08, "loss": 0.5373, "step": 31095 }, { "epoch": 0.9530464631604757, "grad_norm": 1.4341105660084608, "learning_rate": 5.770323837570757e-08, "loss": 0.6365, "step": 31096 }, { "epoch": 0.9530771116832168, "grad_norm": 1.4768398226951824, "learning_rate": 5.762807735581022e-08, "loss": 0.5724, "step": 31097 }, { "epoch": 0.9531077602059581, "grad_norm": 1.4006997966019057, "learning_rate": 5.755296503436758e-08, "loss": 0.6142, "step": 31098 }, { "epoch": 0.9531384087286993, "grad_norm": 1.301069491491008, "learning_rate": 5.747790141212073e-08, "loss": 0.4314, "step": 31099 }, { "epoch": 0.9531690572514405, "grad_norm": 1.417248650180595, "learning_rate": 5.7402886489809075e-08, "loss": 0.5669, "step": 31100 }, { "epoch": 0.9531997057741817, "grad_norm": 1.2985117835691453, "learning_rate": 5.7327920268170356e-08, "loss": 0.5847, "step": 31101 }, { "epoch": 0.9532303542969229, "grad_norm": 1.2413810965629857, "learning_rate": 5.725300274794454e-08, "loss": 0.5467, "step": 31102 }, { "epoch": 0.9532610028196641, "grad_norm": 1.331909919428154, "learning_rate": 5.717813392986993e-08, "loss": 0.548, "step": 31103 }, { "epoch": 0.9532916513424053, "grad_norm": 1.4866516933761884, "learning_rate": 5.71033138146837e-08, "loss": 0.5982, "step": 31104 }, { "epoch": 0.9533222998651465, "grad_norm": 0.4520842305911822, "learning_rate": 5.702854240312361e-08, "loss": 0.4142, "step": 31105 }, { "epoch": 0.9533529483878878, "grad_norm": 1.5741479177881093, "learning_rate": 5.6953819695925175e-08, "loss": 0.5947, "step": 31106 }, { "epoch": 0.9533835969106289, "grad_norm": 1.5484664138950488, "learning_rate": 5.6879145693826133e-08, "loss": 0.5998, "step": 31107 }, { "epoch": 0.9534142454333702, "grad_norm": 1.4513153962128191, "learning_rate": 5.6804520397561456e-08, "loss": 0.6049, "step": 31108 }, { "epoch": 0.9534448939561113, "grad_norm": 1.4661244147089907, "learning_rate": 5.672994380786667e-08, "loss": 0.6541, "step": 31109 }, { "epoch": 0.9534755424788525, "grad_norm": 1.295493667180956, "learning_rate": 5.665541592547619e-08, "loss": 0.576, "step": 31110 }, { "epoch": 0.9535061910015937, "grad_norm": 1.2650964189891165, "learning_rate": 5.6580936751125526e-08, "loss": 0.5637, "step": 31111 }, { "epoch": 0.9535368395243349, "grad_norm": 1.6381942314726132, "learning_rate": 5.650650628554688e-08, "loss": 0.5706, "step": 31112 }, { "epoch": 0.9535674880470761, "grad_norm": 1.3016616074611178, "learning_rate": 5.643212452947466e-08, "loss": 0.5964, "step": 31113 }, { "epoch": 0.9535981365698173, "grad_norm": 1.2661282078252785, "learning_rate": 5.635779148364162e-08, "loss": 0.5837, "step": 31114 }, { "epoch": 0.9536287850925586, "grad_norm": 1.2651062012924346, "learning_rate": 5.6283507148780505e-08, "loss": 0.5579, "step": 31115 }, { "epoch": 0.9536594336152997, "grad_norm": 0.4504258885357624, "learning_rate": 5.6209271525622385e-08, "loss": 0.3814, "step": 31116 }, { "epoch": 0.953690082138041, "grad_norm": 1.3847493228168506, "learning_rate": 5.613508461489947e-08, "loss": 0.6538, "step": 31117 }, { "epoch": 0.9537207306607821, "grad_norm": 1.380907632674369, "learning_rate": 5.6060946417342276e-08, "loss": 0.5864, "step": 31118 }, { "epoch": 0.9537513791835234, "grad_norm": 1.32818759644895, "learning_rate": 5.598685693368189e-08, "loss": 0.608, "step": 31119 }, { "epoch": 0.9537820277062645, "grad_norm": 1.4642999617302856, "learning_rate": 5.591281616464772e-08, "loss": 0.5608, "step": 31120 }, { "epoch": 0.9538126762290058, "grad_norm": 1.2346899813333803, "learning_rate": 5.5838824110969745e-08, "loss": 0.6221, "step": 31121 }, { "epoch": 0.9538433247517469, "grad_norm": 1.5392760245466657, "learning_rate": 5.5764880773376826e-08, "loss": 0.5739, "step": 31122 }, { "epoch": 0.9538739732744882, "grad_norm": 1.3480783901462798, "learning_rate": 5.5690986152597824e-08, "loss": 0.5774, "step": 31123 }, { "epoch": 0.9539046217972293, "grad_norm": 0.4554931602001818, "learning_rate": 5.5617140249359934e-08, "loss": 0.4144, "step": 31124 }, { "epoch": 0.9539352703199706, "grad_norm": 1.4603424669809881, "learning_rate": 5.554334306439202e-08, "loss": 0.6474, "step": 31125 }, { "epoch": 0.9539659188427118, "grad_norm": 1.4163063068916228, "learning_rate": 5.5469594598420164e-08, "loss": 0.5162, "step": 31126 }, { "epoch": 0.953996567365453, "grad_norm": 1.3562411131794478, "learning_rate": 5.5395894852172116e-08, "loss": 0.5812, "step": 31127 }, { "epoch": 0.9540272158881942, "grad_norm": 1.3251221195928509, "learning_rate": 5.532224382637286e-08, "loss": 0.6329, "step": 31128 }, { "epoch": 0.9540578644109354, "grad_norm": 1.260312261515674, "learning_rate": 5.5248641521749024e-08, "loss": 0.6167, "step": 31129 }, { "epoch": 0.9540885129336766, "grad_norm": 1.328735791689122, "learning_rate": 5.5175087939025596e-08, "loss": 0.6377, "step": 31130 }, { "epoch": 0.9541191614564178, "grad_norm": 0.4509399288418168, "learning_rate": 5.510158307892699e-08, "loss": 0.3859, "step": 31131 }, { "epoch": 0.954149809979159, "grad_norm": 1.3621033076417977, "learning_rate": 5.5028126942177626e-08, "loss": 0.6153, "step": 31132 }, { "epoch": 0.9541804585019003, "grad_norm": 1.4530180308148997, "learning_rate": 5.4954719529501376e-08, "loss": 0.4975, "step": 31133 }, { "epoch": 0.9542111070246414, "grad_norm": 1.3847768303210877, "learning_rate": 5.488136084162155e-08, "loss": 0.6092, "step": 31134 }, { "epoch": 0.9542417555473827, "grad_norm": 1.210538794627512, "learning_rate": 5.480805087926089e-08, "loss": 0.6453, "step": 31135 }, { "epoch": 0.9542724040701238, "grad_norm": 1.4694709318551828, "learning_rate": 5.473478964314216e-08, "loss": 0.6733, "step": 31136 }, { "epoch": 0.9543030525928651, "grad_norm": 1.427095475340849, "learning_rate": 5.4661577133986455e-08, "loss": 0.4674, "step": 31137 }, { "epoch": 0.9543337011156062, "grad_norm": 1.2735282651120352, "learning_rate": 5.458841335251597e-08, "loss": 0.6027, "step": 31138 }, { "epoch": 0.9543643496383475, "grad_norm": 1.3672517045987163, "learning_rate": 5.4515298299450126e-08, "loss": 0.6446, "step": 31139 }, { "epoch": 0.9543949981610886, "grad_norm": 1.4543153535076463, "learning_rate": 5.444223197551168e-08, "loss": 0.719, "step": 31140 }, { "epoch": 0.9544256466838298, "grad_norm": 1.5261978090058685, "learning_rate": 5.43692143814184e-08, "loss": 0.6706, "step": 31141 }, { "epoch": 0.954456295206571, "grad_norm": 1.0903586101365004, "learning_rate": 5.429624551789136e-08, "loss": 0.4638, "step": 31142 }, { "epoch": 0.9544869437293122, "grad_norm": 1.232952753967354, "learning_rate": 5.422332538564834e-08, "loss": 0.5893, "step": 31143 }, { "epoch": 0.9545175922520535, "grad_norm": 1.5440510480518355, "learning_rate": 5.4150453985408194e-08, "loss": 0.6884, "step": 31144 }, { "epoch": 0.9545482407747946, "grad_norm": 1.3210947953009837, "learning_rate": 5.407763131788979e-08, "loss": 0.6506, "step": 31145 }, { "epoch": 0.9545788892975359, "grad_norm": 1.485042397238618, "learning_rate": 5.400485738380923e-08, "loss": 0.6618, "step": 31146 }, { "epoch": 0.954609537820277, "grad_norm": 1.2911281173109987, "learning_rate": 5.393213218388482e-08, "loss": 0.5748, "step": 31147 }, { "epoch": 0.9546401863430183, "grad_norm": 1.371170191187934, "learning_rate": 5.3859455718832667e-08, "loss": 0.5357, "step": 31148 }, { "epoch": 0.9546708348657594, "grad_norm": 0.43763456277059853, "learning_rate": 5.3786827989368296e-08, "loss": 0.3949, "step": 31149 }, { "epoch": 0.9547014833885007, "grad_norm": 1.605433056804893, "learning_rate": 5.3714248996207804e-08, "loss": 0.5395, "step": 31150 }, { "epoch": 0.9547321319112418, "grad_norm": 1.365531587886231, "learning_rate": 5.364171874006674e-08, "loss": 0.5539, "step": 31151 }, { "epoch": 0.9547627804339831, "grad_norm": 1.430687532925143, "learning_rate": 5.3569237221659523e-08, "loss": 0.6407, "step": 31152 }, { "epoch": 0.9547934289567243, "grad_norm": 0.4642006216367357, "learning_rate": 5.3496804441700024e-08, "loss": 0.3935, "step": 31153 }, { "epoch": 0.9548240774794655, "grad_norm": 1.1976735937736962, "learning_rate": 5.342442040090212e-08, "loss": 0.6479, "step": 31154 }, { "epoch": 0.9548547260022067, "grad_norm": 1.2413305827788446, "learning_rate": 5.335208509997858e-08, "loss": 0.6107, "step": 31155 }, { "epoch": 0.9548853745249479, "grad_norm": 1.3961454320568023, "learning_rate": 5.327979853964327e-08, "loss": 0.5802, "step": 31156 }, { "epoch": 0.9549160230476891, "grad_norm": 1.538065764851681, "learning_rate": 5.320756072060784e-08, "loss": 0.5488, "step": 31157 }, { "epoch": 0.9549466715704303, "grad_norm": 1.2261233364813018, "learning_rate": 5.31353716435834e-08, "loss": 0.5547, "step": 31158 }, { "epoch": 0.9549773200931715, "grad_norm": 1.3978204230066353, "learning_rate": 5.3063231309282706e-08, "loss": 0.5958, "step": 31159 }, { "epoch": 0.9550079686159128, "grad_norm": 1.1757248858209952, "learning_rate": 5.299113971841463e-08, "loss": 0.7007, "step": 31160 }, { "epoch": 0.9550386171386539, "grad_norm": 1.3375537423300752, "learning_rate": 5.291909687169139e-08, "loss": 0.5317, "step": 31161 }, { "epoch": 0.9550692656613952, "grad_norm": 1.4633932080893743, "learning_rate": 5.2847102769821854e-08, "loss": 0.609, "step": 31162 }, { "epoch": 0.9550999141841363, "grad_norm": 1.2474994198700198, "learning_rate": 5.2775157413515464e-08, "loss": 0.6259, "step": 31163 }, { "epoch": 0.9551305627068776, "grad_norm": 1.4734456361276544, "learning_rate": 5.2703260803481645e-08, "loss": 0.6254, "step": 31164 }, { "epoch": 0.9551612112296187, "grad_norm": 1.3759263539410824, "learning_rate": 5.263141294042817e-08, "loss": 0.6145, "step": 31165 }, { "epoch": 0.95519185975236, "grad_norm": 1.374582111275511, "learning_rate": 5.2559613825062806e-08, "loss": 0.6443, "step": 31166 }, { "epoch": 0.9552225082751011, "grad_norm": 1.4456360665492474, "learning_rate": 5.2487863458093867e-08, "loss": 0.5958, "step": 31167 }, { "epoch": 0.9552531567978424, "grad_norm": 1.2115868366822249, "learning_rate": 5.2416161840228016e-08, "loss": 0.5818, "step": 31168 }, { "epoch": 0.9552838053205835, "grad_norm": 1.2799257302175775, "learning_rate": 5.234450897217136e-08, "loss": 0.5546, "step": 31169 }, { "epoch": 0.9553144538433248, "grad_norm": 0.4499269770442986, "learning_rate": 5.227290485462999e-08, "loss": 0.3844, "step": 31170 }, { "epoch": 0.955345102366066, "grad_norm": 1.2969637680688073, "learning_rate": 5.2201349488310015e-08, "loss": 0.5196, "step": 31171 }, { "epoch": 0.9553757508888071, "grad_norm": 1.3176785600064007, "learning_rate": 5.212984287391587e-08, "loss": 0.4892, "step": 31172 }, { "epoch": 0.9554063994115484, "grad_norm": 1.2673212817852213, "learning_rate": 5.205838501215254e-08, "loss": 0.5728, "step": 31173 }, { "epoch": 0.9554370479342895, "grad_norm": 1.4185849010833562, "learning_rate": 5.1986975903723926e-08, "loss": 0.6164, "step": 31174 }, { "epoch": 0.9554676964570308, "grad_norm": 1.3642686454300585, "learning_rate": 5.191561554933333e-08, "loss": 0.6448, "step": 31175 }, { "epoch": 0.9554983449797719, "grad_norm": 1.3233279897359562, "learning_rate": 5.184430394968465e-08, "loss": 0.647, "step": 31176 }, { "epoch": 0.9555289935025132, "grad_norm": 1.3092488462900211, "learning_rate": 5.177304110547954e-08, "loss": 0.5583, "step": 31177 }, { "epoch": 0.9555596420252543, "grad_norm": 1.4641483403746383, "learning_rate": 5.170182701742133e-08, "loss": 0.6243, "step": 31178 }, { "epoch": 0.9555902905479956, "grad_norm": 1.4225861552882662, "learning_rate": 5.163066168621056e-08, "loss": 0.5804, "step": 31179 }, { "epoch": 0.9556209390707368, "grad_norm": 1.321052325515, "learning_rate": 5.1559545112548904e-08, "loss": 0.5606, "step": 31180 }, { "epoch": 0.955651587593478, "grad_norm": 1.3011689365085803, "learning_rate": 5.1488477297137465e-08, "loss": 0.6262, "step": 31181 }, { "epoch": 0.9556822361162192, "grad_norm": 1.5618678348395572, "learning_rate": 5.141745824067623e-08, "loss": 0.6381, "step": 31182 }, { "epoch": 0.9557128846389604, "grad_norm": 1.4467239725906393, "learning_rate": 5.1346487943865206e-08, "loss": 0.5369, "step": 31183 }, { "epoch": 0.9557435331617016, "grad_norm": 1.3062276443707954, "learning_rate": 5.127556640740272e-08, "loss": 0.5617, "step": 31184 }, { "epoch": 0.9557741816844428, "grad_norm": 1.3112826900908687, "learning_rate": 5.1204693631988764e-08, "loss": 0.5826, "step": 31185 }, { "epoch": 0.955804830207184, "grad_norm": 1.303089323378319, "learning_rate": 5.113386961832112e-08, "loss": 0.5524, "step": 31186 }, { "epoch": 0.9558354787299252, "grad_norm": 0.45312383017618457, "learning_rate": 5.106309436709756e-08, "loss": 0.4004, "step": 31187 }, { "epoch": 0.9558661272526664, "grad_norm": 1.1760460496258556, "learning_rate": 5.099236787901529e-08, "loss": 0.4179, "step": 31188 }, { "epoch": 0.9558967757754077, "grad_norm": 0.42024456094851576, "learning_rate": 5.092169015477211e-08, "loss": 0.378, "step": 31189 }, { "epoch": 0.9559274242981488, "grad_norm": 1.5055952887274155, "learning_rate": 5.0851061195063e-08, "loss": 0.5339, "step": 31190 }, { "epoch": 0.9559580728208901, "grad_norm": 1.2932307045555325, "learning_rate": 5.0780481000585194e-08, "loss": 0.6127, "step": 31191 }, { "epoch": 0.9559887213436312, "grad_norm": 1.4253519900689315, "learning_rate": 5.070994957203368e-08, "loss": 0.6182, "step": 31192 }, { "epoch": 0.9560193698663725, "grad_norm": 1.1461174933813483, "learning_rate": 5.0639466910102905e-08, "loss": 0.4812, "step": 31193 }, { "epoch": 0.9560500183891136, "grad_norm": 1.2850045697859616, "learning_rate": 5.0569033015488436e-08, "loss": 0.5157, "step": 31194 }, { "epoch": 0.9560806669118549, "grad_norm": 0.4662180755838689, "learning_rate": 5.0498647888883036e-08, "loss": 0.3969, "step": 31195 }, { "epoch": 0.956111315434596, "grad_norm": 1.2690257207085602, "learning_rate": 5.0428311530981155e-08, "loss": 0.5691, "step": 31196 }, { "epoch": 0.9561419639573373, "grad_norm": 1.4779124153494252, "learning_rate": 5.0358023942476134e-08, "loss": 0.5417, "step": 31197 }, { "epoch": 0.9561726124800785, "grad_norm": 1.2635768174236248, "learning_rate": 5.0287785124059074e-08, "loss": 0.7188, "step": 31198 }, { "epoch": 0.9562032610028197, "grad_norm": 1.480315862391548, "learning_rate": 5.021759507642277e-08, "loss": 0.5801, "step": 31199 }, { "epoch": 0.9562339095255609, "grad_norm": 1.2920367159462558, "learning_rate": 5.014745380025998e-08, "loss": 0.5354, "step": 31200 }, { "epoch": 0.9562645580483021, "grad_norm": 1.4572041507046458, "learning_rate": 5.007736129625962e-08, "loss": 0.6613, "step": 31201 }, { "epoch": 0.9562952065710433, "grad_norm": 1.3610987246418382, "learning_rate": 5.00073175651139e-08, "loss": 0.5817, "step": 31202 }, { "epoch": 0.9563258550937844, "grad_norm": 0.4486649483245562, "learning_rate": 4.993732260751283e-08, "loss": 0.4087, "step": 31203 }, { "epoch": 0.9563565036165257, "grad_norm": 1.3313847819785896, "learning_rate": 4.986737642414585e-08, "loss": 0.5386, "step": 31204 }, { "epoch": 0.9563871521392668, "grad_norm": 1.3995367646167292, "learning_rate": 4.979747901570242e-08, "loss": 0.6073, "step": 31205 }, { "epoch": 0.9564178006620081, "grad_norm": 1.2903526858375343, "learning_rate": 4.9727630382870315e-08, "loss": 0.6378, "step": 31206 }, { "epoch": 0.9564484491847492, "grad_norm": 0.4300995644476437, "learning_rate": 4.9657830526338993e-08, "loss": 0.3901, "step": 31207 }, { "epoch": 0.9564790977074905, "grad_norm": 1.2029373809916968, "learning_rate": 4.958807944679567e-08, "loss": 0.5475, "step": 31208 }, { "epoch": 0.9565097462302317, "grad_norm": 1.308359436432374, "learning_rate": 4.9518377144927024e-08, "loss": 0.5949, "step": 31209 }, { "epoch": 0.9565403947529729, "grad_norm": 1.447869357478997, "learning_rate": 4.9448723621420834e-08, "loss": 0.6063, "step": 31210 }, { "epoch": 0.9565710432757141, "grad_norm": 1.3604409974505605, "learning_rate": 4.9379118876963227e-08, "loss": 0.6216, "step": 31211 }, { "epoch": 0.9566016917984553, "grad_norm": 1.6012260474620101, "learning_rate": 4.9309562912239207e-08, "loss": 0.6011, "step": 31212 }, { "epoch": 0.9566323403211965, "grad_norm": 0.4333018344151339, "learning_rate": 4.924005572793544e-08, "loss": 0.3933, "step": 31213 }, { "epoch": 0.9566629888439377, "grad_norm": 1.5667867142538512, "learning_rate": 4.917059732473528e-08, "loss": 0.618, "step": 31214 }, { "epoch": 0.9566936373666789, "grad_norm": 1.2721773918430337, "learning_rate": 4.9101187703324835e-08, "loss": 0.6039, "step": 31215 }, { "epoch": 0.9567242858894202, "grad_norm": 1.2245662696211763, "learning_rate": 4.90318268643869e-08, "loss": 0.5811, "step": 31216 }, { "epoch": 0.9567549344121613, "grad_norm": 1.3925828385551697, "learning_rate": 4.896251480860481e-08, "loss": 0.5911, "step": 31217 }, { "epoch": 0.9567855829349026, "grad_norm": 1.333517592416813, "learning_rate": 4.889325153666247e-08, "loss": 0.5723, "step": 31218 }, { "epoch": 0.9568162314576437, "grad_norm": 1.3394263841258982, "learning_rate": 4.882403704924099e-08, "loss": 0.5643, "step": 31219 }, { "epoch": 0.956846879980385, "grad_norm": 1.162596411258314, "learning_rate": 4.8754871347023725e-08, "loss": 0.5508, "step": 31220 }, { "epoch": 0.9568775285031261, "grad_norm": 1.393798668340838, "learning_rate": 4.868575443069068e-08, "loss": 0.5596, "step": 31221 }, { "epoch": 0.9569081770258674, "grad_norm": 1.1763086873384245, "learning_rate": 4.8616686300924644e-08, "loss": 0.4962, "step": 31222 }, { "epoch": 0.9569388255486085, "grad_norm": 1.2168443294842453, "learning_rate": 4.854766695840507e-08, "loss": 0.5262, "step": 31223 }, { "epoch": 0.9569694740713498, "grad_norm": 1.2307228637182388, "learning_rate": 4.847869640381142e-08, "loss": 0.624, "step": 31224 }, { "epoch": 0.957000122594091, "grad_norm": 1.5806205604872785, "learning_rate": 4.840977463782481e-08, "loss": 0.64, "step": 31225 }, { "epoch": 0.9570307711168322, "grad_norm": 1.3161753284535094, "learning_rate": 4.8340901661123596e-08, "loss": 0.6429, "step": 31226 }, { "epoch": 0.9570614196395734, "grad_norm": 1.4008245590318933, "learning_rate": 4.827207747438667e-08, "loss": 0.6468, "step": 31227 }, { "epoch": 0.9570920681623146, "grad_norm": 1.9380682150238209, "learning_rate": 4.820330207829127e-08, "loss": 0.5438, "step": 31228 }, { "epoch": 0.9571227166850558, "grad_norm": 0.45492715408087436, "learning_rate": 4.81345754735163e-08, "loss": 0.3951, "step": 31229 }, { "epoch": 0.957153365207797, "grad_norm": 1.3363197397188538, "learning_rate": 4.806589766073788e-08, "loss": 0.5254, "step": 31230 }, { "epoch": 0.9571840137305382, "grad_norm": 1.2968186340057535, "learning_rate": 4.7997268640633255e-08, "loss": 0.6444, "step": 31231 }, { "epoch": 0.9572146622532794, "grad_norm": 1.269773983865588, "learning_rate": 4.792868841387854e-08, "loss": 0.5198, "step": 31232 }, { "epoch": 0.9572453107760206, "grad_norm": 0.45315756121638084, "learning_rate": 4.786015698114988e-08, "loss": 0.3975, "step": 31233 }, { "epoch": 0.9572759592987617, "grad_norm": 1.302615949332056, "learning_rate": 4.779167434312171e-08, "loss": 0.4969, "step": 31234 }, { "epoch": 0.957306607821503, "grad_norm": 1.4418165111957826, "learning_rate": 4.7723240500469616e-08, "loss": 0.6806, "step": 31235 }, { "epoch": 0.9573372563442442, "grad_norm": 0.44661062475914015, "learning_rate": 4.7654855453866944e-08, "loss": 0.3808, "step": 31236 }, { "epoch": 0.9573679048669854, "grad_norm": 1.3799833779933792, "learning_rate": 4.758651920398871e-08, "loss": 0.6531, "step": 31237 }, { "epoch": 0.9573985533897266, "grad_norm": 1.5962984467575982, "learning_rate": 4.7518231751507715e-08, "loss": 0.6843, "step": 31238 }, { "epoch": 0.9574292019124678, "grad_norm": 1.4569332240638935, "learning_rate": 4.744999309709619e-08, "loss": 0.6522, "step": 31239 }, { "epoch": 0.957459850435209, "grad_norm": 1.250387718950311, "learning_rate": 4.738180324142749e-08, "loss": 0.4548, "step": 31240 }, { "epoch": 0.9574904989579502, "grad_norm": 1.5619683333285563, "learning_rate": 4.7313662185172745e-08, "loss": 0.6452, "step": 31241 }, { "epoch": 0.9575211474806914, "grad_norm": 1.2028076050808116, "learning_rate": 4.7245569929003644e-08, "loss": 0.4866, "step": 31242 }, { "epoch": 0.9575517960034327, "grad_norm": 1.3906874846497754, "learning_rate": 4.717752647359131e-08, "loss": 0.6355, "step": 31243 }, { "epoch": 0.9575824445261738, "grad_norm": 1.4440675776287364, "learning_rate": 4.710953181960576e-08, "loss": 0.5089, "step": 31244 }, { "epoch": 0.9576130930489151, "grad_norm": 0.4325338003721393, "learning_rate": 4.704158596771813e-08, "loss": 0.394, "step": 31245 }, { "epoch": 0.9576437415716562, "grad_norm": 1.309958627781482, "learning_rate": 4.6973688918596214e-08, "loss": 0.5092, "step": 31246 }, { "epoch": 0.9576743900943975, "grad_norm": 1.584980258899664, "learning_rate": 4.6905840672910044e-08, "loss": 0.6147, "step": 31247 }, { "epoch": 0.9577050386171386, "grad_norm": 1.4798390632644944, "learning_rate": 4.6838041231327956e-08, "loss": 0.6297, "step": 31248 }, { "epoch": 0.9577356871398799, "grad_norm": 0.48667775838059035, "learning_rate": 4.677029059451776e-08, "loss": 0.4089, "step": 31249 }, { "epoch": 0.957766335662621, "grad_norm": 1.3874931531445134, "learning_rate": 4.670258876314781e-08, "loss": 0.6226, "step": 31250 }, { "epoch": 0.9577969841853623, "grad_norm": 1.5131816423823203, "learning_rate": 4.663493573788369e-08, "loss": 0.661, "step": 31251 }, { "epoch": 0.9578276327081034, "grad_norm": 1.3736310526028888, "learning_rate": 4.6567331519393747e-08, "loss": 0.5742, "step": 31252 }, { "epoch": 0.9578582812308447, "grad_norm": 1.2055044479427695, "learning_rate": 4.6499776108343e-08, "loss": 0.5029, "step": 31253 }, { "epoch": 0.9578889297535859, "grad_norm": 1.4879917662424373, "learning_rate": 4.643226950539703e-08, "loss": 0.5981, "step": 31254 }, { "epoch": 0.9579195782763271, "grad_norm": 1.408990091989303, "learning_rate": 4.6364811711221426e-08, "loss": 0.6389, "step": 31255 }, { "epoch": 0.9579502267990683, "grad_norm": 1.3273015033481466, "learning_rate": 4.6297402726481197e-08, "loss": 0.6179, "step": 31256 }, { "epoch": 0.9579808753218095, "grad_norm": 1.3638629393775408, "learning_rate": 4.623004255183971e-08, "loss": 0.5636, "step": 31257 }, { "epoch": 0.9580115238445507, "grad_norm": 1.3990214881959364, "learning_rate": 4.616273118796144e-08, "loss": 0.6279, "step": 31258 }, { "epoch": 0.9580421723672919, "grad_norm": 1.0673335955748662, "learning_rate": 4.609546863550918e-08, "loss": 0.4981, "step": 31259 }, { "epoch": 0.9580728208900331, "grad_norm": 1.3995126680629255, "learning_rate": 4.602825489514573e-08, "loss": 0.5617, "step": 31260 }, { "epoch": 0.9581034694127744, "grad_norm": 1.2846300626224751, "learning_rate": 4.5961089967533346e-08, "loss": 0.5975, "step": 31261 }, { "epoch": 0.9581341179355155, "grad_norm": 1.3375520506334782, "learning_rate": 4.589397385333427e-08, "loss": 0.5699, "step": 31262 }, { "epoch": 0.9581647664582568, "grad_norm": 1.3415440492703277, "learning_rate": 4.582690655320854e-08, "loss": 0.6383, "step": 31263 }, { "epoch": 0.9581954149809979, "grad_norm": 0.4535085597982933, "learning_rate": 4.575988806781895e-08, "loss": 0.4004, "step": 31264 }, { "epoch": 0.9582260635037391, "grad_norm": 1.4235891001464434, "learning_rate": 4.569291839782386e-08, "loss": 0.4646, "step": 31265 }, { "epoch": 0.9582567120264803, "grad_norm": 1.3679237313245776, "learning_rate": 4.562599754388441e-08, "loss": 0.5541, "step": 31266 }, { "epoch": 0.9582873605492215, "grad_norm": 1.3620836815035313, "learning_rate": 4.5559125506660084e-08, "loss": 0.5972, "step": 31267 }, { "epoch": 0.9583180090719627, "grad_norm": 1.3686727522392448, "learning_rate": 4.5492302286808676e-08, "loss": 0.6945, "step": 31268 }, { "epoch": 0.9583486575947039, "grad_norm": 1.256252708187268, "learning_rate": 4.542552788498966e-08, "loss": 0.6252, "step": 31269 }, { "epoch": 0.9583793061174452, "grad_norm": 1.2320238063549789, "learning_rate": 4.535880230186085e-08, "loss": 0.5318, "step": 31270 }, { "epoch": 0.9584099546401863, "grad_norm": 1.3414629668883118, "learning_rate": 4.5292125538078933e-08, "loss": 0.6617, "step": 31271 }, { "epoch": 0.9584406031629276, "grad_norm": 1.3547313516235449, "learning_rate": 4.522549759430173e-08, "loss": 0.58, "step": 31272 }, { "epoch": 0.9584712516856687, "grad_norm": 1.4025880402786535, "learning_rate": 4.5158918471185365e-08, "loss": 0.5716, "step": 31273 }, { "epoch": 0.95850190020841, "grad_norm": 0.414050280118053, "learning_rate": 4.5092388169385436e-08, "loss": 0.3863, "step": 31274 }, { "epoch": 0.9585325487311511, "grad_norm": 1.3793030697266218, "learning_rate": 4.502590668955864e-08, "loss": 0.5562, "step": 31275 }, { "epoch": 0.9585631972538924, "grad_norm": 1.254589661874499, "learning_rate": 4.495947403235889e-08, "loss": 0.558, "step": 31276 }, { "epoch": 0.9585938457766335, "grad_norm": 1.4348513041210393, "learning_rate": 4.489309019844124e-08, "loss": 0.5902, "step": 31277 }, { "epoch": 0.9586244942993748, "grad_norm": 1.506013298201286, "learning_rate": 4.482675518846069e-08, "loss": 0.6638, "step": 31278 }, { "epoch": 0.958655142822116, "grad_norm": 0.4439096669562051, "learning_rate": 4.4760469003068965e-08, "loss": 0.395, "step": 31279 }, { "epoch": 0.9586857913448572, "grad_norm": 0.4662000833588732, "learning_rate": 4.469423164292053e-08, "loss": 0.4008, "step": 31280 }, { "epoch": 0.9587164398675984, "grad_norm": 0.43426461255330084, "learning_rate": 4.462804310866764e-08, "loss": 0.4026, "step": 31281 }, { "epoch": 0.9587470883903396, "grad_norm": 1.431621700895704, "learning_rate": 4.456190340096256e-08, "loss": 0.6409, "step": 31282 }, { "epoch": 0.9587777369130808, "grad_norm": 1.4118062812787526, "learning_rate": 4.449581252045698e-08, "loss": 0.6358, "step": 31283 }, { "epoch": 0.958808385435822, "grad_norm": 1.3415289977256457, "learning_rate": 4.442977046780206e-08, "loss": 0.6135, "step": 31284 }, { "epoch": 0.9588390339585632, "grad_norm": 1.2555693948352775, "learning_rate": 4.4363777243648377e-08, "loss": 0.5602, "step": 31285 }, { "epoch": 0.9588696824813044, "grad_norm": 0.4415479391711258, "learning_rate": 4.4297832848647084e-08, "loss": 0.3779, "step": 31286 }, { "epoch": 0.9589003310040456, "grad_norm": 1.3561260768108803, "learning_rate": 4.4231937283446544e-08, "loss": 0.5894, "step": 31287 }, { "epoch": 0.9589309795267869, "grad_norm": 1.4010050604177477, "learning_rate": 4.416609054869681e-08, "loss": 0.5736, "step": 31288 }, { "epoch": 0.958961628049528, "grad_norm": 1.3747426859827145, "learning_rate": 4.410029264504678e-08, "loss": 0.5605, "step": 31289 }, { "epoch": 0.9589922765722693, "grad_norm": 1.3969576397800905, "learning_rate": 4.4034543573144295e-08, "loss": 0.5188, "step": 31290 }, { "epoch": 0.9590229250950104, "grad_norm": 1.3693526552298938, "learning_rate": 4.396884333363771e-08, "loss": 0.6027, "step": 31291 }, { "epoch": 0.9590535736177517, "grad_norm": 1.4694363448692775, "learning_rate": 4.3903191927173736e-08, "loss": 0.6488, "step": 31292 }, { "epoch": 0.9590842221404928, "grad_norm": 1.2593078339776345, "learning_rate": 4.383758935440019e-08, "loss": 0.637, "step": 31293 }, { "epoch": 0.9591148706632341, "grad_norm": 1.4775322283040282, "learning_rate": 4.377203561596266e-08, "loss": 0.6158, "step": 31294 }, { "epoch": 0.9591455191859752, "grad_norm": 1.4112375482558677, "learning_rate": 4.3706530712507854e-08, "loss": 0.5979, "step": 31295 }, { "epoch": 0.9591761677087164, "grad_norm": 1.3230651948108947, "learning_rate": 4.364107464468026e-08, "loss": 0.563, "step": 31296 }, { "epoch": 0.9592068162314576, "grad_norm": 1.5379204599088032, "learning_rate": 4.3575667413125466e-08, "loss": 0.66, "step": 31297 }, { "epoch": 0.9592374647541988, "grad_norm": 1.2777961903206334, "learning_rate": 4.351030901848741e-08, "loss": 0.5975, "step": 31298 }, { "epoch": 0.9592681132769401, "grad_norm": 1.3102355827044996, "learning_rate": 4.344499946141056e-08, "loss": 0.5687, "step": 31299 }, { "epoch": 0.9592987617996812, "grad_norm": 1.2851980787407664, "learning_rate": 4.337973874253887e-08, "loss": 0.5821, "step": 31300 }, { "epoch": 0.9593294103224225, "grad_norm": 0.4502169854455125, "learning_rate": 4.331452686251458e-08, "loss": 0.4042, "step": 31301 }, { "epoch": 0.9593600588451636, "grad_norm": 1.2887861408076295, "learning_rate": 4.324936382198053e-08, "loss": 0.4955, "step": 31302 }, { "epoch": 0.9593907073679049, "grad_norm": 1.4870184836931288, "learning_rate": 4.318424962157786e-08, "loss": 0.5355, "step": 31303 }, { "epoch": 0.959421355890646, "grad_norm": 0.43526718137907794, "learning_rate": 4.3119184261949945e-08, "loss": 0.3845, "step": 31304 }, { "epoch": 0.9594520044133873, "grad_norm": 1.6139761717317607, "learning_rate": 4.3054167743737385e-08, "loss": 0.6168, "step": 31305 }, { "epoch": 0.9594826529361284, "grad_norm": 1.356482376056234, "learning_rate": 4.298920006757967e-08, "loss": 0.6639, "step": 31306 }, { "epoch": 0.9595133014588697, "grad_norm": 1.2431045148434932, "learning_rate": 4.2924281234117407e-08, "loss": 0.5998, "step": 31307 }, { "epoch": 0.9595439499816109, "grad_norm": 1.2804204539849326, "learning_rate": 4.285941124399118e-08, "loss": 0.567, "step": 31308 }, { "epoch": 0.9595745985043521, "grad_norm": 1.4691280178048156, "learning_rate": 4.2794590097839375e-08, "loss": 0.6626, "step": 31309 }, { "epoch": 0.9596052470270933, "grad_norm": 0.43321123646478454, "learning_rate": 4.272981779630036e-08, "loss": 0.3751, "step": 31310 }, { "epoch": 0.9596358955498345, "grad_norm": 1.3218899536536726, "learning_rate": 4.266509434001309e-08, "loss": 0.5765, "step": 31311 }, { "epoch": 0.9596665440725757, "grad_norm": 0.44211681175116485, "learning_rate": 4.260041972961537e-08, "loss": 0.3854, "step": 31312 }, { "epoch": 0.9596971925953169, "grad_norm": 1.4238399953044663, "learning_rate": 4.253579396574392e-08, "loss": 0.6481, "step": 31313 }, { "epoch": 0.9597278411180581, "grad_norm": 1.4398814990309619, "learning_rate": 4.247121704903545e-08, "loss": 0.7109, "step": 31314 }, { "epoch": 0.9597584896407994, "grad_norm": 1.3491860732039191, "learning_rate": 4.2406688980126675e-08, "loss": 0.6409, "step": 31315 }, { "epoch": 0.9597891381635405, "grad_norm": 1.4477568762903754, "learning_rate": 4.234220975965375e-08, "loss": 0.6215, "step": 31316 }, { "epoch": 0.9598197866862818, "grad_norm": 1.286562607795384, "learning_rate": 4.227777938825117e-08, "loss": 0.5052, "step": 31317 }, { "epoch": 0.9598504352090229, "grad_norm": 1.3085087935676651, "learning_rate": 4.221339786655343e-08, "loss": 0.5663, "step": 31318 }, { "epoch": 0.9598810837317642, "grad_norm": 1.5312312301925624, "learning_rate": 4.214906519519668e-08, "loss": 0.6002, "step": 31319 }, { "epoch": 0.9599117322545053, "grad_norm": 1.2417027283996738, "learning_rate": 4.20847813748132e-08, "loss": 0.615, "step": 31320 }, { "epoch": 0.9599423807772466, "grad_norm": 1.2943078574931661, "learning_rate": 4.2020546406036364e-08, "loss": 0.6573, "step": 31321 }, { "epoch": 0.9599730292999877, "grad_norm": 1.6452725449718937, "learning_rate": 4.195636028950012e-08, "loss": 0.6798, "step": 31322 }, { "epoch": 0.960003677822729, "grad_norm": 1.505968563436229, "learning_rate": 4.189222302583673e-08, "loss": 0.5584, "step": 31323 }, { "epoch": 0.9600343263454701, "grad_norm": 1.4534990784812896, "learning_rate": 4.182813461567792e-08, "loss": 0.5097, "step": 31324 }, { "epoch": 0.9600649748682114, "grad_norm": 1.3772614381763253, "learning_rate": 4.176409505965484e-08, "loss": 0.6708, "step": 31325 }, { "epoch": 0.9600956233909526, "grad_norm": 0.4525802684625338, "learning_rate": 4.1700104358398106e-08, "loss": 0.3885, "step": 31326 }, { "epoch": 0.9601262719136937, "grad_norm": 1.3604206928852272, "learning_rate": 4.163616251253999e-08, "loss": 0.4825, "step": 31327 }, { "epoch": 0.960156920436435, "grad_norm": 1.372022650195288, "learning_rate": 4.1572269522708875e-08, "loss": 0.5542, "step": 31328 }, { "epoch": 0.9601875689591761, "grad_norm": 1.4301098309250992, "learning_rate": 4.150842538953481e-08, "loss": 0.5272, "step": 31329 }, { "epoch": 0.9602182174819174, "grad_norm": 1.39048339293208, "learning_rate": 4.144463011364675e-08, "loss": 0.597, "step": 31330 }, { "epoch": 0.9602488660046585, "grad_norm": 1.2711108214942515, "learning_rate": 4.138088369567361e-08, "loss": 0.6004, "step": 31331 }, { "epoch": 0.9602795145273998, "grad_norm": 1.4318251071920618, "learning_rate": 4.1317186136243805e-08, "loss": 0.6853, "step": 31332 }, { "epoch": 0.9603101630501409, "grad_norm": 1.1991292850357589, "learning_rate": 4.125353743598348e-08, "loss": 0.5002, "step": 31333 }, { "epoch": 0.9603408115728822, "grad_norm": 1.2960513258101412, "learning_rate": 4.118993759552159e-08, "loss": 0.5147, "step": 31334 }, { "epoch": 0.9603714600956234, "grad_norm": 1.192952692790587, "learning_rate": 4.112638661548429e-08, "loss": 0.5943, "step": 31335 }, { "epoch": 0.9604021086183646, "grad_norm": 0.450603883010294, "learning_rate": 4.1062884496496645e-08, "loss": 0.4019, "step": 31336 }, { "epoch": 0.9604327571411058, "grad_norm": 1.4163223448825302, "learning_rate": 4.099943123918593e-08, "loss": 0.5324, "step": 31337 }, { "epoch": 0.960463405663847, "grad_norm": 1.2730089166484788, "learning_rate": 4.093602684417608e-08, "loss": 0.4672, "step": 31338 }, { "epoch": 0.9604940541865882, "grad_norm": 1.4190452979935781, "learning_rate": 4.087267131209271e-08, "loss": 0.5558, "step": 31339 }, { "epoch": 0.9605247027093294, "grad_norm": 1.4335168533139928, "learning_rate": 4.080936464355978e-08, "loss": 0.6155, "step": 31340 }, { "epoch": 0.9605553512320706, "grad_norm": 1.625226805145453, "learning_rate": 4.074610683920066e-08, "loss": 0.6466, "step": 31341 }, { "epoch": 0.9605859997548118, "grad_norm": 1.2801625669260406, "learning_rate": 4.068289789963931e-08, "loss": 0.5229, "step": 31342 }, { "epoch": 0.960616648277553, "grad_norm": 1.3006538292878944, "learning_rate": 4.061973782549855e-08, "loss": 0.5176, "step": 31343 }, { "epoch": 0.9606472968002943, "grad_norm": 0.43100333157001336, "learning_rate": 4.0556626617399566e-08, "loss": 0.3745, "step": 31344 }, { "epoch": 0.9606779453230354, "grad_norm": 1.4504224851319523, "learning_rate": 4.0493564275965735e-08, "loss": 0.5201, "step": 31345 }, { "epoch": 0.9607085938457767, "grad_norm": 1.3860042894903069, "learning_rate": 4.043055080181824e-08, "loss": 0.6689, "step": 31346 }, { "epoch": 0.9607392423685178, "grad_norm": 1.4847286893701361, "learning_rate": 4.036758619557657e-08, "loss": 0.6475, "step": 31347 }, { "epoch": 0.9607698908912591, "grad_norm": 1.3135920016983018, "learning_rate": 4.030467045786246e-08, "loss": 0.5741, "step": 31348 }, { "epoch": 0.9608005394140002, "grad_norm": 1.3526788144632085, "learning_rate": 4.024180358929486e-08, "loss": 0.5143, "step": 31349 }, { "epoch": 0.9608311879367415, "grad_norm": 0.4567922002808782, "learning_rate": 4.017898559049438e-08, "loss": 0.3733, "step": 31350 }, { "epoch": 0.9608618364594826, "grad_norm": 1.285355056679997, "learning_rate": 4.011621646207942e-08, "loss": 0.4669, "step": 31351 }, { "epoch": 0.9608924849822239, "grad_norm": 1.2290652475362067, "learning_rate": 4.005349620466836e-08, "loss": 0.4955, "step": 31352 }, { "epoch": 0.960923133504965, "grad_norm": 1.3255322309022488, "learning_rate": 3.999082481887906e-08, "loss": 0.5626, "step": 31353 }, { "epoch": 0.9609537820277063, "grad_norm": 1.4846524510186214, "learning_rate": 3.992820230532934e-08, "loss": 0.5951, "step": 31354 }, { "epoch": 0.9609844305504475, "grad_norm": 1.4266766816967986, "learning_rate": 3.9865628664635945e-08, "loss": 0.6091, "step": 31355 }, { "epoch": 0.9610150790731887, "grad_norm": 1.3806947038822022, "learning_rate": 3.980310389741615e-08, "loss": 0.5702, "step": 31356 }, { "epoch": 0.9610457275959299, "grad_norm": 1.3057855447717448, "learning_rate": 3.974062800428502e-08, "loss": 0.5636, "step": 31357 }, { "epoch": 0.961076376118671, "grad_norm": 1.2798858118341876, "learning_rate": 3.9678200985858726e-08, "loss": 0.5143, "step": 31358 }, { "epoch": 0.9611070246414123, "grad_norm": 0.4215265388105358, "learning_rate": 3.961582284275234e-08, "loss": 0.4036, "step": 31359 }, { "epoch": 0.9611376731641534, "grad_norm": 1.3275468057026947, "learning_rate": 3.9553493575579804e-08, "loss": 0.6339, "step": 31360 }, { "epoch": 0.9611683216868947, "grad_norm": 1.6020203443749033, "learning_rate": 3.949121318495674e-08, "loss": 0.6591, "step": 31361 }, { "epoch": 0.9611989702096359, "grad_norm": 1.507888273686677, "learning_rate": 3.9428981671495446e-08, "loss": 0.6091, "step": 31362 }, { "epoch": 0.9612296187323771, "grad_norm": 1.9348899466665985, "learning_rate": 3.936679903580986e-08, "loss": 0.5961, "step": 31363 }, { "epoch": 0.9612602672551183, "grad_norm": 1.209328141237103, "learning_rate": 3.9304665278512846e-08, "loss": 0.5124, "step": 31364 }, { "epoch": 0.9612909157778595, "grad_norm": 1.3112903369471272, "learning_rate": 3.924258040021556e-08, "loss": 0.5226, "step": 31365 }, { "epoch": 0.9613215643006007, "grad_norm": 1.3183675834462982, "learning_rate": 3.9180544401530296e-08, "loss": 0.5112, "step": 31366 }, { "epoch": 0.9613522128233419, "grad_norm": 1.2648825438708755, "learning_rate": 3.911855728306879e-08, "loss": 0.5547, "step": 31367 }, { "epoch": 0.9613828613460831, "grad_norm": 1.2430195580459877, "learning_rate": 3.905661904544167e-08, "loss": 0.6051, "step": 31368 }, { "epoch": 0.9614135098688243, "grad_norm": 0.4335474817184177, "learning_rate": 3.899472968925844e-08, "loss": 0.3926, "step": 31369 }, { "epoch": 0.9614441583915655, "grad_norm": 1.4299033267014583, "learning_rate": 3.893288921512972e-08, "loss": 0.607, "step": 31370 }, { "epoch": 0.9614748069143068, "grad_norm": 1.5688161242717085, "learning_rate": 3.8871097623664475e-08, "loss": 0.5546, "step": 31371 }, { "epoch": 0.9615054554370479, "grad_norm": 1.375170780415768, "learning_rate": 3.880935491547222e-08, "loss": 0.5552, "step": 31372 }, { "epoch": 0.9615361039597892, "grad_norm": 1.5016855853056372, "learning_rate": 3.874766109115968e-08, "loss": 0.6054, "step": 31373 }, { "epoch": 0.9615667524825303, "grad_norm": 1.3398978537910098, "learning_rate": 3.8686016151336384e-08, "loss": 0.5893, "step": 31374 }, { "epoch": 0.9615974010052716, "grad_norm": 1.320635045590806, "learning_rate": 3.8624420096609604e-08, "loss": 0.6673, "step": 31375 }, { "epoch": 0.9616280495280127, "grad_norm": 1.2360713173228792, "learning_rate": 3.856287292758554e-08, "loss": 0.6012, "step": 31376 }, { "epoch": 0.961658698050754, "grad_norm": 1.3728616298988405, "learning_rate": 3.8501374644870914e-08, "loss": 0.553, "step": 31377 }, { "epoch": 0.9616893465734951, "grad_norm": 1.2150291191731621, "learning_rate": 3.8439925249071366e-08, "loss": 0.6141, "step": 31378 }, { "epoch": 0.9617199950962364, "grad_norm": 0.4456321164246542, "learning_rate": 3.837852474079307e-08, "loss": 0.3951, "step": 31379 }, { "epoch": 0.9617506436189776, "grad_norm": 1.503079155228713, "learning_rate": 3.831717312064054e-08, "loss": 0.5725, "step": 31380 }, { "epoch": 0.9617812921417188, "grad_norm": 1.3401889323864746, "learning_rate": 3.8255870389218297e-08, "loss": 0.5983, "step": 31381 }, { "epoch": 0.96181194066446, "grad_norm": 0.4495054610230037, "learning_rate": 3.8194616547130856e-08, "loss": 0.3837, "step": 31382 }, { "epoch": 0.9618425891872012, "grad_norm": 1.388452673532616, "learning_rate": 3.813341159498107e-08, "loss": 0.6729, "step": 31383 }, { "epoch": 0.9618732377099424, "grad_norm": 1.4770383365968474, "learning_rate": 3.807225553337235e-08, "loss": 0.5964, "step": 31384 }, { "epoch": 0.9619038862326836, "grad_norm": 1.3638393065763632, "learning_rate": 3.801114836290754e-08, "loss": 0.591, "step": 31385 }, { "epoch": 0.9619345347554248, "grad_norm": 1.3752590057449794, "learning_rate": 3.795009008418837e-08, "loss": 0.6368, "step": 31386 }, { "epoch": 0.961965183278166, "grad_norm": 1.503858067900289, "learning_rate": 3.788908069781605e-08, "loss": 0.5539, "step": 31387 }, { "epoch": 0.9619958318009072, "grad_norm": 1.4295388835867684, "learning_rate": 3.782812020439286e-08, "loss": 0.715, "step": 31388 }, { "epoch": 0.9620264803236483, "grad_norm": 1.479663247806509, "learning_rate": 3.7767208604518325e-08, "loss": 0.5731, "step": 31389 }, { "epoch": 0.9620571288463896, "grad_norm": 1.551780587677745, "learning_rate": 3.770634589879363e-08, "loss": 0.6187, "step": 31390 }, { "epoch": 0.9620877773691308, "grad_norm": 1.5074483414081634, "learning_rate": 3.764553208781774e-08, "loss": 0.5914, "step": 31391 }, { "epoch": 0.962118425891872, "grad_norm": 1.453087034204739, "learning_rate": 3.7584767172190175e-08, "loss": 0.5554, "step": 31392 }, { "epoch": 0.9621490744146132, "grad_norm": 1.3718658619839172, "learning_rate": 3.7524051152509346e-08, "loss": 0.5756, "step": 31393 }, { "epoch": 0.9621797229373544, "grad_norm": 0.445739510615307, "learning_rate": 3.746338402937366e-08, "loss": 0.3953, "step": 31394 }, { "epoch": 0.9622103714600956, "grad_norm": 1.3090996743660448, "learning_rate": 3.740276580338098e-08, "loss": 0.6371, "step": 31395 }, { "epoch": 0.9622410199828368, "grad_norm": 1.5142049452753799, "learning_rate": 3.7342196475129156e-08, "loss": 0.7018, "step": 31396 }, { "epoch": 0.962271668505578, "grad_norm": 0.44396794430228814, "learning_rate": 3.728167604521382e-08, "loss": 0.3875, "step": 31397 }, { "epoch": 0.9623023170283193, "grad_norm": 1.2409987630296135, "learning_rate": 3.722120451423228e-08, "loss": 0.5578, "step": 31398 }, { "epoch": 0.9623329655510604, "grad_norm": 0.4511812796707533, "learning_rate": 3.7160781882780164e-08, "loss": 0.3853, "step": 31399 }, { "epoch": 0.9623636140738017, "grad_norm": 1.3286529942649596, "learning_rate": 3.710040815145199e-08, "loss": 0.6278, "step": 31400 }, { "epoch": 0.9623942625965428, "grad_norm": 0.44981924611062, "learning_rate": 3.704008332084341e-08, "loss": 0.4051, "step": 31401 }, { "epoch": 0.9624249111192841, "grad_norm": 1.3600775095673374, "learning_rate": 3.6979807391549495e-08, "loss": 0.5911, "step": 31402 }, { "epoch": 0.9624555596420252, "grad_norm": 1.3080355286908514, "learning_rate": 3.6919580364162547e-08, "loss": 0.6089, "step": 31403 }, { "epoch": 0.9624862081647665, "grad_norm": 0.42180425821647655, "learning_rate": 3.685940223927709e-08, "loss": 0.4024, "step": 31404 }, { "epoch": 0.9625168566875076, "grad_norm": 1.6034612694826462, "learning_rate": 3.6799273017485985e-08, "loss": 0.5778, "step": 31405 }, { "epoch": 0.9625475052102489, "grad_norm": 0.4386398585846606, "learning_rate": 3.673919269938153e-08, "loss": 0.3788, "step": 31406 }, { "epoch": 0.96257815373299, "grad_norm": 0.4339696987166384, "learning_rate": 3.667916128555549e-08, "loss": 0.3968, "step": 31407 }, { "epoch": 0.9626088022557313, "grad_norm": 1.3966679066404788, "learning_rate": 3.661917877659959e-08, "loss": 0.5459, "step": 31408 }, { "epoch": 0.9626394507784725, "grad_norm": 1.4533584778837896, "learning_rate": 3.655924517310505e-08, "loss": 0.6472, "step": 31409 }, { "epoch": 0.9626700993012137, "grad_norm": 0.44194215945997267, "learning_rate": 3.649936047566249e-08, "loss": 0.4031, "step": 31410 }, { "epoch": 0.9627007478239549, "grad_norm": 1.4877258978424888, "learning_rate": 3.6439524684860894e-08, "loss": 0.6169, "step": 31411 }, { "epoch": 0.9627313963466961, "grad_norm": 1.3019712555611376, "learning_rate": 3.6379737801290896e-08, "loss": 0.5754, "step": 31412 }, { "epoch": 0.9627620448694373, "grad_norm": 1.3270013196347754, "learning_rate": 3.631999982554202e-08, "loss": 0.5769, "step": 31413 }, { "epoch": 0.9627926933921785, "grad_norm": 1.2674122748283523, "learning_rate": 3.626031075820158e-08, "loss": 0.5247, "step": 31414 }, { "epoch": 0.9628233419149197, "grad_norm": 1.424676227691215, "learning_rate": 3.6200670599858544e-08, "loss": 0.5328, "step": 31415 }, { "epoch": 0.962853990437661, "grad_norm": 1.2711445542272324, "learning_rate": 3.614107935110023e-08, "loss": 0.5765, "step": 31416 }, { "epoch": 0.9628846389604021, "grad_norm": 1.371368430268346, "learning_rate": 3.608153701251393e-08, "loss": 0.6034, "step": 31417 }, { "epoch": 0.9629152874831434, "grad_norm": 1.459942576164975, "learning_rate": 3.6022043584686416e-08, "loss": 0.6202, "step": 31418 }, { "epoch": 0.9629459360058845, "grad_norm": 1.3941293792550318, "learning_rate": 3.596259906820387e-08, "loss": 0.6645, "step": 31419 }, { "epoch": 0.9629765845286257, "grad_norm": 1.364797920166494, "learning_rate": 3.590320346365139e-08, "loss": 0.6534, "step": 31420 }, { "epoch": 0.9630072330513669, "grad_norm": 0.4785514082470024, "learning_rate": 3.584385677161517e-08, "loss": 0.4054, "step": 31421 }, { "epoch": 0.9630378815741081, "grad_norm": 1.3786457028853407, "learning_rate": 3.5784558992679744e-08, "loss": 0.6038, "step": 31422 }, { "epoch": 0.9630685300968493, "grad_norm": 1.5426956180927867, "learning_rate": 3.572531012742908e-08, "loss": 0.6963, "step": 31423 }, { "epoch": 0.9630991786195905, "grad_norm": 0.4343754392502706, "learning_rate": 3.5666110176447166e-08, "loss": 0.3842, "step": 31424 }, { "epoch": 0.9631298271423318, "grad_norm": 0.4298433057257771, "learning_rate": 3.5606959140316863e-08, "loss": 0.384, "step": 31425 }, { "epoch": 0.9631604756650729, "grad_norm": 0.4340613812737264, "learning_rate": 3.5547857019621603e-08, "loss": 0.3857, "step": 31426 }, { "epoch": 0.9631911241878142, "grad_norm": 1.1683383459556305, "learning_rate": 3.548880381494369e-08, "loss": 0.5226, "step": 31427 }, { "epoch": 0.9632217727105553, "grad_norm": 1.4125790682014938, "learning_rate": 3.542979952686432e-08, "loss": 0.5699, "step": 31428 }, { "epoch": 0.9632524212332966, "grad_norm": 1.3960347422576018, "learning_rate": 3.537084415596636e-08, "loss": 0.6069, "step": 31429 }, { "epoch": 0.9632830697560377, "grad_norm": 1.4637144795293044, "learning_rate": 3.531193770282826e-08, "loss": 0.6818, "step": 31430 }, { "epoch": 0.963313718278779, "grad_norm": 0.4398987067560353, "learning_rate": 3.5253080168033414e-08, "loss": 0.383, "step": 31431 }, { "epoch": 0.9633443668015201, "grad_norm": 1.2733371837779706, "learning_rate": 3.519427155215971e-08, "loss": 0.5738, "step": 31432 }, { "epoch": 0.9633750153242614, "grad_norm": 1.3566663370993155, "learning_rate": 3.513551185578667e-08, "loss": 0.6775, "step": 31433 }, { "epoch": 0.9634056638470025, "grad_norm": 1.5161740864785471, "learning_rate": 3.5076801079493847e-08, "loss": 0.6429, "step": 31434 }, { "epoch": 0.9634363123697438, "grad_norm": 1.3270257028032366, "learning_rate": 3.501813922386022e-08, "loss": 0.5881, "step": 31435 }, { "epoch": 0.963466960892485, "grad_norm": 2.498705685403679, "learning_rate": 3.4959526289463085e-08, "loss": 0.5811, "step": 31436 }, { "epoch": 0.9634976094152262, "grad_norm": 1.4178772798764936, "learning_rate": 3.490096227687978e-08, "loss": 0.5721, "step": 31437 }, { "epoch": 0.9635282579379674, "grad_norm": 1.293890779623438, "learning_rate": 3.484244718668761e-08, "loss": 0.4921, "step": 31438 }, { "epoch": 0.9635589064607086, "grad_norm": 1.3362060955263868, "learning_rate": 3.478398101946334e-08, "loss": 0.5977, "step": 31439 }, { "epoch": 0.9635895549834498, "grad_norm": 1.666485363608407, "learning_rate": 3.4725563775783175e-08, "loss": 0.7036, "step": 31440 }, { "epoch": 0.963620203506191, "grad_norm": 1.440143161873786, "learning_rate": 3.466719545622166e-08, "loss": 0.5924, "step": 31441 }, { "epoch": 0.9636508520289322, "grad_norm": 1.405128931853169, "learning_rate": 3.460887606135554e-08, "loss": 0.5836, "step": 31442 }, { "epoch": 0.9636815005516735, "grad_norm": 1.590087805007913, "learning_rate": 3.455060559175882e-08, "loss": 0.6556, "step": 31443 }, { "epoch": 0.9637121490744146, "grad_norm": 1.3122927976212242, "learning_rate": 3.449238404800492e-08, "loss": 0.6518, "step": 31444 }, { "epoch": 0.9637427975971559, "grad_norm": 1.4662905873655727, "learning_rate": 3.443421143066783e-08, "loss": 0.6226, "step": 31445 }, { "epoch": 0.963773446119897, "grad_norm": 1.5152778380823932, "learning_rate": 3.437608774032153e-08, "loss": 0.5802, "step": 31446 }, { "epoch": 0.9638040946426383, "grad_norm": 1.4524725978090494, "learning_rate": 3.431801297753778e-08, "loss": 0.5254, "step": 31447 }, { "epoch": 0.9638347431653794, "grad_norm": 1.2418330133358564, "learning_rate": 3.4259987142888916e-08, "loss": 0.5729, "step": 31448 }, { "epoch": 0.9638653916881207, "grad_norm": 1.234807264419555, "learning_rate": 3.420201023694725e-08, "loss": 0.5159, "step": 31449 }, { "epoch": 0.9638960402108618, "grad_norm": 1.385410820299535, "learning_rate": 3.4144082260283984e-08, "loss": 0.7199, "step": 31450 }, { "epoch": 0.963926688733603, "grad_norm": 1.3052668707209634, "learning_rate": 3.4086203213469224e-08, "loss": 0.5805, "step": 31451 }, { "epoch": 0.9639573372563442, "grad_norm": 1.257598861581196, "learning_rate": 3.402837309707363e-08, "loss": 0.6295, "step": 31452 }, { "epoch": 0.9639879857790854, "grad_norm": 1.3923700444807692, "learning_rate": 3.39705919116673e-08, "loss": 0.6329, "step": 31453 }, { "epoch": 0.9640186343018267, "grad_norm": 0.4389582660769587, "learning_rate": 3.391285965781976e-08, "loss": 0.3716, "step": 31454 }, { "epoch": 0.9640492828245678, "grad_norm": 1.4075672904660756, "learning_rate": 3.385517633609892e-08, "loss": 0.5974, "step": 31455 }, { "epoch": 0.9640799313473091, "grad_norm": 1.275349713008486, "learning_rate": 3.3797541947073744e-08, "loss": 0.5084, "step": 31456 }, { "epoch": 0.9641105798700502, "grad_norm": 1.1916574471491814, "learning_rate": 3.3739956491311564e-08, "loss": 0.5529, "step": 31457 }, { "epoch": 0.9641412283927915, "grad_norm": 1.5679305807053845, "learning_rate": 3.368241996938137e-08, "loss": 0.6408, "step": 31458 }, { "epoch": 0.9641718769155326, "grad_norm": 1.470154334035199, "learning_rate": 3.362493238184827e-08, "loss": 0.5478, "step": 31459 }, { "epoch": 0.9642025254382739, "grad_norm": 0.4219881793380945, "learning_rate": 3.356749372927903e-08, "loss": 0.3713, "step": 31460 }, { "epoch": 0.964233173961015, "grad_norm": 1.1217589780609678, "learning_rate": 3.3510104012240976e-08, "loss": 0.5417, "step": 31461 }, { "epoch": 0.9642638224837563, "grad_norm": 1.389231688975101, "learning_rate": 3.345276323129809e-08, "loss": 0.5797, "step": 31462 }, { "epoch": 0.9642944710064975, "grad_norm": 1.3157834292977055, "learning_rate": 3.3395471387015485e-08, "loss": 0.5798, "step": 31463 }, { "epoch": 0.9643251195292387, "grad_norm": 1.340965502791686, "learning_rate": 3.333822847995882e-08, "loss": 0.6722, "step": 31464 }, { "epoch": 0.9643557680519799, "grad_norm": 1.2373569807111684, "learning_rate": 3.3281034510690977e-08, "loss": 0.5749, "step": 31465 }, { "epoch": 0.9643864165747211, "grad_norm": 1.1900898706050664, "learning_rate": 3.322388947977595e-08, "loss": 0.5164, "step": 31466 }, { "epoch": 0.9644170650974623, "grad_norm": 1.4176551238637094, "learning_rate": 3.316679338777662e-08, "loss": 0.6567, "step": 31467 }, { "epoch": 0.9644477136202035, "grad_norm": 1.4741773144591126, "learning_rate": 3.3109746235255316e-08, "loss": 0.5607, "step": 31468 }, { "epoch": 0.9644783621429447, "grad_norm": 1.2500608430152644, "learning_rate": 3.305274802277547e-08, "loss": 0.483, "step": 31469 }, { "epoch": 0.964509010665686, "grad_norm": 1.3409796261600986, "learning_rate": 3.2995798750897204e-08, "loss": 0.5948, "step": 31470 }, { "epoch": 0.9645396591884271, "grad_norm": 1.244709303309689, "learning_rate": 3.2938898420182276e-08, "loss": 0.5804, "step": 31471 }, { "epoch": 0.9645703077111684, "grad_norm": 1.349854018269495, "learning_rate": 3.2882047031191354e-08, "loss": 0.6629, "step": 31472 }, { "epoch": 0.9646009562339095, "grad_norm": 1.3616607085854877, "learning_rate": 3.282524458448455e-08, "loss": 0.6303, "step": 31473 }, { "epoch": 0.9646316047566508, "grad_norm": 1.5532892108959226, "learning_rate": 3.2768491080620854e-08, "loss": 0.6802, "step": 31474 }, { "epoch": 0.9646622532793919, "grad_norm": 1.3027582858591993, "learning_rate": 3.2711786520160934e-08, "loss": 0.5077, "step": 31475 }, { "epoch": 0.9646929018021332, "grad_norm": 1.3912915590211807, "learning_rate": 3.265513090366213e-08, "loss": 0.6736, "step": 31476 }, { "epoch": 0.9647235503248743, "grad_norm": 1.2783410214710074, "learning_rate": 3.259852423168397e-08, "loss": 0.6718, "step": 31477 }, { "epoch": 0.9647541988476156, "grad_norm": 1.462719441729196, "learning_rate": 3.25419665047827e-08, "loss": 0.6609, "step": 31478 }, { "epoch": 0.9647848473703567, "grad_norm": 1.3988006293205215, "learning_rate": 3.248545772351674e-08, "loss": 0.7222, "step": 31479 }, { "epoch": 0.964815495893098, "grad_norm": 1.3825645116717973, "learning_rate": 3.2428997888442894e-08, "loss": 0.6159, "step": 31480 }, { "epoch": 0.9648461444158392, "grad_norm": 0.4732889298641497, "learning_rate": 3.237258700011736e-08, "loss": 0.3871, "step": 31481 }, { "epoch": 0.9648767929385803, "grad_norm": 1.4171455781510807, "learning_rate": 3.231622505909471e-08, "loss": 0.68, "step": 31482 }, { "epoch": 0.9649074414613216, "grad_norm": 1.2812533049310981, "learning_rate": 3.225991206593226e-08, "loss": 0.5195, "step": 31483 }, { "epoch": 0.9649380899840627, "grad_norm": 1.3257788087676077, "learning_rate": 3.2203648021183475e-08, "loss": 0.5584, "step": 31484 }, { "epoch": 0.964968738506804, "grad_norm": 1.539433658562187, "learning_rate": 3.2147432925403455e-08, "loss": 0.5903, "step": 31485 }, { "epoch": 0.9649993870295451, "grad_norm": 1.3892763915341144, "learning_rate": 3.2091266779145643e-08, "loss": 0.6224, "step": 31486 }, { "epoch": 0.9650300355522864, "grad_norm": 1.4317807804746723, "learning_rate": 3.20351495829635e-08, "loss": 0.6664, "step": 31487 }, { "epoch": 0.9650606840750275, "grad_norm": 1.2710916777126091, "learning_rate": 3.1979081337410453e-08, "loss": 0.56, "step": 31488 }, { "epoch": 0.9650913325977688, "grad_norm": 1.2795224222368993, "learning_rate": 3.1923062043038856e-08, "loss": 0.5851, "step": 31489 }, { "epoch": 0.96512198112051, "grad_norm": 1.3033009282223251, "learning_rate": 3.186709170039992e-08, "loss": 0.6085, "step": 31490 }, { "epoch": 0.9651526296432512, "grad_norm": 0.45223778452857777, "learning_rate": 3.181117031004599e-08, "loss": 0.3957, "step": 31491 }, { "epoch": 0.9651832781659924, "grad_norm": 1.1417031053699647, "learning_rate": 3.175529787252773e-08, "loss": 0.4244, "step": 31492 }, { "epoch": 0.9652139266887336, "grad_norm": 1.4255679019370258, "learning_rate": 3.1699474388395824e-08, "loss": 0.5774, "step": 31493 }, { "epoch": 0.9652445752114748, "grad_norm": 1.3283360089829834, "learning_rate": 3.1643699858199815e-08, "loss": 0.5496, "step": 31494 }, { "epoch": 0.965275223734216, "grad_norm": 1.6707540084250372, "learning_rate": 3.158797428248983e-08, "loss": 0.6005, "step": 31495 }, { "epoch": 0.9653058722569572, "grad_norm": 1.4206006038785437, "learning_rate": 3.153229766181487e-08, "loss": 0.5655, "step": 31496 }, { "epoch": 0.9653365207796984, "grad_norm": 1.2105449823024355, "learning_rate": 3.1476669996723384e-08, "loss": 0.5771, "step": 31497 }, { "epoch": 0.9653671693024396, "grad_norm": 1.40275747114355, "learning_rate": 3.1421091287763826e-08, "loss": 0.5533, "step": 31498 }, { "epoch": 0.9653978178251809, "grad_norm": 1.455786994999729, "learning_rate": 3.136556153548298e-08, "loss": 0.6669, "step": 31499 }, { "epoch": 0.965428466347922, "grad_norm": 1.686137662132091, "learning_rate": 3.131008074042874e-08, "loss": 0.7252, "step": 31500 }, { "epoch": 0.9654591148706633, "grad_norm": 1.205398695289094, "learning_rate": 3.1254648903147336e-08, "loss": 0.5808, "step": 31501 }, { "epoch": 0.9654897633934044, "grad_norm": 1.1895802493264531, "learning_rate": 3.119926602418555e-08, "loss": 0.5171, "step": 31502 }, { "epoch": 0.9655204119161457, "grad_norm": 1.3360158516818603, "learning_rate": 3.1143932104088506e-08, "loss": 0.6711, "step": 31503 }, { "epoch": 0.9655510604388868, "grad_norm": 1.3322374751575816, "learning_rate": 3.1088647143401875e-08, "loss": 0.5539, "step": 31504 }, { "epoch": 0.9655817089616281, "grad_norm": 0.4435354513485196, "learning_rate": 3.1033411142670225e-08, "loss": 0.3932, "step": 31505 }, { "epoch": 0.9656123574843692, "grad_norm": 1.403857369845269, "learning_rate": 3.097822410243756e-08, "loss": 0.5379, "step": 31506 }, { "epoch": 0.9656430060071105, "grad_norm": 0.46208948826261015, "learning_rate": 3.09230860232479e-08, "loss": 0.4014, "step": 31507 }, { "epoch": 0.9656736545298517, "grad_norm": 0.4451431366696665, "learning_rate": 3.086799690564413e-08, "loss": 0.3947, "step": 31508 }, { "epoch": 0.9657043030525929, "grad_norm": 1.5328785334722843, "learning_rate": 3.081295675016971e-08, "loss": 0.5953, "step": 31509 }, { "epoch": 0.9657349515753341, "grad_norm": 1.3392325562447, "learning_rate": 3.075796555736643e-08, "loss": 0.5788, "step": 31510 }, { "epoch": 0.9657656000980753, "grad_norm": 0.45131326845048386, "learning_rate": 3.0703023327776635e-08, "loss": 0.4136, "step": 31511 }, { "epoch": 0.9657962486208165, "grad_norm": 1.5056271999947461, "learning_rate": 3.0648130061941004e-08, "loss": 0.5477, "step": 31512 }, { "epoch": 0.9658268971435576, "grad_norm": 1.5014246220551941, "learning_rate": 3.0593285760401325e-08, "loss": 0.6145, "step": 31513 }, { "epoch": 0.9658575456662989, "grad_norm": 0.44091007436528523, "learning_rate": 3.0538490423697166e-08, "loss": 0.3896, "step": 31514 }, { "epoch": 0.96588819418904, "grad_norm": 0.4515447643853333, "learning_rate": 3.048374405236865e-08, "loss": 0.4207, "step": 31515 }, { "epoch": 0.9659188427117813, "grad_norm": 1.4400142496865993, "learning_rate": 3.042904664695534e-08, "loss": 0.5903, "step": 31516 }, { "epoch": 0.9659494912345225, "grad_norm": 1.295064628837473, "learning_rate": 3.037439820799626e-08, "loss": 0.5358, "step": 31517 }, { "epoch": 0.9659801397572637, "grad_norm": 1.1920250138174622, "learning_rate": 3.031979873602986e-08, "loss": 0.6791, "step": 31518 }, { "epoch": 0.9660107882800049, "grad_norm": 1.286212572817843, "learning_rate": 3.026524823159405e-08, "loss": 0.5763, "step": 31519 }, { "epoch": 0.9660414368027461, "grad_norm": 1.415630184870047, "learning_rate": 3.021074669522617e-08, "loss": 0.6071, "step": 31520 }, { "epoch": 0.9660720853254873, "grad_norm": 1.4357365389393688, "learning_rate": 3.015629412746357e-08, "loss": 0.6251, "step": 31521 }, { "epoch": 0.9661027338482285, "grad_norm": 1.3904011848239248, "learning_rate": 3.010189052884249e-08, "loss": 0.6573, "step": 31522 }, { "epoch": 0.9661333823709697, "grad_norm": 0.4591808914930878, "learning_rate": 3.0047535899898616e-08, "loss": 0.3762, "step": 31523 }, { "epoch": 0.966164030893711, "grad_norm": 1.152171728986287, "learning_rate": 2.999323024116873e-08, "loss": 0.481, "step": 31524 }, { "epoch": 0.9661946794164521, "grad_norm": 0.4655072902007253, "learning_rate": 2.993897355318687e-08, "loss": 0.4156, "step": 31525 }, { "epoch": 0.9662253279391934, "grad_norm": 1.333289380290798, "learning_rate": 2.988476583648814e-08, "loss": 0.5078, "step": 31526 }, { "epoch": 0.9662559764619345, "grad_norm": 1.3460989807165369, "learning_rate": 2.983060709160601e-08, "loss": 0.6103, "step": 31527 }, { "epoch": 0.9662866249846758, "grad_norm": 0.459293615638116, "learning_rate": 2.9776497319074503e-08, "loss": 0.3826, "step": 31528 }, { "epoch": 0.9663172735074169, "grad_norm": 1.3773557880225622, "learning_rate": 2.9722436519427632e-08, "loss": 0.5996, "step": 31529 }, { "epoch": 0.9663479220301582, "grad_norm": 1.3316873217604295, "learning_rate": 2.966842469319664e-08, "loss": 0.6658, "step": 31530 }, { "epoch": 0.9663785705528993, "grad_norm": 1.4474244369867983, "learning_rate": 2.9614461840914432e-08, "loss": 0.595, "step": 31531 }, { "epoch": 0.9664092190756406, "grad_norm": 1.404028595556508, "learning_rate": 2.9560547963112808e-08, "loss": 0.6708, "step": 31532 }, { "epoch": 0.9664398675983817, "grad_norm": 1.2993508174483412, "learning_rate": 2.9506683060323005e-08, "loss": 0.5386, "step": 31533 }, { "epoch": 0.966470516121123, "grad_norm": 1.5202834284952151, "learning_rate": 2.945286713307516e-08, "loss": 0.6602, "step": 31534 }, { "epoch": 0.9665011646438642, "grad_norm": 1.4847077073742296, "learning_rate": 2.9399100181900507e-08, "loss": 0.6361, "step": 31535 }, { "epoch": 0.9665318131666054, "grad_norm": 1.2154829207989004, "learning_rate": 2.9345382207328078e-08, "loss": 0.4761, "step": 31536 }, { "epoch": 0.9665624616893466, "grad_norm": 1.3672911408345563, "learning_rate": 2.9291713209887442e-08, "loss": 0.5939, "step": 31537 }, { "epoch": 0.9665931102120878, "grad_norm": 1.2474152116072805, "learning_rate": 2.9238093190107066e-08, "loss": 0.4922, "step": 31538 }, { "epoch": 0.966623758734829, "grad_norm": 1.3523074952421827, "learning_rate": 2.918452214851597e-08, "loss": 0.5719, "step": 31539 }, { "epoch": 0.9666544072575702, "grad_norm": 0.48540274786805115, "learning_rate": 2.9131000085641514e-08, "loss": 0.4116, "step": 31540 }, { "epoch": 0.9666850557803114, "grad_norm": 1.4521490572070754, "learning_rate": 2.9077527002011054e-08, "loss": 0.6658, "step": 31541 }, { "epoch": 0.9667157043030526, "grad_norm": 1.2502714202959366, "learning_rate": 2.902410289815194e-08, "loss": 0.6065, "step": 31542 }, { "epoch": 0.9667463528257938, "grad_norm": 1.1155562430492552, "learning_rate": 2.897072777458987e-08, "loss": 0.538, "step": 31543 }, { "epoch": 0.966777001348535, "grad_norm": 0.45807996538562584, "learning_rate": 2.891740163185108e-08, "loss": 0.3928, "step": 31544 }, { "epoch": 0.9668076498712762, "grad_norm": 0.44881376845140686, "learning_rate": 2.8864124470461276e-08, "loss": 0.4, "step": 31545 }, { "epoch": 0.9668382983940174, "grad_norm": 0.457770606823789, "learning_rate": 2.8810896290945023e-08, "loss": 0.3983, "step": 31546 }, { "epoch": 0.9668689469167586, "grad_norm": 0.4316671081430095, "learning_rate": 2.875771709382691e-08, "loss": 0.3823, "step": 31547 }, { "epoch": 0.9668995954394998, "grad_norm": 0.4664222294844859, "learning_rate": 2.870458687963096e-08, "loss": 0.3835, "step": 31548 }, { "epoch": 0.966930243962241, "grad_norm": 1.3125211040038869, "learning_rate": 2.865150564888064e-08, "loss": 0.6026, "step": 31549 }, { "epoch": 0.9669608924849822, "grad_norm": 1.3544095441622666, "learning_rate": 2.859847340209887e-08, "loss": 0.5162, "step": 31550 }, { "epoch": 0.9669915410077234, "grad_norm": 1.4345580773968483, "learning_rate": 2.8545490139809117e-08, "loss": 0.6369, "step": 31551 }, { "epoch": 0.9670221895304646, "grad_norm": 0.4278258312292093, "learning_rate": 2.8492555862531522e-08, "loss": 0.3803, "step": 31552 }, { "epoch": 0.9670528380532059, "grad_norm": 1.3300270204417928, "learning_rate": 2.843967057078956e-08, "loss": 0.6079, "step": 31553 }, { "epoch": 0.967083486575947, "grad_norm": 1.4038806858068054, "learning_rate": 2.8386834265103357e-08, "loss": 0.6042, "step": 31554 }, { "epoch": 0.9671141350986883, "grad_norm": 0.45392152535337776, "learning_rate": 2.8334046945993622e-08, "loss": 0.3808, "step": 31555 }, { "epoch": 0.9671447836214294, "grad_norm": 1.3759734137273973, "learning_rate": 2.8281308613980483e-08, "loss": 0.6293, "step": 31556 }, { "epoch": 0.9671754321441707, "grad_norm": 0.43694433973131813, "learning_rate": 2.822861926958409e-08, "loss": 0.3924, "step": 31557 }, { "epoch": 0.9672060806669118, "grad_norm": 1.250719108087765, "learning_rate": 2.817597891332291e-08, "loss": 0.5449, "step": 31558 }, { "epoch": 0.9672367291896531, "grad_norm": 0.4500154690617017, "learning_rate": 2.8123387545715975e-08, "loss": 0.4142, "step": 31559 }, { "epoch": 0.9672673777123942, "grad_norm": 1.2067717754730125, "learning_rate": 2.8070845167281202e-08, "loss": 0.5663, "step": 31560 }, { "epoch": 0.9672980262351355, "grad_norm": 1.2610706347848855, "learning_rate": 2.8018351778537068e-08, "loss": 0.5834, "step": 31561 }, { "epoch": 0.9673286747578766, "grad_norm": 1.5070976473153859, "learning_rate": 2.7965907379999822e-08, "loss": 0.6555, "step": 31562 }, { "epoch": 0.9673593232806179, "grad_norm": 1.099141497429072, "learning_rate": 2.7913511972186834e-08, "loss": 0.4744, "step": 31563 }, { "epoch": 0.9673899718033591, "grad_norm": 1.3284128695968433, "learning_rate": 2.78611655556138e-08, "loss": 0.5923, "step": 31564 }, { "epoch": 0.9674206203261003, "grad_norm": 1.409422874012547, "learning_rate": 2.7808868130796974e-08, "loss": 0.6234, "step": 31565 }, { "epoch": 0.9674512688488415, "grad_norm": 1.363001681481089, "learning_rate": 2.7756619698252053e-08, "loss": 0.6711, "step": 31566 }, { "epoch": 0.9674819173715827, "grad_norm": 0.46177971381546035, "learning_rate": 2.7704420258492515e-08, "loss": 0.4142, "step": 31567 }, { "epoch": 0.9675125658943239, "grad_norm": 1.2931453282815455, "learning_rate": 2.765226981203406e-08, "loss": 0.491, "step": 31568 }, { "epoch": 0.9675432144170651, "grad_norm": 1.542925982321631, "learning_rate": 2.7600168359390166e-08, "loss": 0.6078, "step": 31569 }, { "epoch": 0.9675738629398063, "grad_norm": 1.45066448564224, "learning_rate": 2.7548115901074314e-08, "loss": 0.6314, "step": 31570 }, { "epoch": 0.9676045114625476, "grad_norm": 1.3939029649625045, "learning_rate": 2.749611243759831e-08, "loss": 0.5755, "step": 31571 }, { "epoch": 0.9676351599852887, "grad_norm": 1.3939376244548691, "learning_rate": 2.7444157969475637e-08, "loss": 0.604, "step": 31572 }, { "epoch": 0.96766580850803, "grad_norm": 1.3882749772016212, "learning_rate": 2.7392252497218662e-08, "loss": 0.6526, "step": 31573 }, { "epoch": 0.9676964570307711, "grad_norm": 0.43561245729178105, "learning_rate": 2.7340396021336977e-08, "loss": 0.373, "step": 31574 }, { "epoch": 0.9677271055535123, "grad_norm": 1.5147886066854384, "learning_rate": 2.7288588542343508e-08, "loss": 0.5894, "step": 31575 }, { "epoch": 0.9677577540762535, "grad_norm": 1.4575536371820903, "learning_rate": 2.7236830060747287e-08, "loss": 0.6186, "step": 31576 }, { "epoch": 0.9677884025989947, "grad_norm": 0.4547223599730509, "learning_rate": 2.7185120577059575e-08, "loss": 0.4126, "step": 31577 }, { "epoch": 0.9678190511217359, "grad_norm": 1.1783100569319775, "learning_rate": 2.713346009178941e-08, "loss": 0.5727, "step": 31578 }, { "epoch": 0.9678496996444771, "grad_norm": 1.3324799924541066, "learning_rate": 2.7081848605444716e-08, "loss": 0.5039, "step": 31579 }, { "epoch": 0.9678803481672184, "grad_norm": 0.45297069018644465, "learning_rate": 2.7030286118535643e-08, "loss": 0.3597, "step": 31580 }, { "epoch": 0.9679109966899595, "grad_norm": 1.345736786165738, "learning_rate": 2.6978772631569007e-08, "loss": 0.6163, "step": 31581 }, { "epoch": 0.9679416452127008, "grad_norm": 1.40358275337206, "learning_rate": 2.692730814505329e-08, "loss": 0.6116, "step": 31582 }, { "epoch": 0.9679722937354419, "grad_norm": 1.3438501675041445, "learning_rate": 2.687589265949475e-08, "loss": 0.5994, "step": 31583 }, { "epoch": 0.9680029422581832, "grad_norm": 1.3390990371228326, "learning_rate": 2.682452617540077e-08, "loss": 0.595, "step": 31584 }, { "epoch": 0.9680335907809243, "grad_norm": 1.2818612713803763, "learning_rate": 2.6773208693277595e-08, "loss": 0.5225, "step": 31585 }, { "epoch": 0.9680642393036656, "grad_norm": 1.508339191353403, "learning_rate": 2.672194021362984e-08, "loss": 0.6342, "step": 31586 }, { "epoch": 0.9680948878264067, "grad_norm": 1.2577314667443584, "learning_rate": 2.667072073696375e-08, "loss": 0.618, "step": 31587 }, { "epoch": 0.968125536349148, "grad_norm": 1.36640800200836, "learning_rate": 2.661955026378338e-08, "loss": 0.6156, "step": 31588 }, { "epoch": 0.9681561848718891, "grad_norm": 1.4288063300110654, "learning_rate": 2.6568428794593314e-08, "loss": 0.6373, "step": 31589 }, { "epoch": 0.9681868333946304, "grad_norm": 1.2665323039277938, "learning_rate": 2.6517356329896492e-08, "loss": 0.5546, "step": 31590 }, { "epoch": 0.9682174819173716, "grad_norm": 1.5090278699562927, "learning_rate": 2.6466332870196953e-08, "loss": 0.5939, "step": 31591 }, { "epoch": 0.9682481304401128, "grad_norm": 1.320440939777367, "learning_rate": 2.641535841599707e-08, "loss": 0.6543, "step": 31592 }, { "epoch": 0.968278778962854, "grad_norm": 1.482498651976366, "learning_rate": 2.6364432967799225e-08, "loss": 0.6617, "step": 31593 }, { "epoch": 0.9683094274855952, "grad_norm": 1.2030469575787472, "learning_rate": 2.6313556526105234e-08, "loss": 0.6215, "step": 31594 }, { "epoch": 0.9683400760083364, "grad_norm": 1.5317836476592737, "learning_rate": 2.626272909141636e-08, "loss": 0.5989, "step": 31595 }, { "epoch": 0.9683707245310776, "grad_norm": 1.4949252464276477, "learning_rate": 2.6211950664233322e-08, "loss": 0.6191, "step": 31596 }, { "epoch": 0.9684013730538188, "grad_norm": 1.3068337399899894, "learning_rate": 2.6161221245056266e-08, "loss": 0.6028, "step": 31597 }, { "epoch": 0.96843202157656, "grad_norm": 0.4502271937285401, "learning_rate": 2.6110540834385912e-08, "loss": 0.4034, "step": 31598 }, { "epoch": 0.9684626700993012, "grad_norm": 0.43337507726758495, "learning_rate": 2.605990943272074e-08, "loss": 0.3824, "step": 31599 }, { "epoch": 0.9684933186220425, "grad_norm": 0.44875082457844484, "learning_rate": 2.600932704056036e-08, "loss": 0.3853, "step": 31600 }, { "epoch": 0.9685239671447836, "grad_norm": 1.2384793517850878, "learning_rate": 2.5958793658401593e-08, "loss": 0.5409, "step": 31601 }, { "epoch": 0.9685546156675249, "grad_norm": 1.239621418827696, "learning_rate": 2.5908309286744593e-08, "loss": 0.5836, "step": 31602 }, { "epoch": 0.968585264190266, "grad_norm": 1.4148592097611798, "learning_rate": 2.5857873926085075e-08, "loss": 0.5923, "step": 31603 }, { "epoch": 0.9686159127130073, "grad_norm": 1.2826147807509907, "learning_rate": 2.5807487576920977e-08, "loss": 0.5352, "step": 31604 }, { "epoch": 0.9686465612357484, "grad_norm": 1.3030734043761851, "learning_rate": 2.575715023974801e-08, "loss": 0.5349, "step": 31605 }, { "epoch": 0.9686772097584896, "grad_norm": 1.3323212723971078, "learning_rate": 2.5706861915062996e-08, "loss": 0.5504, "step": 31606 }, { "epoch": 0.9687078582812308, "grad_norm": 0.4655674725060214, "learning_rate": 2.5656622603361104e-08, "loss": 0.3928, "step": 31607 }, { "epoch": 0.968738506803972, "grad_norm": 1.7060590537980647, "learning_rate": 2.5606432305136376e-08, "loss": 0.5977, "step": 31608 }, { "epoch": 0.9687691553267133, "grad_norm": 1.2549017226088262, "learning_rate": 2.5556291020885084e-08, "loss": 0.6211, "step": 31609 }, { "epoch": 0.9687998038494544, "grad_norm": 0.4249020179528126, "learning_rate": 2.5506198751100163e-08, "loss": 0.3815, "step": 31610 }, { "epoch": 0.9688304523721957, "grad_norm": 1.3362741718264448, "learning_rate": 2.5456155496275114e-08, "loss": 0.5545, "step": 31611 }, { "epoch": 0.9688611008949368, "grad_norm": 1.4186277821108635, "learning_rate": 2.5406161256903982e-08, "loss": 0.5796, "step": 31612 }, { "epoch": 0.9688917494176781, "grad_norm": 1.493458532823248, "learning_rate": 2.5356216033478598e-08, "loss": 0.5843, "step": 31613 }, { "epoch": 0.9689223979404192, "grad_norm": 1.3843719554686424, "learning_rate": 2.5306319826490788e-08, "loss": 0.6191, "step": 31614 }, { "epoch": 0.9689530464631605, "grad_norm": 0.43995501332936415, "learning_rate": 2.5256472636432938e-08, "loss": 0.4082, "step": 31615 }, { "epoch": 0.9689836949859016, "grad_norm": 1.4725715493886218, "learning_rate": 2.5206674463795766e-08, "loss": 0.6792, "step": 31616 }, { "epoch": 0.9690143435086429, "grad_norm": 1.39564381369733, "learning_rate": 2.5156925309070544e-08, "loss": 0.4979, "step": 31617 }, { "epoch": 0.9690449920313841, "grad_norm": 1.293526629813689, "learning_rate": 2.510722517274633e-08, "loss": 0.6543, "step": 31618 }, { "epoch": 0.9690756405541253, "grad_norm": 1.4270833841958834, "learning_rate": 2.5057574055313837e-08, "loss": 0.5044, "step": 31619 }, { "epoch": 0.9691062890768665, "grad_norm": 1.5104233778606728, "learning_rate": 2.500797195726212e-08, "loss": 0.6995, "step": 31620 }, { "epoch": 0.9691369375996077, "grad_norm": 1.3058787251533654, "learning_rate": 2.4958418879079683e-08, "loss": 0.5955, "step": 31621 }, { "epoch": 0.9691675861223489, "grad_norm": 1.4817772636423472, "learning_rate": 2.4908914821255016e-08, "loss": 0.6754, "step": 31622 }, { "epoch": 0.9691982346450901, "grad_norm": 1.415636384292249, "learning_rate": 2.4859459784275507e-08, "loss": 0.5845, "step": 31623 }, { "epoch": 0.9692288831678313, "grad_norm": 1.4167980330158563, "learning_rate": 2.481005376862855e-08, "loss": 0.5886, "step": 31624 }, { "epoch": 0.9692595316905726, "grad_norm": 1.3887983991045592, "learning_rate": 2.4760696774800973e-08, "loss": 0.5686, "step": 31625 }, { "epoch": 0.9692901802133137, "grad_norm": 1.245883521377402, "learning_rate": 2.4711388803279613e-08, "loss": 0.5502, "step": 31626 }, { "epoch": 0.969320828736055, "grad_norm": 1.3361729702764975, "learning_rate": 2.466212985454963e-08, "loss": 0.5058, "step": 31627 }, { "epoch": 0.9693514772587961, "grad_norm": 0.44740095539002517, "learning_rate": 2.4612919929096756e-08, "loss": 0.3673, "step": 31628 }, { "epoch": 0.9693821257815374, "grad_norm": 1.4144282649372075, "learning_rate": 2.4563759027406155e-08, "loss": 0.6701, "step": 31629 }, { "epoch": 0.9694127743042785, "grad_norm": 0.45252776715523796, "learning_rate": 2.4514647149961878e-08, "loss": 0.3783, "step": 31630 }, { "epoch": 0.9694434228270198, "grad_norm": 1.463241092586309, "learning_rate": 2.4465584297247434e-08, "loss": 0.6805, "step": 31631 }, { "epoch": 0.9694740713497609, "grad_norm": 1.2975643664876453, "learning_rate": 2.4416570469746326e-08, "loss": 0.5738, "step": 31632 }, { "epoch": 0.9695047198725022, "grad_norm": 1.5545117556844772, "learning_rate": 2.4367605667942607e-08, "loss": 0.5714, "step": 31633 }, { "epoch": 0.9695353683952433, "grad_norm": 1.3069500625365778, "learning_rate": 2.431868989231756e-08, "loss": 0.5539, "step": 31634 }, { "epoch": 0.9695660169179846, "grad_norm": 1.4466863166855644, "learning_rate": 2.4269823143353578e-08, "loss": 0.6984, "step": 31635 }, { "epoch": 0.9695966654407258, "grad_norm": 1.418696535595307, "learning_rate": 2.422100542153194e-08, "loss": 0.6876, "step": 31636 }, { "epoch": 0.9696273139634669, "grad_norm": 1.2278672008397324, "learning_rate": 2.4172236727333933e-08, "loss": 0.6052, "step": 31637 }, { "epoch": 0.9696579624862082, "grad_norm": 1.3558935823871914, "learning_rate": 2.412351706123972e-08, "loss": 0.532, "step": 31638 }, { "epoch": 0.9696886110089493, "grad_norm": 1.2168089998099219, "learning_rate": 2.4074846423730035e-08, "loss": 0.5989, "step": 31639 }, { "epoch": 0.9697192595316906, "grad_norm": 1.2144831959044202, "learning_rate": 2.4026224815283937e-08, "loss": 0.4963, "step": 31640 }, { "epoch": 0.9697499080544317, "grad_norm": 1.4317609522944506, "learning_rate": 2.3977652236380488e-08, "loss": 0.6537, "step": 31641 }, { "epoch": 0.969780556577173, "grad_norm": 1.490246601999415, "learning_rate": 2.3929128687498748e-08, "loss": 0.5494, "step": 31642 }, { "epoch": 0.9698112050999141, "grad_norm": 1.3558407820047653, "learning_rate": 2.3880654169116113e-08, "loss": 0.6797, "step": 31643 }, { "epoch": 0.9698418536226554, "grad_norm": 1.3240640135190818, "learning_rate": 2.3832228681710533e-08, "loss": 0.5436, "step": 31644 }, { "epoch": 0.9698725021453966, "grad_norm": 1.3811964882434464, "learning_rate": 2.378385222575996e-08, "loss": 0.5334, "step": 31645 }, { "epoch": 0.9699031506681378, "grad_norm": 1.3304657289217754, "learning_rate": 2.3735524801739017e-08, "loss": 0.6405, "step": 31646 }, { "epoch": 0.969933799190879, "grad_norm": 1.3793705712000806, "learning_rate": 2.3687246410126207e-08, "loss": 0.6192, "step": 31647 }, { "epoch": 0.9699644477136202, "grad_norm": 1.5419981897497161, "learning_rate": 2.3639017051396152e-08, "loss": 0.6518, "step": 31648 }, { "epoch": 0.9699950962363614, "grad_norm": 1.209136246772511, "learning_rate": 2.3590836726024024e-08, "loss": 0.4888, "step": 31649 }, { "epoch": 0.9700257447591026, "grad_norm": 1.3261945365184766, "learning_rate": 2.3542705434484448e-08, "loss": 0.5471, "step": 31650 }, { "epoch": 0.9700563932818438, "grad_norm": 1.3713982560964442, "learning_rate": 2.3494623177252042e-08, "loss": 0.5835, "step": 31651 }, { "epoch": 0.970087041804585, "grad_norm": 1.4689544874297167, "learning_rate": 2.3446589954799757e-08, "loss": 0.6012, "step": 31652 }, { "epoch": 0.9701176903273262, "grad_norm": 1.2198620958137671, "learning_rate": 2.3398605767602224e-08, "loss": 0.6326, "step": 31653 }, { "epoch": 0.9701483388500675, "grad_norm": 1.3238901783128492, "learning_rate": 2.3350670616131275e-08, "loss": 0.6376, "step": 31654 }, { "epoch": 0.9701789873728086, "grad_norm": 1.3215153199700256, "learning_rate": 2.3302784500859877e-08, "loss": 0.583, "step": 31655 }, { "epoch": 0.9702096358955499, "grad_norm": 1.2687014826747935, "learning_rate": 2.325494742225931e-08, "loss": 0.597, "step": 31656 }, { "epoch": 0.970240284418291, "grad_norm": 1.2478420737942095, "learning_rate": 2.320715938080087e-08, "loss": 0.6503, "step": 31657 }, { "epoch": 0.9702709329410323, "grad_norm": 1.4179456948662128, "learning_rate": 2.3159420376955844e-08, "loss": 0.5792, "step": 31658 }, { "epoch": 0.9703015814637734, "grad_norm": 1.45134719223141, "learning_rate": 2.311173041119441e-08, "loss": 0.6291, "step": 31659 }, { "epoch": 0.9703322299865147, "grad_norm": 1.2773433854821667, "learning_rate": 2.3064089483986195e-08, "loss": 0.5464, "step": 31660 }, { "epoch": 0.9703628785092558, "grad_norm": 1.3053520050238843, "learning_rate": 2.301649759580138e-08, "loss": 0.6362, "step": 31661 }, { "epoch": 0.9703935270319971, "grad_norm": 0.4370307598020704, "learning_rate": 2.2968954747108474e-08, "loss": 0.3937, "step": 31662 }, { "epoch": 0.9704241755547383, "grad_norm": 1.3942239402341476, "learning_rate": 2.292146093837544e-08, "loss": 0.5704, "step": 31663 }, { "epoch": 0.9704548240774795, "grad_norm": 0.45628602537982643, "learning_rate": 2.287401617007079e-08, "loss": 0.3892, "step": 31664 }, { "epoch": 0.9704854726002207, "grad_norm": 1.5744181299520643, "learning_rate": 2.2826620442661927e-08, "loss": 0.682, "step": 31665 }, { "epoch": 0.9705161211229619, "grad_norm": 0.4749894237650224, "learning_rate": 2.277927375661626e-08, "loss": 0.4001, "step": 31666 }, { "epoch": 0.9705467696457031, "grad_norm": 1.4089464753898917, "learning_rate": 2.2731976112399522e-08, "loss": 0.5351, "step": 31667 }, { "epoch": 0.9705774181684442, "grad_norm": 1.4596689848794264, "learning_rate": 2.268472751047801e-08, "loss": 0.5476, "step": 31668 }, { "epoch": 0.9706080666911855, "grad_norm": 1.3302842855673038, "learning_rate": 2.2637527951317462e-08, "loss": 0.6811, "step": 31669 }, { "epoch": 0.9706387152139266, "grad_norm": 1.3011831600014427, "learning_rate": 2.2590377435383058e-08, "loss": 0.487, "step": 31670 }, { "epoch": 0.9706693637366679, "grad_norm": 1.38110775259685, "learning_rate": 2.2543275963138877e-08, "loss": 0.5656, "step": 31671 }, { "epoch": 0.970700012259409, "grad_norm": 1.2574266396942684, "learning_rate": 2.2496223535049544e-08, "loss": 0.6029, "step": 31672 }, { "epoch": 0.9707306607821503, "grad_norm": 1.339199412373695, "learning_rate": 2.244922015157802e-08, "loss": 0.5482, "step": 31673 }, { "epoch": 0.9707613093048915, "grad_norm": 0.46157302270358713, "learning_rate": 2.2402265813188938e-08, "loss": 0.3985, "step": 31674 }, { "epoch": 0.9707919578276327, "grad_norm": 1.3930401388702884, "learning_rate": 2.235536052034304e-08, "loss": 0.595, "step": 31675 }, { "epoch": 0.9708226063503739, "grad_norm": 1.3659878344316287, "learning_rate": 2.2308504273503285e-08, "loss": 0.5244, "step": 31676 }, { "epoch": 0.9708532548731151, "grad_norm": 1.517345873026601, "learning_rate": 2.2261697073132084e-08, "loss": 0.6011, "step": 31677 }, { "epoch": 0.9708839033958563, "grad_norm": 1.5380431140199196, "learning_rate": 2.221493891968962e-08, "loss": 0.6721, "step": 31678 }, { "epoch": 0.9709145519185975, "grad_norm": 1.335684997428543, "learning_rate": 2.2168229813637198e-08, "loss": 0.5908, "step": 31679 }, { "epoch": 0.9709452004413387, "grad_norm": 3.0188163006395112, "learning_rate": 2.2121569755434446e-08, "loss": 0.5496, "step": 31680 }, { "epoch": 0.97097584896408, "grad_norm": 1.2875082678531866, "learning_rate": 2.2074958745541553e-08, "loss": 0.6199, "step": 31681 }, { "epoch": 0.9710064974868211, "grad_norm": 1.5277070186763493, "learning_rate": 2.202839678441815e-08, "loss": 0.5845, "step": 31682 }, { "epoch": 0.9710371460095624, "grad_norm": 1.3645278717245835, "learning_rate": 2.1981883872522204e-08, "loss": 0.6444, "step": 31683 }, { "epoch": 0.9710677945323035, "grad_norm": 0.4525641327811535, "learning_rate": 2.1935420010312235e-08, "loss": 0.4105, "step": 31684 }, { "epoch": 0.9710984430550448, "grad_norm": 1.2225435852843818, "learning_rate": 2.188900519824677e-08, "loss": 0.6396, "step": 31685 }, { "epoch": 0.9711290915777859, "grad_norm": 1.309164850096973, "learning_rate": 2.1842639436782664e-08, "loss": 0.5873, "step": 31686 }, { "epoch": 0.9711597401005272, "grad_norm": 1.3769976595915807, "learning_rate": 2.1796322726376772e-08, "loss": 0.5636, "step": 31687 }, { "epoch": 0.9711903886232683, "grad_norm": 1.3634273243331116, "learning_rate": 2.17500550674854e-08, "loss": 0.6382, "step": 31688 }, { "epoch": 0.9712210371460096, "grad_norm": 1.4906130819162713, "learning_rate": 2.1703836460564286e-08, "loss": 0.5595, "step": 31689 }, { "epoch": 0.9712516856687508, "grad_norm": 1.43877351631601, "learning_rate": 2.1657666906069185e-08, "loss": 0.6809, "step": 31690 }, { "epoch": 0.971282334191492, "grad_norm": 1.2192084250388278, "learning_rate": 2.161154640445473e-08, "loss": 0.6028, "step": 31691 }, { "epoch": 0.9713129827142332, "grad_norm": 1.3176835864792829, "learning_rate": 2.1565474956175002e-08, "loss": 0.6228, "step": 31692 }, { "epoch": 0.9713436312369744, "grad_norm": 1.416309578472012, "learning_rate": 2.1519452561685194e-08, "loss": 0.6046, "step": 31693 }, { "epoch": 0.9713742797597156, "grad_norm": 1.4012774718523135, "learning_rate": 2.147347922143772e-08, "loss": 0.623, "step": 31694 }, { "epoch": 0.9714049282824568, "grad_norm": 0.4428377609299211, "learning_rate": 2.1427554935886106e-08, "loss": 0.4229, "step": 31695 }, { "epoch": 0.971435576805198, "grad_norm": 1.4410610085063038, "learning_rate": 2.1381679705482217e-08, "loss": 0.5457, "step": 31696 }, { "epoch": 0.9714662253279392, "grad_norm": 1.2582683539626531, "learning_rate": 2.133585353067902e-08, "loss": 0.6652, "step": 31697 }, { "epoch": 0.9714968738506804, "grad_norm": 1.5703472705077464, "learning_rate": 2.129007641192671e-08, "loss": 0.6374, "step": 31698 }, { "epoch": 0.9715275223734215, "grad_norm": 1.3495318558007203, "learning_rate": 2.12443483496777e-08, "loss": 0.6049, "step": 31699 }, { "epoch": 0.9715581708961628, "grad_norm": 1.4063809520022232, "learning_rate": 2.1198669344382196e-08, "loss": 0.5837, "step": 31700 }, { "epoch": 0.971588819418904, "grad_norm": 1.2064743602993517, "learning_rate": 2.1153039396489274e-08, "loss": 0.5769, "step": 31701 }, { "epoch": 0.9716194679416452, "grad_norm": 1.4222325707626011, "learning_rate": 2.110745850645024e-08, "loss": 0.575, "step": 31702 }, { "epoch": 0.9716501164643864, "grad_norm": 1.2105907800003177, "learning_rate": 2.1061926674712518e-08, "loss": 0.4767, "step": 31703 }, { "epoch": 0.9716807649871276, "grad_norm": 1.2901080698429928, "learning_rate": 2.10164439017263e-08, "loss": 0.5982, "step": 31704 }, { "epoch": 0.9717114135098688, "grad_norm": 1.3418601565420096, "learning_rate": 2.0971010187938456e-08, "loss": 0.6769, "step": 31705 }, { "epoch": 0.97174206203261, "grad_norm": 1.3392813989901966, "learning_rate": 2.092562553379751e-08, "loss": 0.5466, "step": 31706 }, { "epoch": 0.9717727105553512, "grad_norm": 0.4642562349223382, "learning_rate": 2.088028993975033e-08, "loss": 0.4176, "step": 31707 }, { "epoch": 0.9718033590780925, "grad_norm": 1.4126501427657392, "learning_rate": 2.0835003406243227e-08, "loss": 0.5806, "step": 31708 }, { "epoch": 0.9718340076008336, "grad_norm": 1.2956360784214696, "learning_rate": 2.078976593372306e-08, "loss": 0.594, "step": 31709 }, { "epoch": 0.9718646561235749, "grad_norm": 1.3472095008240115, "learning_rate": 2.074457752263559e-08, "loss": 0.6287, "step": 31710 }, { "epoch": 0.971895304646316, "grad_norm": 1.2854714743109812, "learning_rate": 2.06994381734249e-08, "loss": 0.544, "step": 31711 }, { "epoch": 0.9719259531690573, "grad_norm": 1.5178982894508508, "learning_rate": 2.065434788653786e-08, "loss": 0.5584, "step": 31712 }, { "epoch": 0.9719566016917984, "grad_norm": 1.416037677900921, "learning_rate": 2.0609306662416896e-08, "loss": 0.5799, "step": 31713 }, { "epoch": 0.9719872502145397, "grad_norm": 1.4894082310556305, "learning_rate": 2.0564314501506088e-08, "loss": 0.5984, "step": 31714 }, { "epoch": 0.9720178987372808, "grad_norm": 1.3017910490784972, "learning_rate": 2.051937140425009e-08, "loss": 0.567, "step": 31715 }, { "epoch": 0.9720485472600221, "grad_norm": 1.44017866260664, "learning_rate": 2.0474477371090208e-08, "loss": 0.5606, "step": 31716 }, { "epoch": 0.9720791957827633, "grad_norm": 1.2967962594918407, "learning_rate": 2.042963240246887e-08, "loss": 0.5229, "step": 31717 }, { "epoch": 0.9721098443055045, "grad_norm": 0.47703106383763905, "learning_rate": 2.0384836498829608e-08, "loss": 0.427, "step": 31718 }, { "epoch": 0.9721404928282457, "grad_norm": 1.2913934028495644, "learning_rate": 2.0340089660611518e-08, "loss": 0.5722, "step": 31719 }, { "epoch": 0.9721711413509869, "grad_norm": 1.5411536958294725, "learning_rate": 2.0295391888257577e-08, "loss": 0.6143, "step": 31720 }, { "epoch": 0.9722017898737281, "grad_norm": 1.3898166000421066, "learning_rate": 2.0250743182206877e-08, "loss": 0.6412, "step": 31721 }, { "epoch": 0.9722324383964693, "grad_norm": 1.6102055500553107, "learning_rate": 2.0206143542899625e-08, "loss": 0.6134, "step": 31722 }, { "epoch": 0.9722630869192105, "grad_norm": 1.3086453748407683, "learning_rate": 2.016159297077547e-08, "loss": 0.6359, "step": 31723 }, { "epoch": 0.9722937354419517, "grad_norm": 1.510513462281567, "learning_rate": 2.0117091466272943e-08, "loss": 0.5903, "step": 31724 }, { "epoch": 0.9723243839646929, "grad_norm": 1.557202407168063, "learning_rate": 2.0072639029831142e-08, "loss": 0.6131, "step": 31725 }, { "epoch": 0.9723550324874342, "grad_norm": 1.2274735456830952, "learning_rate": 2.002823566188805e-08, "loss": 0.5595, "step": 31726 }, { "epoch": 0.9723856810101753, "grad_norm": 1.3190595162940433, "learning_rate": 1.9983881362880542e-08, "loss": 0.5601, "step": 31727 }, { "epoch": 0.9724163295329166, "grad_norm": 1.6455234567324701, "learning_rate": 1.993957613324604e-08, "loss": 0.5724, "step": 31728 }, { "epoch": 0.9724469780556577, "grad_norm": 0.46203276113671815, "learning_rate": 1.9895319973421423e-08, "loss": 0.3881, "step": 31729 }, { "epoch": 0.9724776265783989, "grad_norm": 1.4295652420203375, "learning_rate": 1.985111288384245e-08, "loss": 0.5483, "step": 31730 }, { "epoch": 0.9725082751011401, "grad_norm": 1.4414331334396833, "learning_rate": 1.9806954864944328e-08, "loss": 0.5782, "step": 31731 }, { "epoch": 0.9725389236238813, "grad_norm": 1.3751031838426204, "learning_rate": 1.976284591716282e-08, "loss": 0.5762, "step": 31732 }, { "epoch": 0.9725695721466225, "grad_norm": 0.46619122253496614, "learning_rate": 1.9718786040932024e-08, "loss": 0.3793, "step": 31733 }, { "epoch": 0.9726002206693637, "grad_norm": 0.4729244098053417, "learning_rate": 1.9674775236686595e-08, "loss": 0.3847, "step": 31734 }, { "epoch": 0.972630869192105, "grad_norm": 1.3883157726687876, "learning_rate": 1.9630813504859515e-08, "loss": 0.5865, "step": 31735 }, { "epoch": 0.9726615177148461, "grad_norm": 1.3244363230066312, "learning_rate": 1.9586900845884327e-08, "loss": 0.593, "step": 31736 }, { "epoch": 0.9726921662375874, "grad_norm": 1.4826252135806666, "learning_rate": 1.954303726019402e-08, "loss": 0.6016, "step": 31737 }, { "epoch": 0.9727228147603285, "grad_norm": 1.3881971909001884, "learning_rate": 1.9499222748219916e-08, "loss": 0.5472, "step": 31738 }, { "epoch": 0.9727534632830698, "grad_norm": 1.408798984214285, "learning_rate": 1.9455457310394444e-08, "loss": 0.7155, "step": 31739 }, { "epoch": 0.9727841118058109, "grad_norm": 1.29876512913045, "learning_rate": 1.941174094714948e-08, "loss": 0.5817, "step": 31740 }, { "epoch": 0.9728147603285522, "grad_norm": 1.2909060709051572, "learning_rate": 1.936807365891413e-08, "loss": 0.5176, "step": 31741 }, { "epoch": 0.9728454088512933, "grad_norm": 1.305116276799914, "learning_rate": 1.9324455446119718e-08, "loss": 0.4998, "step": 31742 }, { "epoch": 0.9728760573740346, "grad_norm": 1.6061951758585682, "learning_rate": 1.928088630919589e-08, "loss": 0.6153, "step": 31743 }, { "epoch": 0.9729067058967757, "grad_norm": 1.3517199693675508, "learning_rate": 1.9237366248571754e-08, "loss": 0.5305, "step": 31744 }, { "epoch": 0.972937354419517, "grad_norm": 0.45797522700642374, "learning_rate": 1.919389526467641e-08, "loss": 0.4038, "step": 31745 }, { "epoch": 0.9729680029422582, "grad_norm": 1.4170018823313628, "learning_rate": 1.9150473357937847e-08, "loss": 0.5252, "step": 31746 }, { "epoch": 0.9729986514649994, "grad_norm": 1.2359807899213133, "learning_rate": 1.9107100528784063e-08, "loss": 0.536, "step": 31747 }, { "epoch": 0.9730292999877406, "grad_norm": 1.40332615582566, "learning_rate": 1.9063776777642485e-08, "loss": 0.4979, "step": 31748 }, { "epoch": 0.9730599485104818, "grad_norm": 0.4620535072133344, "learning_rate": 1.9020502104939996e-08, "loss": 0.3902, "step": 31749 }, { "epoch": 0.973090597033223, "grad_norm": 1.3298838588898656, "learning_rate": 1.8977276511102927e-08, "loss": 0.6545, "step": 31750 }, { "epoch": 0.9731212455559642, "grad_norm": 0.4408148684783838, "learning_rate": 1.8934099996557044e-08, "loss": 0.3998, "step": 31751 }, { "epoch": 0.9731518940787054, "grad_norm": 1.3764897030067909, "learning_rate": 1.8890972561728115e-08, "loss": 0.6576, "step": 31752 }, { "epoch": 0.9731825426014467, "grad_norm": 1.5554749862699702, "learning_rate": 1.8847894207040806e-08, "loss": 0.6264, "step": 31753 }, { "epoch": 0.9732131911241878, "grad_norm": 1.408632219798197, "learning_rate": 1.8804864932919774e-08, "loss": 0.6645, "step": 31754 }, { "epoch": 0.9732438396469291, "grad_norm": 1.4151952376220491, "learning_rate": 1.8761884739788573e-08, "loss": 0.6575, "step": 31755 }, { "epoch": 0.9732744881696702, "grad_norm": 1.425211560914995, "learning_rate": 1.8718953628071303e-08, "loss": 0.6222, "step": 31756 }, { "epoch": 0.9733051366924115, "grad_norm": 1.4348181865564626, "learning_rate": 1.867607159819096e-08, "loss": 0.5741, "step": 31757 }, { "epoch": 0.9733357852151526, "grad_norm": 1.4309762804195398, "learning_rate": 1.863323865056943e-08, "loss": 0.5509, "step": 31758 }, { "epoch": 0.9733664337378939, "grad_norm": 1.2720943854606523, "learning_rate": 1.8590454785629152e-08, "loss": 0.5688, "step": 31759 }, { "epoch": 0.973397082260635, "grad_norm": 1.3091940320885993, "learning_rate": 1.8547720003792013e-08, "loss": 0.5951, "step": 31760 }, { "epoch": 0.9734277307833762, "grad_norm": 0.4645427511860501, "learning_rate": 1.850503430547823e-08, "loss": 0.3777, "step": 31761 }, { "epoch": 0.9734583793061174, "grad_norm": 1.5065262325005422, "learning_rate": 1.8462397691109135e-08, "loss": 0.6158, "step": 31762 }, { "epoch": 0.9734890278288586, "grad_norm": 1.408744846454032, "learning_rate": 1.8419810161104946e-08, "loss": 0.5526, "step": 31763 }, { "epoch": 0.9735196763515999, "grad_norm": 1.370938159888684, "learning_rate": 1.837727171588477e-08, "loss": 0.5203, "step": 31764 }, { "epoch": 0.973550324874341, "grad_norm": 1.5237493848945196, "learning_rate": 1.833478235586772e-08, "loss": 0.655, "step": 31765 }, { "epoch": 0.9735809733970823, "grad_norm": 0.4447342334712792, "learning_rate": 1.8292342081472346e-08, "loss": 0.3846, "step": 31766 }, { "epoch": 0.9736116219198234, "grad_norm": 1.1971333598875629, "learning_rate": 1.8249950893117762e-08, "loss": 0.5678, "step": 31767 }, { "epoch": 0.9736422704425647, "grad_norm": 1.2368083516386232, "learning_rate": 1.8207608791220855e-08, "loss": 0.513, "step": 31768 }, { "epoch": 0.9736729189653058, "grad_norm": 1.258839693027894, "learning_rate": 1.8165315776199065e-08, "loss": 0.6307, "step": 31769 }, { "epoch": 0.9737035674880471, "grad_norm": 1.3466898917593633, "learning_rate": 1.8123071848469286e-08, "loss": 0.6617, "step": 31770 }, { "epoch": 0.9737342160107882, "grad_norm": 0.4409287036944291, "learning_rate": 1.8080877008447296e-08, "loss": 0.4036, "step": 31771 }, { "epoch": 0.9737648645335295, "grad_norm": 1.2910566868813023, "learning_rate": 1.8038731256549426e-08, "loss": 0.6145, "step": 31772 }, { "epoch": 0.9737955130562707, "grad_norm": 1.4348923054756795, "learning_rate": 1.7996634593189787e-08, "loss": 0.6333, "step": 31773 }, { "epoch": 0.9738261615790119, "grad_norm": 1.1044182117257646, "learning_rate": 1.795458701878472e-08, "loss": 0.5358, "step": 31774 }, { "epoch": 0.9738568101017531, "grad_norm": 1.3474814648360711, "learning_rate": 1.7912588533747777e-08, "loss": 0.6557, "step": 31775 }, { "epoch": 0.9738874586244943, "grad_norm": 1.2654813997999508, "learning_rate": 1.787063913849252e-08, "loss": 0.5631, "step": 31776 }, { "epoch": 0.9739181071472355, "grad_norm": 1.3958515695416756, "learning_rate": 1.7828738833433055e-08, "loss": 0.6358, "step": 31777 }, { "epoch": 0.9739487556699767, "grad_norm": 1.3652020102265658, "learning_rate": 1.7786887618981287e-08, "loss": 0.5526, "step": 31778 }, { "epoch": 0.9739794041927179, "grad_norm": 1.3057811781776174, "learning_rate": 1.774508549555021e-08, "loss": 0.6644, "step": 31779 }, { "epoch": 0.9740100527154592, "grad_norm": 1.413631563434682, "learning_rate": 1.7703332463551714e-08, "loss": 0.5786, "step": 31780 }, { "epoch": 0.9740407012382003, "grad_norm": 1.4068005081831232, "learning_rate": 1.7661628523397146e-08, "loss": 0.628, "step": 31781 }, { "epoch": 0.9740713497609416, "grad_norm": 1.4002220188473664, "learning_rate": 1.7619973675496728e-08, "loss": 0.6552, "step": 31782 }, { "epoch": 0.9741019982836827, "grad_norm": 1.4874192418951, "learning_rate": 1.7578367920262352e-08, "loss": 0.6283, "step": 31783 }, { "epoch": 0.974132646806424, "grad_norm": 1.4096065941352471, "learning_rate": 1.7536811258102582e-08, "loss": 0.6913, "step": 31784 }, { "epoch": 0.9741632953291651, "grad_norm": 1.2205922773285165, "learning_rate": 1.7495303689427644e-08, "loss": 0.6281, "step": 31785 }, { "epoch": 0.9741939438519064, "grad_norm": 1.5104699055693835, "learning_rate": 1.7453845214646102e-08, "loss": 0.6351, "step": 31786 }, { "epoch": 0.9742245923746475, "grad_norm": 1.3519984486198247, "learning_rate": 1.741243583416652e-08, "loss": 0.6405, "step": 31787 }, { "epoch": 0.9742552408973888, "grad_norm": 1.548281998358902, "learning_rate": 1.7371075548397454e-08, "loss": 0.5927, "step": 31788 }, { "epoch": 0.97428588942013, "grad_norm": 1.3318300245476076, "learning_rate": 1.7329764357746358e-08, "loss": 0.6466, "step": 31789 }, { "epoch": 0.9743165379428712, "grad_norm": 1.2938914483017616, "learning_rate": 1.7288502262619022e-08, "loss": 0.6182, "step": 31790 }, { "epoch": 0.9743471864656124, "grad_norm": 1.4993661423764792, "learning_rate": 1.724728926342345e-08, "loss": 0.5386, "step": 31791 }, { "epoch": 0.9743778349883535, "grad_norm": 1.5522073161016108, "learning_rate": 1.7206125360565427e-08, "loss": 0.6192, "step": 31792 }, { "epoch": 0.9744084835110948, "grad_norm": 1.5003738031998042, "learning_rate": 1.716501055445019e-08, "loss": 0.6667, "step": 31793 }, { "epoch": 0.9744391320338359, "grad_norm": 1.5317913194239197, "learning_rate": 1.7123944845482963e-08, "loss": 0.579, "step": 31794 }, { "epoch": 0.9744697805565772, "grad_norm": 0.4496615832188526, "learning_rate": 1.708292823406843e-08, "loss": 0.3892, "step": 31795 }, { "epoch": 0.9745004290793183, "grad_norm": 1.5740343405149542, "learning_rate": 1.7041960720610708e-08, "loss": 0.5158, "step": 31796 }, { "epoch": 0.9745310776020596, "grad_norm": 1.2192018722085969, "learning_rate": 1.700104230551336e-08, "loss": 0.5191, "step": 31797 }, { "epoch": 0.9745617261248007, "grad_norm": 1.2722279963845073, "learning_rate": 1.6960172989179958e-08, "loss": 0.5298, "step": 31798 }, { "epoch": 0.974592374647542, "grad_norm": 1.3874509408351026, "learning_rate": 1.6919352772012954e-08, "loss": 0.6803, "step": 31799 }, { "epoch": 0.9746230231702832, "grad_norm": 1.4810217113121562, "learning_rate": 1.687858165441425e-08, "loss": 0.5807, "step": 31800 }, { "epoch": 0.9746536716930244, "grad_norm": 1.2063822455055686, "learning_rate": 1.6837859636786303e-08, "loss": 0.5841, "step": 31801 }, { "epoch": 0.9746843202157656, "grad_norm": 1.3837537845005747, "learning_rate": 1.6797186719529347e-08, "loss": 0.5994, "step": 31802 }, { "epoch": 0.9747149687385068, "grad_norm": 1.3528887677551722, "learning_rate": 1.6756562903045282e-08, "loss": 0.604, "step": 31803 }, { "epoch": 0.974745617261248, "grad_norm": 1.3029490964876322, "learning_rate": 1.671598818773379e-08, "loss": 0.6238, "step": 31804 }, { "epoch": 0.9747762657839892, "grad_norm": 1.2461816900485978, "learning_rate": 1.667546257399455e-08, "loss": 0.5181, "step": 31805 }, { "epoch": 0.9748069143067304, "grad_norm": 1.4871967889406834, "learning_rate": 1.663498606222669e-08, "loss": 0.6095, "step": 31806 }, { "epoch": 0.9748375628294716, "grad_norm": 0.4352034263487895, "learning_rate": 1.6594558652829884e-08, "loss": 0.3964, "step": 31807 }, { "epoch": 0.9748682113522128, "grad_norm": 1.3335655359742074, "learning_rate": 1.6554180346201597e-08, "loss": 0.6336, "step": 31808 }, { "epoch": 0.9748988598749541, "grad_norm": 1.4049388717739555, "learning_rate": 1.6513851142739845e-08, "loss": 0.6311, "step": 31809 }, { "epoch": 0.9749295083976952, "grad_norm": 1.5750122082940003, "learning_rate": 1.647357104284264e-08, "loss": 0.613, "step": 31810 }, { "epoch": 0.9749601569204365, "grad_norm": 1.3340540842086672, "learning_rate": 1.6433340046906334e-08, "loss": 0.5768, "step": 31811 }, { "epoch": 0.9749908054431776, "grad_norm": 0.4567362949827326, "learning_rate": 1.639315815532727e-08, "loss": 0.4073, "step": 31812 }, { "epoch": 0.9750214539659189, "grad_norm": 1.351162177157905, "learning_rate": 1.635302536850181e-08, "loss": 0.6443, "step": 31813 }, { "epoch": 0.97505210248866, "grad_norm": 1.34013610678071, "learning_rate": 1.6312941686824622e-08, "loss": 0.5395, "step": 31814 }, { "epoch": 0.9750827510114013, "grad_norm": 1.2829731349924756, "learning_rate": 1.6272907110691516e-08, "loss": 0.6668, "step": 31815 }, { "epoch": 0.9751133995341424, "grad_norm": 1.498047657615909, "learning_rate": 1.6232921640497167e-08, "loss": 0.6519, "step": 31816 }, { "epoch": 0.9751440480568837, "grad_norm": 1.5136980398865099, "learning_rate": 1.6192985276634042e-08, "loss": 0.6498, "step": 31817 }, { "epoch": 0.9751746965796249, "grad_norm": 0.46321948641055816, "learning_rate": 1.6153098019496826e-08, "loss": 0.3864, "step": 31818 }, { "epoch": 0.9752053451023661, "grad_norm": 1.3286086721052734, "learning_rate": 1.6113259869478536e-08, "loss": 0.5777, "step": 31819 }, { "epoch": 0.9752359936251073, "grad_norm": 1.1760427141270418, "learning_rate": 1.607347082697164e-08, "loss": 0.5213, "step": 31820 }, { "epoch": 0.9752666421478485, "grad_norm": 1.3894307545162805, "learning_rate": 1.6033730892367484e-08, "loss": 0.5819, "step": 31821 }, { "epoch": 0.9752972906705897, "grad_norm": 1.3769134861720966, "learning_rate": 1.5994040066058535e-08, "loss": 0.6147, "step": 31822 }, { "epoch": 0.9753279391933308, "grad_norm": 1.445066683081505, "learning_rate": 1.595439834843504e-08, "loss": 0.629, "step": 31823 }, { "epoch": 0.9753585877160721, "grad_norm": 1.3346426051850926, "learning_rate": 1.5914805739888906e-08, "loss": 0.5549, "step": 31824 }, { "epoch": 0.9753892362388132, "grad_norm": 1.4647102608525147, "learning_rate": 1.587526224080871e-08, "loss": 0.7176, "step": 31825 }, { "epoch": 0.9754198847615545, "grad_norm": 1.2921743397562875, "learning_rate": 1.583576785158525e-08, "loss": 0.6214, "step": 31826 }, { "epoch": 0.9754505332842957, "grad_norm": 1.426305912114838, "learning_rate": 1.5796322572607105e-08, "loss": 0.6311, "step": 31827 }, { "epoch": 0.9754811818070369, "grad_norm": 1.231268420241494, "learning_rate": 1.5756926404262852e-08, "loss": 0.6716, "step": 31828 }, { "epoch": 0.9755118303297781, "grad_norm": 1.45794104941926, "learning_rate": 1.571757934694107e-08, "loss": 0.6118, "step": 31829 }, { "epoch": 0.9755424788525193, "grad_norm": 1.4993994669163944, "learning_rate": 1.5678281401029228e-08, "loss": 0.6423, "step": 31830 }, { "epoch": 0.9755731273752605, "grad_norm": 1.3226007002419349, "learning_rate": 1.5639032566914793e-08, "loss": 0.5961, "step": 31831 }, { "epoch": 0.9756037758980017, "grad_norm": 1.32287644370324, "learning_rate": 1.5599832844983564e-08, "loss": 0.536, "step": 31832 }, { "epoch": 0.9756344244207429, "grad_norm": 1.3169867935432258, "learning_rate": 1.5560682235623014e-08, "loss": 0.5773, "step": 31833 }, { "epoch": 0.9756650729434841, "grad_norm": 1.5519722634915034, "learning_rate": 1.552158073921839e-08, "loss": 0.574, "step": 31834 }, { "epoch": 0.9756957214662253, "grad_norm": 1.2782690153995204, "learning_rate": 1.5482528356154937e-08, "loss": 0.5271, "step": 31835 }, { "epoch": 0.9757263699889666, "grad_norm": 1.4300461419582629, "learning_rate": 1.5443525086817347e-08, "loss": 0.6243, "step": 31836 }, { "epoch": 0.9757570185117077, "grad_norm": 1.6834887144689226, "learning_rate": 1.5404570931590314e-08, "loss": 0.625, "step": 31837 }, { "epoch": 0.975787667034449, "grad_norm": 1.3436418300882027, "learning_rate": 1.536566589085742e-08, "loss": 0.6442, "step": 31838 }, { "epoch": 0.9758183155571901, "grad_norm": 1.4655400927371716, "learning_rate": 1.532680996500169e-08, "loss": 0.5974, "step": 31839 }, { "epoch": 0.9758489640799314, "grad_norm": 1.3211453273139517, "learning_rate": 1.5288003154406707e-08, "loss": 0.5605, "step": 31840 }, { "epoch": 0.9758796126026725, "grad_norm": 1.322444830797983, "learning_rate": 1.5249245459453833e-08, "loss": 0.5147, "step": 31841 }, { "epoch": 0.9759102611254138, "grad_norm": 1.4092747359755782, "learning_rate": 1.5210536880526093e-08, "loss": 0.6347, "step": 31842 }, { "epoch": 0.9759409096481549, "grad_norm": 1.2648993805420772, "learning_rate": 1.5171877418003744e-08, "loss": 0.5951, "step": 31843 }, { "epoch": 0.9759715581708962, "grad_norm": 1.469108197961975, "learning_rate": 1.51332670722687e-08, "loss": 0.5964, "step": 31844 }, { "epoch": 0.9760022066936374, "grad_norm": 1.4476043506884357, "learning_rate": 1.509470584370121e-08, "loss": 0.6457, "step": 31845 }, { "epoch": 0.9760328552163786, "grad_norm": 0.44889157732187895, "learning_rate": 1.505619373268097e-08, "loss": 0.3878, "step": 31846 }, { "epoch": 0.9760635037391198, "grad_norm": 1.5167749193393947, "learning_rate": 1.501773073958712e-08, "loss": 0.6499, "step": 31847 }, { "epoch": 0.976094152261861, "grad_norm": 1.3106328020819953, "learning_rate": 1.4979316864799364e-08, "loss": 0.676, "step": 31848 }, { "epoch": 0.9761248007846022, "grad_norm": 1.2934752561412317, "learning_rate": 1.4940952108695727e-08, "loss": 0.6174, "step": 31849 }, { "epoch": 0.9761554493073434, "grad_norm": 1.3277962752615513, "learning_rate": 1.490263647165424e-08, "loss": 0.6034, "step": 31850 }, { "epoch": 0.9761860978300846, "grad_norm": 1.4413156363437278, "learning_rate": 1.4864369954052938e-08, "loss": 0.6639, "step": 31851 }, { "epoch": 0.9762167463528258, "grad_norm": 1.326205287174338, "learning_rate": 1.4826152556268181e-08, "loss": 0.546, "step": 31852 }, { "epoch": 0.976247394875567, "grad_norm": 1.2860318250240699, "learning_rate": 1.4787984278676892e-08, "loss": 0.607, "step": 31853 }, { "epoch": 0.9762780433983081, "grad_norm": 1.4582804454423268, "learning_rate": 1.4749865121655438e-08, "loss": 0.6365, "step": 31854 }, { "epoch": 0.9763086919210494, "grad_norm": 0.4472987821603391, "learning_rate": 1.4711795085578517e-08, "loss": 0.3967, "step": 31855 }, { "epoch": 0.9763393404437906, "grad_norm": 1.291446539293122, "learning_rate": 1.4673774170822496e-08, "loss": 0.585, "step": 31856 }, { "epoch": 0.9763699889665318, "grad_norm": 1.219815052880314, "learning_rate": 1.4635802377760966e-08, "loss": 0.5199, "step": 31857 }, { "epoch": 0.976400637489273, "grad_norm": 1.3382777864264643, "learning_rate": 1.4597879706768625e-08, "loss": 0.5838, "step": 31858 }, { "epoch": 0.9764312860120142, "grad_norm": 1.2521732713244746, "learning_rate": 1.456000615821851e-08, "loss": 0.5875, "step": 31859 }, { "epoch": 0.9764619345347554, "grad_norm": 1.2629433711506861, "learning_rate": 1.4522181732484209e-08, "loss": 0.6151, "step": 31860 }, { "epoch": 0.9764925830574966, "grad_norm": 0.47359077277299927, "learning_rate": 1.4484406429938758e-08, "loss": 0.3917, "step": 31861 }, { "epoch": 0.9765232315802378, "grad_norm": 1.3559519698426912, "learning_rate": 1.4446680250954082e-08, "loss": 0.5612, "step": 31862 }, { "epoch": 0.976553880102979, "grad_norm": 1.2271042766884999, "learning_rate": 1.4409003195902105e-08, "loss": 0.5657, "step": 31863 }, { "epoch": 0.9765845286257202, "grad_norm": 1.1540231333779463, "learning_rate": 1.4371375265153643e-08, "loss": 0.5949, "step": 31864 }, { "epoch": 0.9766151771484615, "grad_norm": 1.3194671872055774, "learning_rate": 1.4333796459079508e-08, "loss": 0.563, "step": 31865 }, { "epoch": 0.9766458256712026, "grad_norm": 1.621998885653808, "learning_rate": 1.429626677804996e-08, "loss": 0.6206, "step": 31866 }, { "epoch": 0.9766764741939439, "grad_norm": 1.399878636414501, "learning_rate": 1.4258786222435261e-08, "loss": 0.6396, "step": 31867 }, { "epoch": 0.976707122716685, "grad_norm": 0.44653674083516115, "learning_rate": 1.4221354792604004e-08, "loss": 0.3897, "step": 31868 }, { "epoch": 0.9767377712394263, "grad_norm": 1.343217382091043, "learning_rate": 1.418397248892589e-08, "loss": 0.5477, "step": 31869 }, { "epoch": 0.9767684197621674, "grad_norm": 1.5142262319941087, "learning_rate": 1.4146639311768406e-08, "loss": 0.632, "step": 31870 }, { "epoch": 0.9767990682849087, "grad_norm": 1.263058640907628, "learning_rate": 1.4109355261500146e-08, "loss": 0.6539, "step": 31871 }, { "epoch": 0.9768297168076499, "grad_norm": 0.44753765501478604, "learning_rate": 1.4072120338488038e-08, "loss": 0.3855, "step": 31872 }, { "epoch": 0.9768603653303911, "grad_norm": 1.290638038417091, "learning_rate": 1.4034934543098454e-08, "loss": 0.5768, "step": 31873 }, { "epoch": 0.9768910138531323, "grad_norm": 1.2332593692180471, "learning_rate": 1.3997797875698882e-08, "loss": 0.5346, "step": 31874 }, { "epoch": 0.9769216623758735, "grad_norm": 1.73644515511721, "learning_rate": 1.3960710336654582e-08, "loss": 0.6685, "step": 31875 }, { "epoch": 0.9769523108986147, "grad_norm": 0.4449197206924347, "learning_rate": 1.3923671926331373e-08, "loss": 0.3808, "step": 31876 }, { "epoch": 0.9769829594213559, "grad_norm": 1.3247176725465521, "learning_rate": 1.3886682645093407e-08, "loss": 0.5757, "step": 31877 }, { "epoch": 0.9770136079440971, "grad_norm": 0.45029156010248256, "learning_rate": 1.3849742493306506e-08, "loss": 0.3867, "step": 31878 }, { "epoch": 0.9770442564668383, "grad_norm": 1.2611673994315924, "learning_rate": 1.3812851471333156e-08, "loss": 0.587, "step": 31879 }, { "epoch": 0.9770749049895795, "grad_norm": 1.434225255009235, "learning_rate": 1.3776009579538063e-08, "loss": 0.541, "step": 31880 }, { "epoch": 0.9771055535123208, "grad_norm": 1.266558593977349, "learning_rate": 1.3739216818283163e-08, "loss": 0.5815, "step": 31881 }, { "epoch": 0.9771362020350619, "grad_norm": 0.43404701052165645, "learning_rate": 1.3702473187932053e-08, "loss": 0.3891, "step": 31882 }, { "epoch": 0.9771668505578032, "grad_norm": 0.4335802881222866, "learning_rate": 1.366577868884611e-08, "loss": 0.3957, "step": 31883 }, { "epoch": 0.9771974990805443, "grad_norm": 1.662423421802286, "learning_rate": 1.3629133321387266e-08, "loss": 0.6052, "step": 31884 }, { "epoch": 0.9772281476032855, "grad_norm": 1.263620753113058, "learning_rate": 1.3592537085915792e-08, "loss": 0.6167, "step": 31885 }, { "epoch": 0.9772587961260267, "grad_norm": 1.2958575004007227, "learning_rate": 1.3555989982793615e-08, "loss": 0.5942, "step": 31886 }, { "epoch": 0.9772894446487679, "grad_norm": 1.192931539588644, "learning_rate": 1.3519492012379898e-08, "loss": 0.6217, "step": 31887 }, { "epoch": 0.9773200931715091, "grad_norm": 1.2787652349425922, "learning_rate": 1.3483043175033794e-08, "loss": 0.533, "step": 31888 }, { "epoch": 0.9773507416942503, "grad_norm": 1.318818717897301, "learning_rate": 1.3446643471116127e-08, "loss": 0.5971, "step": 31889 }, { "epoch": 0.9773813902169916, "grad_norm": 1.3214978119660559, "learning_rate": 1.3410292900983835e-08, "loss": 0.5805, "step": 31890 }, { "epoch": 0.9774120387397327, "grad_norm": 1.3109734039205256, "learning_rate": 1.3373991464996072e-08, "loss": 0.607, "step": 31891 }, { "epoch": 0.977442687262474, "grad_norm": 1.2598993914709435, "learning_rate": 1.3337739163510333e-08, "loss": 0.5326, "step": 31892 }, { "epoch": 0.9774733357852151, "grad_norm": 1.287391085404399, "learning_rate": 1.3301535996883552e-08, "loss": 0.6101, "step": 31893 }, { "epoch": 0.9775039843079564, "grad_norm": 0.4223651072998317, "learning_rate": 1.3265381965472668e-08, "loss": 0.375, "step": 31894 }, { "epoch": 0.9775346328306975, "grad_norm": 0.43767267991581527, "learning_rate": 1.3229277069634062e-08, "loss": 0.4037, "step": 31895 }, { "epoch": 0.9775652813534388, "grad_norm": 0.4435254716386646, "learning_rate": 1.3193221309723004e-08, "loss": 0.4085, "step": 31896 }, { "epoch": 0.9775959298761799, "grad_norm": 0.4728689394568296, "learning_rate": 1.3157214686095321e-08, "loss": 0.395, "step": 31897 }, { "epoch": 0.9776265783989212, "grad_norm": 1.2998139269692663, "learning_rate": 1.312125719910573e-08, "loss": 0.6147, "step": 31898 }, { "epoch": 0.9776572269216623, "grad_norm": 1.253837949423353, "learning_rate": 1.3085348849107837e-08, "loss": 0.5526, "step": 31899 }, { "epoch": 0.9776878754444036, "grad_norm": 1.210808507404056, "learning_rate": 1.3049489636456358e-08, "loss": 0.6174, "step": 31900 }, { "epoch": 0.9777185239671448, "grad_norm": 1.3788357953917925, "learning_rate": 1.3013679561503789e-08, "loss": 0.6551, "step": 31901 }, { "epoch": 0.977749172489886, "grad_norm": 1.305339256125939, "learning_rate": 1.2977918624603736e-08, "loss": 0.5067, "step": 31902 }, { "epoch": 0.9777798210126272, "grad_norm": 1.377935932670802, "learning_rate": 1.2942206826108139e-08, "loss": 0.5971, "step": 31903 }, { "epoch": 0.9778104695353684, "grad_norm": 1.3182322596179417, "learning_rate": 1.290654416636894e-08, "loss": 0.6301, "step": 31904 }, { "epoch": 0.9778411180581096, "grad_norm": 1.447854738983065, "learning_rate": 1.2870930645738078e-08, "loss": 0.6292, "step": 31905 }, { "epoch": 0.9778717665808508, "grad_norm": 1.3051302296958391, "learning_rate": 1.2835366264565275e-08, "loss": 0.513, "step": 31906 }, { "epoch": 0.977902415103592, "grad_norm": 1.189248577014792, "learning_rate": 1.2799851023201914e-08, "loss": 0.6001, "step": 31907 }, { "epoch": 0.9779330636263333, "grad_norm": 1.2906352255332079, "learning_rate": 1.2764384921997718e-08, "loss": 0.5833, "step": 31908 }, { "epoch": 0.9779637121490744, "grad_norm": 1.4574094041760173, "learning_rate": 1.2728967961301853e-08, "loss": 0.6193, "step": 31909 }, { "epoch": 0.9779943606718157, "grad_norm": 1.3849850297410677, "learning_rate": 1.269360014146348e-08, "loss": 0.5748, "step": 31910 }, { "epoch": 0.9780250091945568, "grad_norm": 1.3869154114263256, "learning_rate": 1.2658281462831212e-08, "loss": 0.5672, "step": 31911 }, { "epoch": 0.9780556577172981, "grad_norm": 1.2326008599633391, "learning_rate": 1.2623011925753104e-08, "loss": 0.6667, "step": 31912 }, { "epoch": 0.9780863062400392, "grad_norm": 1.5520137828336442, "learning_rate": 1.2587791530576653e-08, "loss": 0.5938, "step": 31913 }, { "epoch": 0.9781169547627805, "grad_norm": 1.3659936402356565, "learning_rate": 1.2552620277648253e-08, "loss": 0.6646, "step": 31914 }, { "epoch": 0.9781476032855216, "grad_norm": 1.3584441364231568, "learning_rate": 1.2517498167315401e-08, "loss": 0.59, "step": 31915 }, { "epoch": 0.9781782518082628, "grad_norm": 1.2636141301572552, "learning_rate": 1.2482425199923931e-08, "loss": 0.6056, "step": 31916 }, { "epoch": 0.978208900331004, "grad_norm": 1.3554737505714205, "learning_rate": 1.2447401375818569e-08, "loss": 0.5867, "step": 31917 }, { "epoch": 0.9782395488537452, "grad_norm": 1.222909518280671, "learning_rate": 1.2412426695345702e-08, "loss": 0.5419, "step": 31918 }, { "epoch": 0.9782701973764865, "grad_norm": 1.342196032643114, "learning_rate": 1.2377501158848947e-08, "loss": 0.5973, "step": 31919 }, { "epoch": 0.9783008458992276, "grad_norm": 1.4161910624823029, "learning_rate": 1.2342624766673028e-08, "loss": 0.537, "step": 31920 }, { "epoch": 0.9783314944219689, "grad_norm": 1.2778193864428744, "learning_rate": 1.2307797519161558e-08, "loss": 0.5802, "step": 31921 }, { "epoch": 0.97836214294471, "grad_norm": 1.254567337586244, "learning_rate": 1.227301941665704e-08, "loss": 0.5853, "step": 31922 }, { "epoch": 0.9783927914674513, "grad_norm": 1.5481981264765674, "learning_rate": 1.2238290459502533e-08, "loss": 0.6183, "step": 31923 }, { "epoch": 0.9784234399901924, "grad_norm": 1.5950982899265238, "learning_rate": 1.2203610648041098e-08, "loss": 0.7061, "step": 31924 }, { "epoch": 0.9784540885129337, "grad_norm": 0.4305180545063131, "learning_rate": 1.2168979982613016e-08, "loss": 0.3647, "step": 31925 }, { "epoch": 0.9784847370356748, "grad_norm": 1.390287853236446, "learning_rate": 1.2134398463560238e-08, "loss": 0.5537, "step": 31926 }, { "epoch": 0.9785153855584161, "grad_norm": 1.3195416821530328, "learning_rate": 1.20998660912236e-08, "loss": 0.5579, "step": 31927 }, { "epoch": 0.9785460340811573, "grad_norm": 1.5345438267997291, "learning_rate": 1.2065382865942832e-08, "loss": 0.663, "step": 31928 }, { "epoch": 0.9785766826038985, "grad_norm": 0.43689698284546774, "learning_rate": 1.2030948788058772e-08, "loss": 0.3984, "step": 31929 }, { "epoch": 0.9786073311266397, "grad_norm": 1.3424350157741938, "learning_rate": 1.1996563857909482e-08, "loss": 0.5626, "step": 31930 }, { "epoch": 0.9786379796493809, "grad_norm": 1.2996942578957353, "learning_rate": 1.1962228075834137e-08, "loss": 0.5652, "step": 31931 }, { "epoch": 0.9786686281721221, "grad_norm": 1.4620615077545474, "learning_rate": 1.1927941442171908e-08, "loss": 0.6146, "step": 31932 }, { "epoch": 0.9786992766948633, "grad_norm": 0.44613566369770746, "learning_rate": 1.1893703957259194e-08, "loss": 0.4019, "step": 31933 }, { "epoch": 0.9787299252176045, "grad_norm": 1.7152401601723313, "learning_rate": 1.1859515621434615e-08, "loss": 0.6821, "step": 31934 }, { "epoch": 0.9787605737403458, "grad_norm": 1.2244522712832546, "learning_rate": 1.1825376435034008e-08, "loss": 0.5146, "step": 31935 }, { "epoch": 0.9787912222630869, "grad_norm": 1.6675321139844181, "learning_rate": 1.179128639839433e-08, "loss": 0.5992, "step": 31936 }, { "epoch": 0.9788218707858282, "grad_norm": 1.3131156075332775, "learning_rate": 1.1757245511851423e-08, "loss": 0.5666, "step": 31937 }, { "epoch": 0.9788525193085693, "grad_norm": 1.3125631598324228, "learning_rate": 1.1723253775741129e-08, "loss": 0.5573, "step": 31938 }, { "epoch": 0.9788831678313106, "grad_norm": 0.4415715412720021, "learning_rate": 1.1689311190397624e-08, "loss": 0.3851, "step": 31939 }, { "epoch": 0.9789138163540517, "grad_norm": 1.3775916615245891, "learning_rate": 1.165541775615564e-08, "loss": 0.5981, "step": 31940 }, { "epoch": 0.978944464876793, "grad_norm": 1.5035376903951956, "learning_rate": 1.1621573473348801e-08, "loss": 0.6807, "step": 31941 }, { "epoch": 0.9789751133995341, "grad_norm": 0.4588238471649435, "learning_rate": 1.1587778342311284e-08, "loss": 0.4153, "step": 31942 }, { "epoch": 0.9790057619222754, "grad_norm": 1.2731834628740868, "learning_rate": 1.1554032363376156e-08, "loss": 0.5927, "step": 31943 }, { "epoch": 0.9790364104450165, "grad_norm": 1.2354101821849603, "learning_rate": 1.1520335536874816e-08, "loss": 0.502, "step": 31944 }, { "epoch": 0.9790670589677578, "grad_norm": 1.2210541965435344, "learning_rate": 1.1486687863139778e-08, "loss": 0.5992, "step": 31945 }, { "epoch": 0.979097707490499, "grad_norm": 1.3614421741261769, "learning_rate": 1.1453089342503555e-08, "loss": 0.5934, "step": 31946 }, { "epoch": 0.9791283560132401, "grad_norm": 1.2134522898911815, "learning_rate": 1.1419539975295878e-08, "loss": 0.5662, "step": 31947 }, { "epoch": 0.9791590045359814, "grad_norm": 1.3273236988553014, "learning_rate": 1.1386039761848155e-08, "loss": 0.5315, "step": 31948 }, { "epoch": 0.9791896530587225, "grad_norm": 1.3206295365759473, "learning_rate": 1.135258870249012e-08, "loss": 0.6008, "step": 31949 }, { "epoch": 0.9792203015814638, "grad_norm": 1.488837255770904, "learning_rate": 1.1319186797550952e-08, "loss": 0.6587, "step": 31950 }, { "epoch": 0.9792509501042049, "grad_norm": 1.4916371721877133, "learning_rate": 1.1285834047360943e-08, "loss": 0.6907, "step": 31951 }, { "epoch": 0.9792815986269462, "grad_norm": 1.2128244371237713, "learning_rate": 1.1252530452247612e-08, "loss": 0.5276, "step": 31952 }, { "epoch": 0.9793122471496873, "grad_norm": 1.1810303607291155, "learning_rate": 1.1219276012539581e-08, "loss": 0.5516, "step": 31953 }, { "epoch": 0.9793428956724286, "grad_norm": 1.4381451786416093, "learning_rate": 1.1186070728564369e-08, "loss": 0.6758, "step": 31954 }, { "epoch": 0.9793735441951698, "grad_norm": 1.456881286007247, "learning_rate": 1.1152914600649489e-08, "loss": 0.5353, "step": 31955 }, { "epoch": 0.979404192717911, "grad_norm": 1.376814692676309, "learning_rate": 1.1119807629121348e-08, "loss": 0.6621, "step": 31956 }, { "epoch": 0.9794348412406522, "grad_norm": 1.5193399762535764, "learning_rate": 1.1086749814306352e-08, "loss": 0.5839, "step": 31957 }, { "epoch": 0.9794654897633934, "grad_norm": 1.4932528671100884, "learning_rate": 1.1053741156529795e-08, "loss": 0.6288, "step": 31958 }, { "epoch": 0.9794961382861346, "grad_norm": 1.3633549848923443, "learning_rate": 1.1020781656116975e-08, "loss": 0.6245, "step": 31959 }, { "epoch": 0.9795267868088758, "grad_norm": 0.4492821287100326, "learning_rate": 1.0987871313393183e-08, "loss": 0.39, "step": 31960 }, { "epoch": 0.979557435331617, "grad_norm": 1.3361989513417702, "learning_rate": 1.0955010128682608e-08, "loss": 0.5942, "step": 31961 }, { "epoch": 0.9795880838543582, "grad_norm": 1.299734217432541, "learning_rate": 1.0922198102308878e-08, "loss": 0.5758, "step": 31962 }, { "epoch": 0.9796187323770994, "grad_norm": 1.3140150538837942, "learning_rate": 1.0889435234594514e-08, "loss": 0.679, "step": 31963 }, { "epoch": 0.9796493808998407, "grad_norm": 1.421502319264672, "learning_rate": 1.0856721525863701e-08, "loss": 0.6341, "step": 31964 }, { "epoch": 0.9796800294225818, "grad_norm": 0.45253835445273044, "learning_rate": 1.0824056976437846e-08, "loss": 0.3887, "step": 31965 }, { "epoch": 0.9797106779453231, "grad_norm": 1.3159734059867871, "learning_rate": 1.0791441586639472e-08, "loss": 0.5476, "step": 31966 }, { "epoch": 0.9797413264680642, "grad_norm": 1.4143280635955089, "learning_rate": 1.0758875356789434e-08, "loss": 0.6804, "step": 31967 }, { "epoch": 0.9797719749908055, "grad_norm": 1.2703412189543273, "learning_rate": 1.0726358287208583e-08, "loss": 0.499, "step": 31968 }, { "epoch": 0.9798026235135466, "grad_norm": 1.3297983108932268, "learning_rate": 1.069389037821722e-08, "loss": 0.6226, "step": 31969 }, { "epoch": 0.9798332720362879, "grad_norm": 1.377202599561595, "learning_rate": 1.0661471630135644e-08, "loss": 0.6479, "step": 31970 }, { "epoch": 0.979863920559029, "grad_norm": 1.3369267692047158, "learning_rate": 1.0629102043283602e-08, "loss": 0.636, "step": 31971 }, { "epoch": 0.9798945690817703, "grad_norm": 1.3530064947160974, "learning_rate": 1.0596781617979168e-08, "loss": 0.6209, "step": 31972 }, { "epoch": 0.9799252176045115, "grad_norm": 0.4605863234871987, "learning_rate": 1.0564510354541535e-08, "loss": 0.392, "step": 31973 }, { "epoch": 0.9799558661272527, "grad_norm": 1.3146234807008663, "learning_rate": 1.0532288253288225e-08, "loss": 0.6278, "step": 31974 }, { "epoch": 0.9799865146499939, "grad_norm": 0.4586458555233055, "learning_rate": 1.0500115314536763e-08, "loss": 0.3853, "step": 31975 }, { "epoch": 0.9800171631727351, "grad_norm": 1.3448145754202299, "learning_rate": 1.0467991538604672e-08, "loss": 0.529, "step": 31976 }, { "epoch": 0.9800478116954763, "grad_norm": 1.5711963340623685, "learning_rate": 1.043591692580781e-08, "loss": 0.6549, "step": 31977 }, { "epoch": 0.9800784602182174, "grad_norm": 1.3612033041656877, "learning_rate": 1.040389147646259e-08, "loss": 0.6027, "step": 31978 }, { "epoch": 0.9801091087409587, "grad_norm": 1.2324718228126135, "learning_rate": 1.0371915190884319e-08, "loss": 0.5472, "step": 31979 }, { "epoch": 0.9801397572636998, "grad_norm": 1.3911485861954949, "learning_rate": 1.0339988069388295e-08, "loss": 0.6449, "step": 31980 }, { "epoch": 0.9801704057864411, "grad_norm": 1.472427386515416, "learning_rate": 1.0308110112289271e-08, "loss": 0.6217, "step": 31981 }, { "epoch": 0.9802010543091823, "grad_norm": 1.205100403296675, "learning_rate": 1.0276281319900883e-08, "loss": 0.5534, "step": 31982 }, { "epoch": 0.9802317028319235, "grad_norm": 1.3736601819208918, "learning_rate": 1.0244501692536768e-08, "loss": 0.6036, "step": 31983 }, { "epoch": 0.9802623513546647, "grad_norm": 1.3768019883990579, "learning_rate": 1.0212771230510565e-08, "loss": 0.5835, "step": 31984 }, { "epoch": 0.9802929998774059, "grad_norm": 0.44765547052495824, "learning_rate": 1.0181089934134247e-08, "loss": 0.3781, "step": 31985 }, { "epoch": 0.9803236484001471, "grad_norm": 1.3997025620595502, "learning_rate": 1.0149457803720897e-08, "loss": 0.5878, "step": 31986 }, { "epoch": 0.9803542969228883, "grad_norm": 1.3907534895907483, "learning_rate": 1.0117874839581376e-08, "loss": 0.6664, "step": 31987 }, { "epoch": 0.9803849454456295, "grad_norm": 1.263444582043133, "learning_rate": 1.0086341042027104e-08, "loss": 0.5984, "step": 31988 }, { "epoch": 0.9804155939683707, "grad_norm": 0.4572987759544306, "learning_rate": 1.0054856411368941e-08, "loss": 0.381, "step": 31989 }, { "epoch": 0.9804462424911119, "grad_norm": 1.4863814859181346, "learning_rate": 1.0023420947917195e-08, "loss": 0.587, "step": 31990 }, { "epoch": 0.9804768910138532, "grad_norm": 1.6070637248541768, "learning_rate": 9.992034651981064e-09, "loss": 0.631, "step": 31991 }, { "epoch": 0.9805075395365943, "grad_norm": 1.4919545978022817, "learning_rate": 9.9606975238703e-09, "loss": 0.6248, "step": 31992 }, { "epoch": 0.9805381880593356, "grad_norm": 0.42596653081708374, "learning_rate": 9.929409563893544e-09, "loss": 0.3777, "step": 31993 }, { "epoch": 0.9805688365820767, "grad_norm": 1.2780550542187838, "learning_rate": 9.898170772358883e-09, "loss": 0.6183, "step": 31994 }, { "epoch": 0.980599485104818, "grad_norm": 1.4164168726355078, "learning_rate": 9.866981149574405e-09, "loss": 0.5738, "step": 31995 }, { "epoch": 0.9806301336275591, "grad_norm": 1.3993197636771746, "learning_rate": 9.835840695847643e-09, "loss": 0.6126, "step": 31996 }, { "epoch": 0.9806607821503004, "grad_norm": 1.3266351205014522, "learning_rate": 9.804749411485014e-09, "loss": 0.6332, "step": 31997 }, { "epoch": 0.9806914306730415, "grad_norm": 1.269167627555861, "learning_rate": 9.773707296792944e-09, "loss": 0.6172, "step": 31998 }, { "epoch": 0.9807220791957828, "grad_norm": 1.4608635745978447, "learning_rate": 9.742714352077298e-09, "loss": 0.7022, "step": 31999 }, { "epoch": 0.980752727718524, "grad_norm": 1.3736135124768398, "learning_rate": 9.711770577643387e-09, "loss": 0.5834, "step": 32000 }, { "epoch": 0.9807833762412652, "grad_norm": 1.3275745137203316, "learning_rate": 9.680875973795966e-09, "loss": 0.6145, "step": 32001 }, { "epoch": 0.9808140247640064, "grad_norm": 1.4082453350988084, "learning_rate": 9.650030540840349e-09, "loss": 0.6766, "step": 32002 }, { "epoch": 0.9808446732867476, "grad_norm": 0.45597167114881293, "learning_rate": 9.619234279079625e-09, "loss": 0.3839, "step": 32003 }, { "epoch": 0.9808753218094888, "grad_norm": 1.484432925378915, "learning_rate": 9.588487188816886e-09, "loss": 0.6109, "step": 32004 }, { "epoch": 0.98090597033223, "grad_norm": 1.3461735794893412, "learning_rate": 9.557789270356333e-09, "loss": 0.5796, "step": 32005 }, { "epoch": 0.9809366188549712, "grad_norm": 1.4082187126226546, "learning_rate": 9.52714052399939e-09, "loss": 0.6293, "step": 32006 }, { "epoch": 0.9809672673777124, "grad_norm": 1.3502279681828497, "learning_rate": 9.496540950048594e-09, "loss": 0.5606, "step": 32007 }, { "epoch": 0.9809979159004536, "grad_norm": 0.4554646380254883, "learning_rate": 9.46599054880537e-09, "loss": 0.3988, "step": 32008 }, { "epoch": 0.9810285644231947, "grad_norm": 0.4439956523728914, "learning_rate": 9.435489320570035e-09, "loss": 0.3718, "step": 32009 }, { "epoch": 0.981059212945936, "grad_norm": 0.42280549125462125, "learning_rate": 9.405037265644568e-09, "loss": 0.3763, "step": 32010 }, { "epoch": 0.9810898614686772, "grad_norm": 1.349521063642161, "learning_rate": 9.37463438432762e-09, "loss": 0.5599, "step": 32011 }, { "epoch": 0.9811205099914184, "grad_norm": 1.4781408192931476, "learning_rate": 9.344280676918949e-09, "loss": 0.5878, "step": 32012 }, { "epoch": 0.9811511585141596, "grad_norm": 1.336888605732259, "learning_rate": 9.313976143718873e-09, "loss": 0.6432, "step": 32013 }, { "epoch": 0.9811818070369008, "grad_norm": 1.269159526203781, "learning_rate": 9.283720785024376e-09, "loss": 0.5297, "step": 32014 }, { "epoch": 0.981212455559642, "grad_norm": 1.302784180278165, "learning_rate": 9.25351460113466e-09, "loss": 0.5622, "step": 32015 }, { "epoch": 0.9812431040823832, "grad_norm": 0.4552978235902065, "learning_rate": 9.223357592347272e-09, "loss": 0.3922, "step": 32016 }, { "epoch": 0.9812737526051244, "grad_norm": 1.554275932305239, "learning_rate": 9.193249758958633e-09, "loss": 0.5797, "step": 32017 }, { "epoch": 0.9813044011278657, "grad_norm": 1.2243830951594026, "learning_rate": 9.163191101265734e-09, "loss": 0.609, "step": 32018 }, { "epoch": 0.9813350496506068, "grad_norm": 1.4884631792010428, "learning_rate": 9.133181619565002e-09, "loss": 0.5864, "step": 32019 }, { "epoch": 0.9813656981733481, "grad_norm": 1.5262093573497544, "learning_rate": 9.10322131415231e-09, "loss": 0.5648, "step": 32020 }, { "epoch": 0.9813963466960892, "grad_norm": 1.3486239279625842, "learning_rate": 9.073310185322425e-09, "loss": 0.6814, "step": 32021 }, { "epoch": 0.9814269952188305, "grad_norm": 1.3341280334160819, "learning_rate": 9.043448233370111e-09, "loss": 0.6307, "step": 32022 }, { "epoch": 0.9814576437415716, "grad_norm": 0.4279993156296199, "learning_rate": 9.013635458589575e-09, "loss": 0.3771, "step": 32023 }, { "epoch": 0.9814882922643129, "grad_norm": 1.4025093674434408, "learning_rate": 8.983871861275029e-09, "loss": 0.5577, "step": 32024 }, { "epoch": 0.981518940787054, "grad_norm": 1.4924003730869542, "learning_rate": 8.954157441719014e-09, "loss": 0.636, "step": 32025 }, { "epoch": 0.9815495893097953, "grad_norm": 1.5986337045567904, "learning_rate": 8.92449220021463e-09, "loss": 0.5848, "step": 32026 }, { "epoch": 0.9815802378325365, "grad_norm": 0.4519703293376499, "learning_rate": 8.89487613705442e-09, "loss": 0.3699, "step": 32027 }, { "epoch": 0.9816108863552777, "grad_norm": 1.224092467305643, "learning_rate": 8.865309252530374e-09, "loss": 0.5638, "step": 32028 }, { "epoch": 0.9816415348780189, "grad_norm": 1.210883465489465, "learning_rate": 8.83579154693337e-09, "loss": 0.5313, "step": 32029 }, { "epoch": 0.9816721834007601, "grad_norm": 1.3343245630804625, "learning_rate": 8.806323020553731e-09, "loss": 0.5504, "step": 32030 }, { "epoch": 0.9817028319235013, "grad_norm": 1.3221501640664461, "learning_rate": 8.776903673683446e-09, "loss": 0.6473, "step": 32031 }, { "epoch": 0.9817334804462425, "grad_norm": 1.625596475374076, "learning_rate": 8.747533506610618e-09, "loss": 0.6114, "step": 32032 }, { "epoch": 0.9817641289689837, "grad_norm": 0.44274672414675076, "learning_rate": 8.718212519625569e-09, "loss": 0.4031, "step": 32033 }, { "epoch": 0.981794777491725, "grad_norm": 1.3075162942393854, "learning_rate": 8.688940713016958e-09, "loss": 0.5373, "step": 32034 }, { "epoch": 0.9818254260144661, "grad_norm": 1.3867673993614442, "learning_rate": 8.659718087073998e-09, "loss": 0.6092, "step": 32035 }, { "epoch": 0.9818560745372074, "grad_norm": 1.4079583157021758, "learning_rate": 8.630544642083128e-09, "loss": 0.56, "step": 32036 }, { "epoch": 0.9818867230599485, "grad_norm": 0.4703044071804561, "learning_rate": 8.601420378333003e-09, "loss": 0.3889, "step": 32037 }, { "epoch": 0.9819173715826898, "grad_norm": 1.4019005326912843, "learning_rate": 8.572345296109508e-09, "loss": 0.6737, "step": 32038 }, { "epoch": 0.9819480201054309, "grad_norm": 1.3788948892300277, "learning_rate": 8.543319395700744e-09, "loss": 0.5057, "step": 32039 }, { "epoch": 0.9819786686281721, "grad_norm": 1.3044319834775704, "learning_rate": 8.514342677391486e-09, "loss": 0.593, "step": 32040 }, { "epoch": 0.9820093171509133, "grad_norm": 1.5085109652487052, "learning_rate": 8.485415141467057e-09, "loss": 0.6705, "step": 32041 }, { "epoch": 0.9820399656736545, "grad_norm": 1.2337561675364248, "learning_rate": 8.456536788213343e-09, "loss": 0.6442, "step": 32042 }, { "epoch": 0.9820706141963957, "grad_norm": 1.4525735621555422, "learning_rate": 8.42770761791456e-09, "loss": 0.6977, "step": 32043 }, { "epoch": 0.9821012627191369, "grad_norm": 0.46658615009257015, "learning_rate": 8.398927630854925e-09, "loss": 0.4075, "step": 32044 }, { "epoch": 0.9821319112418782, "grad_norm": 1.2710953402970677, "learning_rate": 8.370196827317545e-09, "loss": 0.5326, "step": 32045 }, { "epoch": 0.9821625597646193, "grad_norm": 1.3712649398967616, "learning_rate": 8.341515207585526e-09, "loss": 0.6136, "step": 32046 }, { "epoch": 0.9821932082873606, "grad_norm": 1.2930258770689038, "learning_rate": 8.312882771941976e-09, "loss": 0.5921, "step": 32047 }, { "epoch": 0.9822238568101017, "grad_norm": 1.3493892801059209, "learning_rate": 8.284299520668892e-09, "loss": 0.5763, "step": 32048 }, { "epoch": 0.982254505332843, "grad_norm": 0.4363057012842907, "learning_rate": 8.255765454047716e-09, "loss": 0.3935, "step": 32049 }, { "epoch": 0.9822851538555841, "grad_norm": 1.2704753564063775, "learning_rate": 8.227280572359331e-09, "loss": 0.6003, "step": 32050 }, { "epoch": 0.9823158023783254, "grad_norm": 1.5711352360304685, "learning_rate": 8.198844875885182e-09, "loss": 0.6378, "step": 32051 }, { "epoch": 0.9823464509010665, "grad_norm": 1.2576636860795707, "learning_rate": 8.170458364905043e-09, "loss": 0.6082, "step": 32052 }, { "epoch": 0.9823770994238078, "grad_norm": 1.387516544879663, "learning_rate": 8.142121039698136e-09, "loss": 0.5969, "step": 32053 }, { "epoch": 0.982407747946549, "grad_norm": 1.3448141037309407, "learning_rate": 8.113832900544239e-09, "loss": 0.549, "step": 32054 }, { "epoch": 0.9824383964692902, "grad_norm": 1.2994580217002625, "learning_rate": 8.085593947722569e-09, "loss": 0.5576, "step": 32055 }, { "epoch": 0.9824690449920314, "grad_norm": 1.3094937645120786, "learning_rate": 8.057404181510131e-09, "loss": 0.583, "step": 32056 }, { "epoch": 0.9824996935147726, "grad_norm": 1.383517651876437, "learning_rate": 8.029263602185588e-09, "loss": 0.684, "step": 32057 }, { "epoch": 0.9825303420375138, "grad_norm": 1.2972794146047226, "learning_rate": 8.001172210025942e-09, "loss": 0.5291, "step": 32058 }, { "epoch": 0.982560990560255, "grad_norm": 1.3260899244060553, "learning_rate": 7.973130005308193e-09, "loss": 0.6551, "step": 32059 }, { "epoch": 0.9825916390829962, "grad_norm": 0.4594267874222895, "learning_rate": 7.945136988308232e-09, "loss": 0.4086, "step": 32060 }, { "epoch": 0.9826222876057374, "grad_norm": 1.3127325265744954, "learning_rate": 7.91719315930195e-09, "loss": 0.6031, "step": 32061 }, { "epoch": 0.9826529361284786, "grad_norm": 0.41822258507859367, "learning_rate": 7.889298518565236e-09, "loss": 0.3736, "step": 32062 }, { "epoch": 0.9826835846512199, "grad_norm": 1.3563021342945634, "learning_rate": 7.861453066372316e-09, "loss": 0.5995, "step": 32063 }, { "epoch": 0.982714233173961, "grad_norm": 1.3468795027571214, "learning_rate": 7.833656802997968e-09, "loss": 0.606, "step": 32064 }, { "epoch": 0.9827448816967023, "grad_norm": 1.5056865388779133, "learning_rate": 7.805909728715866e-09, "loss": 0.5981, "step": 32065 }, { "epoch": 0.9827755302194434, "grad_norm": 1.3900036487007572, "learning_rate": 7.778211843799122e-09, "loss": 0.625, "step": 32066 }, { "epoch": 0.9828061787421847, "grad_norm": 1.3294643846165137, "learning_rate": 7.750563148521406e-09, "loss": 0.5598, "step": 32067 }, { "epoch": 0.9828368272649258, "grad_norm": 1.2617311558697952, "learning_rate": 7.722963643154169e-09, "loss": 0.6448, "step": 32068 }, { "epoch": 0.9828674757876671, "grad_norm": 1.3257633993669786, "learning_rate": 7.695413327970525e-09, "loss": 0.6238, "step": 32069 }, { "epoch": 0.9828981243104082, "grad_norm": 1.2803398580131482, "learning_rate": 7.667912203240812e-09, "loss": 0.5476, "step": 32070 }, { "epoch": 0.9829287728331494, "grad_norm": 1.1839788389078043, "learning_rate": 7.640460269237038e-09, "loss": 0.5545, "step": 32071 }, { "epoch": 0.9829594213558907, "grad_norm": 1.4497011031854796, "learning_rate": 7.613057526228428e-09, "loss": 0.5422, "step": 32072 }, { "epoch": 0.9829900698786318, "grad_norm": 1.423087884193441, "learning_rate": 7.585703974486435e-09, "loss": 0.5534, "step": 32073 }, { "epoch": 0.9830207184013731, "grad_norm": 0.4601699693733471, "learning_rate": 7.558399614279732e-09, "loss": 0.3959, "step": 32074 }, { "epoch": 0.9830513669241142, "grad_norm": 1.3548322064191347, "learning_rate": 7.531144445876993e-09, "loss": 0.5741, "step": 32075 }, { "epoch": 0.9830820154468555, "grad_norm": 0.4375913646433901, "learning_rate": 7.503938469547444e-09, "loss": 0.3832, "step": 32076 }, { "epoch": 0.9831126639695966, "grad_norm": 1.180080053134065, "learning_rate": 7.47678168555921e-09, "loss": 0.5812, "step": 32077 }, { "epoch": 0.9831433124923379, "grad_norm": 1.3794550329717767, "learning_rate": 7.449674094179848e-09, "loss": 0.6426, "step": 32078 }, { "epoch": 0.983173961015079, "grad_norm": 1.3295950720274068, "learning_rate": 7.422615695675817e-09, "loss": 0.5903, "step": 32079 }, { "epoch": 0.9832046095378203, "grad_norm": 1.3333364673164305, "learning_rate": 7.395606490314122e-09, "loss": 0.6615, "step": 32080 }, { "epoch": 0.9832352580605614, "grad_norm": 1.5245485830705174, "learning_rate": 7.3686464783612185e-09, "loss": 0.6447, "step": 32081 }, { "epoch": 0.9832659065833027, "grad_norm": 1.345878023426218, "learning_rate": 7.34173566008245e-09, "loss": 0.5381, "step": 32082 }, { "epoch": 0.9832965551060439, "grad_norm": 1.2974117796109061, "learning_rate": 7.3148740357426025e-09, "loss": 0.6121, "step": 32083 }, { "epoch": 0.9833272036287851, "grad_norm": 1.644239249247016, "learning_rate": 7.288061605607022e-09, "loss": 0.6768, "step": 32084 }, { "epoch": 0.9833578521515263, "grad_norm": 0.44783919368291614, "learning_rate": 7.261298369939939e-09, "loss": 0.4149, "step": 32085 }, { "epoch": 0.9833885006742675, "grad_norm": 1.6685000184465202, "learning_rate": 7.234584329003924e-09, "loss": 0.7793, "step": 32086 }, { "epoch": 0.9834191491970087, "grad_norm": 1.495693383938908, "learning_rate": 7.207919483063763e-09, "loss": 0.578, "step": 32087 }, { "epoch": 0.9834497977197499, "grad_norm": 1.4909421245387056, "learning_rate": 7.181303832380915e-09, "loss": 0.6546, "step": 32088 }, { "epoch": 0.9834804462424911, "grad_norm": 1.3989586194305788, "learning_rate": 7.154737377218501e-09, "loss": 0.6026, "step": 32089 }, { "epoch": 0.9835110947652324, "grad_norm": 0.43227504007388895, "learning_rate": 7.12822011783798e-09, "loss": 0.3808, "step": 32090 }, { "epoch": 0.9835417432879735, "grad_norm": 1.2535438920726172, "learning_rate": 7.101752054500255e-09, "loss": 0.5878, "step": 32091 }, { "epoch": 0.9835723918107148, "grad_norm": 1.406140699356694, "learning_rate": 7.075333187466782e-09, "loss": 0.5693, "step": 32092 }, { "epoch": 0.9836030403334559, "grad_norm": 2.0307015116299802, "learning_rate": 7.048963516997354e-09, "loss": 0.5736, "step": 32093 }, { "epoch": 0.9836336888561972, "grad_norm": 0.43257578629782534, "learning_rate": 7.022643043351762e-09, "loss": 0.3986, "step": 32094 }, { "epoch": 0.9836643373789383, "grad_norm": 1.4885207203155997, "learning_rate": 6.9963717667898e-09, "loss": 0.5742, "step": 32095 }, { "epoch": 0.9836949859016796, "grad_norm": 0.4582717997726814, "learning_rate": 6.970149687570149e-09, "loss": 0.3861, "step": 32096 }, { "epoch": 0.9837256344244207, "grad_norm": 1.4965872763358652, "learning_rate": 6.943976805950936e-09, "loss": 0.6456, "step": 32097 }, { "epoch": 0.983756282947162, "grad_norm": 1.3080862438099035, "learning_rate": 6.917853122190843e-09, "loss": 0.5927, "step": 32098 }, { "epoch": 0.9837869314699031, "grad_norm": 1.27622574011366, "learning_rate": 6.891778636546331e-09, "loss": 0.6052, "step": 32099 }, { "epoch": 0.9838175799926444, "grad_norm": 1.4982280827349368, "learning_rate": 6.865753349274418e-09, "loss": 0.6681, "step": 32100 }, { "epoch": 0.9838482285153856, "grad_norm": 1.6111229954538662, "learning_rate": 6.8397772606315635e-09, "loss": 0.6041, "step": 32101 }, { "epoch": 0.9838788770381267, "grad_norm": 1.4188499441317084, "learning_rate": 6.813850370874786e-09, "loss": 0.6674, "step": 32102 }, { "epoch": 0.983909525560868, "grad_norm": 1.353160833977116, "learning_rate": 6.78797268025777e-09, "loss": 0.4936, "step": 32103 }, { "epoch": 0.9839401740836091, "grad_norm": 0.41632212740474567, "learning_rate": 6.762144189036978e-09, "loss": 0.3878, "step": 32104 }, { "epoch": 0.9839708226063504, "grad_norm": 1.42143336556228, "learning_rate": 6.7363648974666514e-09, "loss": 0.6083, "step": 32105 }, { "epoch": 0.9840014711290915, "grad_norm": 1.419468489915062, "learning_rate": 6.710634805799921e-09, "loss": 0.5975, "step": 32106 }, { "epoch": 0.9840321196518328, "grad_norm": 1.4358212669977228, "learning_rate": 6.684953914291026e-09, "loss": 0.5863, "step": 32107 }, { "epoch": 0.9840627681745739, "grad_norm": 1.5066951328644527, "learning_rate": 6.659322223193098e-09, "loss": 0.6987, "step": 32108 }, { "epoch": 0.9840934166973152, "grad_norm": 1.2628727666499855, "learning_rate": 6.63373973275816e-09, "loss": 0.5499, "step": 32109 }, { "epoch": 0.9841240652200564, "grad_norm": 1.4555582857578984, "learning_rate": 6.608206443238785e-09, "loss": 0.6629, "step": 32110 }, { "epoch": 0.9841547137427976, "grad_norm": 1.2445918373761904, "learning_rate": 6.58272235488644e-09, "loss": 0.5835, "step": 32111 }, { "epoch": 0.9841853622655388, "grad_norm": 1.4658015120094205, "learning_rate": 6.557287467952034e-09, "loss": 0.5338, "step": 32112 }, { "epoch": 0.98421601078828, "grad_norm": 1.2765848285690224, "learning_rate": 6.531901782686478e-09, "loss": 0.5959, "step": 32113 }, { "epoch": 0.9842466593110212, "grad_norm": 1.3819602334342356, "learning_rate": 6.5065652993395736e-09, "loss": 0.6853, "step": 32114 }, { "epoch": 0.9842773078337624, "grad_norm": 1.3905482470508526, "learning_rate": 6.481278018161119e-09, "loss": 0.5959, "step": 32115 }, { "epoch": 0.9843079563565036, "grad_norm": 1.240861171770799, "learning_rate": 6.45603993940036e-09, "loss": 0.6241, "step": 32116 }, { "epoch": 0.9843386048792448, "grad_norm": 1.3003985777366223, "learning_rate": 6.430851063305432e-09, "loss": 0.6291, "step": 32117 }, { "epoch": 0.984369253401986, "grad_norm": 1.2768734675993212, "learning_rate": 6.40571139012558e-09, "loss": 0.5365, "step": 32118 }, { "epoch": 0.9843999019247273, "grad_norm": 1.3422220925271866, "learning_rate": 6.380620920107827e-09, "loss": 0.6021, "step": 32119 }, { "epoch": 0.9844305504474684, "grad_norm": 1.4936072265191982, "learning_rate": 6.3555796534992e-09, "loss": 0.6559, "step": 32120 }, { "epoch": 0.9844611989702097, "grad_norm": 1.5572268301622443, "learning_rate": 6.330587590546722e-09, "loss": 0.6335, "step": 32121 }, { "epoch": 0.9844918474929508, "grad_norm": 1.5405066575906752, "learning_rate": 6.305644731496863e-09, "loss": 0.6048, "step": 32122 }, { "epoch": 0.9845224960156921, "grad_norm": 1.4481201286187435, "learning_rate": 6.280751076594982e-09, "loss": 0.609, "step": 32123 }, { "epoch": 0.9845531445384332, "grad_norm": 1.3617800708921024, "learning_rate": 6.255906626086994e-09, "loss": 0.5535, "step": 32124 }, { "epoch": 0.9845837930611745, "grad_norm": 1.3293434136306825, "learning_rate": 6.231111380217147e-09, "loss": 0.6534, "step": 32125 }, { "epoch": 0.9846144415839156, "grad_norm": 0.42736105426558585, "learning_rate": 6.206365339229692e-09, "loss": 0.3822, "step": 32126 }, { "epoch": 0.9846450901066569, "grad_norm": 1.4042677621449307, "learning_rate": 6.181668503368321e-09, "loss": 0.6937, "step": 32127 }, { "epoch": 0.9846757386293981, "grad_norm": 1.435416452187091, "learning_rate": 6.157020872877284e-09, "loss": 0.6425, "step": 32128 }, { "epoch": 0.9847063871521393, "grad_norm": 1.519839691457344, "learning_rate": 6.13242244799861e-09, "loss": 0.6344, "step": 32129 }, { "epoch": 0.9847370356748805, "grad_norm": 1.4807924203786895, "learning_rate": 6.107873228974881e-09, "loss": 0.5554, "step": 32130 }, { "epoch": 0.9847676841976217, "grad_norm": 1.2978689508764911, "learning_rate": 6.083373216048127e-09, "loss": 0.558, "step": 32131 }, { "epoch": 0.9847983327203629, "grad_norm": 1.301733077393054, "learning_rate": 6.058922409459267e-09, "loss": 0.6298, "step": 32132 }, { "epoch": 0.984828981243104, "grad_norm": 1.2998694829539388, "learning_rate": 6.034520809449773e-09, "loss": 0.6999, "step": 32133 }, { "epoch": 0.9848596297658453, "grad_norm": 1.7847950101772012, "learning_rate": 6.01016841626001e-09, "loss": 0.6327, "step": 32134 }, { "epoch": 0.9848902782885864, "grad_norm": 1.2570327865539772, "learning_rate": 5.985865230129784e-09, "loss": 0.5947, "step": 32135 }, { "epoch": 0.9849209268113277, "grad_norm": 0.4363360663359802, "learning_rate": 5.961611251298904e-09, "loss": 0.3901, "step": 32136 }, { "epoch": 0.9849515753340689, "grad_norm": 1.1359107071812389, "learning_rate": 5.9374064800060695e-09, "loss": 0.5557, "step": 32137 }, { "epoch": 0.9849822238568101, "grad_norm": 1.5549411114565486, "learning_rate": 5.9132509164888664e-09, "loss": 0.5706, "step": 32138 }, { "epoch": 0.9850128723795513, "grad_norm": 1.244677716607316, "learning_rate": 5.889144560987103e-09, "loss": 0.5678, "step": 32139 }, { "epoch": 0.9850435209022925, "grad_norm": 1.3783714810657752, "learning_rate": 5.8650874137372586e-09, "loss": 0.6068, "step": 32140 }, { "epoch": 0.9850741694250337, "grad_norm": 1.2933940919508158, "learning_rate": 5.841079474976363e-09, "loss": 0.6034, "step": 32141 }, { "epoch": 0.9851048179477749, "grad_norm": 2.0171283119412906, "learning_rate": 5.817120744940896e-09, "loss": 0.5953, "step": 32142 }, { "epoch": 0.9851354664705161, "grad_norm": 1.4110917669907672, "learning_rate": 5.793211223867334e-09, "loss": 0.6063, "step": 32143 }, { "epoch": 0.9851661149932573, "grad_norm": 1.213548182203675, "learning_rate": 5.7693509119910455e-09, "loss": 0.641, "step": 32144 }, { "epoch": 0.9851967635159985, "grad_norm": 1.296298336486774, "learning_rate": 5.745539809547396e-09, "loss": 0.5872, "step": 32145 }, { "epoch": 0.9852274120387398, "grad_norm": 1.278467601280112, "learning_rate": 5.721777916770643e-09, "loss": 0.5819, "step": 32146 }, { "epoch": 0.9852580605614809, "grad_norm": 1.4931053542380235, "learning_rate": 5.698065233895045e-09, "loss": 0.6237, "step": 32147 }, { "epoch": 0.9852887090842222, "grad_norm": 1.2628127671925007, "learning_rate": 5.674401761154302e-09, "loss": 0.4936, "step": 32148 }, { "epoch": 0.9853193576069633, "grad_norm": 0.42993321431280673, "learning_rate": 5.650787498781563e-09, "loss": 0.3906, "step": 32149 }, { "epoch": 0.9853500061297046, "grad_norm": 0.4419351364161411, "learning_rate": 5.627222447009417e-09, "loss": 0.4026, "step": 32150 }, { "epoch": 0.9853806546524457, "grad_norm": 1.3092272855213931, "learning_rate": 5.603706606069903e-09, "loss": 0.5805, "step": 32151 }, { "epoch": 0.985411303175187, "grad_norm": 1.3063393150710254, "learning_rate": 5.580239976195057e-09, "loss": 0.61, "step": 32152 }, { "epoch": 0.9854419516979281, "grad_norm": 1.4067434164491714, "learning_rate": 5.556822557615804e-09, "loss": 0.5523, "step": 32153 }, { "epoch": 0.9854726002206694, "grad_norm": 1.316943868961075, "learning_rate": 5.5334543505636275e-09, "loss": 0.615, "step": 32154 }, { "epoch": 0.9855032487434106, "grad_norm": 0.47157802157391043, "learning_rate": 5.5101353552677876e-09, "loss": 0.3712, "step": 32155 }, { "epoch": 0.9855338972661518, "grad_norm": 1.361749200334658, "learning_rate": 5.486865571958655e-09, "loss": 0.6312, "step": 32156 }, { "epoch": 0.985564545788893, "grad_norm": 1.3068189708040658, "learning_rate": 5.463645000864937e-09, "loss": 0.6747, "step": 32157 }, { "epoch": 0.9855951943116342, "grad_norm": 1.3636964022315377, "learning_rate": 5.440473642216449e-09, "loss": 0.581, "step": 32158 }, { "epoch": 0.9856258428343754, "grad_norm": 0.4554524617995601, "learning_rate": 5.417351496240786e-09, "loss": 0.3825, "step": 32159 }, { "epoch": 0.9856564913571166, "grad_norm": 1.4758992229107812, "learning_rate": 5.3942785631655444e-09, "loss": 0.6581, "step": 32160 }, { "epoch": 0.9856871398798578, "grad_norm": 0.4479090792249212, "learning_rate": 5.371254843218321e-09, "loss": 0.3976, "step": 32161 }, { "epoch": 0.985717788402599, "grad_norm": 1.3397312941084654, "learning_rate": 5.34828033662671e-09, "loss": 0.5511, "step": 32162 }, { "epoch": 0.9857484369253402, "grad_norm": 1.4591783284117383, "learning_rate": 5.325355043615532e-09, "loss": 0.6808, "step": 32163 }, { "epoch": 0.9857790854480813, "grad_norm": 1.4590798661454105, "learning_rate": 5.302478964412383e-09, "loss": 0.7162, "step": 32164 }, { "epoch": 0.9858097339708226, "grad_norm": 0.45822868475340645, "learning_rate": 5.279652099241528e-09, "loss": 0.3943, "step": 32165 }, { "epoch": 0.9858403824935638, "grad_norm": 1.24823606746341, "learning_rate": 5.256874448328342e-09, "loss": 0.5446, "step": 32166 }, { "epoch": 0.985871031016305, "grad_norm": 1.4622972418522489, "learning_rate": 5.23414601189709e-09, "loss": 0.5912, "step": 32167 }, { "epoch": 0.9859016795390462, "grad_norm": 1.3457544400740218, "learning_rate": 5.211466790171482e-09, "loss": 0.606, "step": 32168 }, { "epoch": 0.9859323280617874, "grad_norm": 1.4484187713034566, "learning_rate": 5.188836783375228e-09, "loss": 0.5336, "step": 32169 }, { "epoch": 0.9859629765845286, "grad_norm": 1.2802721390756682, "learning_rate": 5.166255991731484e-09, "loss": 0.5544, "step": 32170 }, { "epoch": 0.9859936251072698, "grad_norm": 1.3383513399665068, "learning_rate": 5.143724415462847e-09, "loss": 0.5167, "step": 32171 }, { "epoch": 0.986024273630011, "grad_norm": 1.2454815630215008, "learning_rate": 5.1212420547908095e-09, "loss": 0.5488, "step": 32172 }, { "epoch": 0.9860549221527523, "grad_norm": 1.5914139200555688, "learning_rate": 5.098808909937414e-09, "loss": 0.6429, "step": 32173 }, { "epoch": 0.9860855706754934, "grad_norm": 1.4132979447623415, "learning_rate": 5.07642498112304e-09, "loss": 0.6082, "step": 32174 }, { "epoch": 0.9861162191982347, "grad_norm": 1.3109386948795092, "learning_rate": 5.054090268569178e-09, "loss": 0.6135, "step": 32175 }, { "epoch": 0.9861468677209758, "grad_norm": 0.4728860848288471, "learning_rate": 5.031804772495097e-09, "loss": 0.3873, "step": 32176 }, { "epoch": 0.9861775162437171, "grad_norm": 1.3076050150517435, "learning_rate": 5.00956849312062e-09, "loss": 0.627, "step": 32177 }, { "epoch": 0.9862081647664582, "grad_norm": 0.43546461956766896, "learning_rate": 4.987381430665017e-09, "loss": 0.4046, "step": 32178 }, { "epoch": 0.9862388132891995, "grad_norm": 1.5016913515387322, "learning_rate": 4.965243585346447e-09, "loss": 0.5597, "step": 32179 }, { "epoch": 0.9862694618119406, "grad_norm": 1.416170425388255, "learning_rate": 4.943154957384177e-09, "loss": 0.5858, "step": 32180 }, { "epoch": 0.9863001103346819, "grad_norm": 1.3330707251680078, "learning_rate": 4.921115546994148e-09, "loss": 0.5945, "step": 32181 }, { "epoch": 0.986330758857423, "grad_norm": 0.4243407754154504, "learning_rate": 4.899125354395074e-09, "loss": 0.3743, "step": 32182 }, { "epoch": 0.9863614073801643, "grad_norm": 1.4931396117684828, "learning_rate": 4.877184379802335e-09, "loss": 0.6496, "step": 32183 }, { "epoch": 0.9863920559029055, "grad_norm": 1.1948565945447014, "learning_rate": 4.855292623432983e-09, "loss": 0.524, "step": 32184 }, { "epoch": 0.9864227044256467, "grad_norm": 1.2842624909156923, "learning_rate": 4.8334500855029555e-09, "loss": 0.5925, "step": 32185 }, { "epoch": 0.9864533529483879, "grad_norm": 1.3966022473528834, "learning_rate": 4.811656766226524e-09, "loss": 0.6098, "step": 32186 }, { "epoch": 0.9864840014711291, "grad_norm": 1.3209490641937836, "learning_rate": 4.7899126658190745e-09, "loss": 0.6375, "step": 32187 }, { "epoch": 0.9865146499938703, "grad_norm": 1.3752555945213, "learning_rate": 4.7682177844948775e-09, "loss": 0.6482, "step": 32188 }, { "epoch": 0.9865452985166115, "grad_norm": 1.4346021727577305, "learning_rate": 4.746572122467097e-09, "loss": 0.5365, "step": 32189 }, { "epoch": 0.9865759470393527, "grad_norm": 1.3940676126031322, "learning_rate": 4.72497567994945e-09, "loss": 0.5983, "step": 32190 }, { "epoch": 0.986606595562094, "grad_norm": 1.546601828292645, "learning_rate": 4.703428457155102e-09, "loss": 0.5416, "step": 32191 }, { "epoch": 0.9866372440848351, "grad_norm": 1.319355512110654, "learning_rate": 4.681930454295547e-09, "loss": 0.6122, "step": 32192 }, { "epoch": 0.9866678926075764, "grad_norm": 1.3118144797142064, "learning_rate": 4.660481671583394e-09, "loss": 0.6257, "step": 32193 }, { "epoch": 0.9866985411303175, "grad_norm": 1.3951581060584406, "learning_rate": 4.639082109229587e-09, "loss": 0.6199, "step": 32194 }, { "epoch": 0.9867291896530587, "grad_norm": 1.421140234594682, "learning_rate": 4.617731767445066e-09, "loss": 0.5731, "step": 32195 }, { "epoch": 0.9867598381757999, "grad_norm": 1.3303105707831726, "learning_rate": 4.596430646439664e-09, "loss": 0.5477, "step": 32196 }, { "epoch": 0.9867904866985411, "grad_norm": 1.475685731346054, "learning_rate": 4.575178746424324e-09, "loss": 0.6289, "step": 32197 }, { "epoch": 0.9868211352212823, "grad_norm": 1.2844861653191917, "learning_rate": 4.553976067607768e-09, "loss": 0.5435, "step": 32198 }, { "epoch": 0.9868517837440235, "grad_norm": 1.4687011553489993, "learning_rate": 4.532822610198717e-09, "loss": 0.583, "step": 32199 }, { "epoch": 0.9868824322667648, "grad_norm": 1.3380227894163113, "learning_rate": 4.511718374406448e-09, "loss": 0.5603, "step": 32200 }, { "epoch": 0.9869130807895059, "grad_norm": 1.2187910099103165, "learning_rate": 4.490663360438019e-09, "loss": 0.5298, "step": 32201 }, { "epoch": 0.9869437293122472, "grad_norm": 1.345903997739545, "learning_rate": 4.4696575685010406e-09, "loss": 0.6297, "step": 32202 }, { "epoch": 0.9869743778349883, "grad_norm": 1.3477425940138816, "learning_rate": 4.448700998803124e-09, "loss": 0.5599, "step": 32203 }, { "epoch": 0.9870050263577296, "grad_norm": 1.5081792922692034, "learning_rate": 4.427793651550216e-09, "loss": 0.6809, "step": 32204 }, { "epoch": 0.9870356748804707, "grad_norm": 1.3313665187824746, "learning_rate": 4.406935526948264e-09, "loss": 0.5579, "step": 32205 }, { "epoch": 0.987066323403212, "grad_norm": 2.1562148137531234, "learning_rate": 4.386126625202658e-09, "loss": 0.6737, "step": 32206 }, { "epoch": 0.9870969719259531, "grad_norm": 0.44876633633994706, "learning_rate": 4.365366946519345e-09, "loss": 0.3633, "step": 32207 }, { "epoch": 0.9871276204486944, "grad_norm": 1.4265125385560324, "learning_rate": 4.34465649110205e-09, "loss": 0.6549, "step": 32208 }, { "epoch": 0.9871582689714355, "grad_norm": 1.5920746560694867, "learning_rate": 4.323995259155056e-09, "loss": 0.6501, "step": 32209 }, { "epoch": 0.9871889174941768, "grad_norm": 0.441557724356953, "learning_rate": 4.3033832508815325e-09, "loss": 0.4102, "step": 32210 }, { "epoch": 0.987219566016918, "grad_norm": 0.43200288185791214, "learning_rate": 4.28282046648576e-09, "loss": 0.4074, "step": 32211 }, { "epoch": 0.9872502145396592, "grad_norm": 1.2581495162748404, "learning_rate": 4.262306906168689e-09, "loss": 0.6, "step": 32212 }, { "epoch": 0.9872808630624004, "grad_norm": 1.8484488344098065, "learning_rate": 4.241842570134047e-09, "loss": 0.6545, "step": 32213 }, { "epoch": 0.9873115115851416, "grad_norm": 1.2920102609547024, "learning_rate": 4.221427458582228e-09, "loss": 0.662, "step": 32214 }, { "epoch": 0.9873421601078828, "grad_norm": 1.3151625572610697, "learning_rate": 4.201061571715292e-09, "loss": 0.6068, "step": 32215 }, { "epoch": 0.987372808630624, "grad_norm": 1.3771176610824698, "learning_rate": 4.180744909733636e-09, "loss": 0.6544, "step": 32216 }, { "epoch": 0.9874034571533652, "grad_norm": 1.3502574052630472, "learning_rate": 4.160477472837099e-09, "loss": 0.7069, "step": 32217 }, { "epoch": 0.9874341056761065, "grad_norm": 1.3970705046852654, "learning_rate": 4.140259261225521e-09, "loss": 0.6411, "step": 32218 }, { "epoch": 0.9874647541988476, "grad_norm": 0.4496190204719622, "learning_rate": 4.120090275098187e-09, "loss": 0.3997, "step": 32219 }, { "epoch": 0.9874954027215889, "grad_norm": 1.392170921248532, "learning_rate": 4.099970514653828e-09, "loss": 0.5848, "step": 32220 }, { "epoch": 0.98752605124433, "grad_norm": 1.2789356396136242, "learning_rate": 4.079899980091173e-09, "loss": 0.6008, "step": 32221 }, { "epoch": 0.9875566997670713, "grad_norm": 1.301740924760359, "learning_rate": 4.059878671607287e-09, "loss": 0.6465, "step": 32222 }, { "epoch": 0.9875873482898124, "grad_norm": 0.42115742353696706, "learning_rate": 4.039906589399234e-09, "loss": 0.3974, "step": 32223 }, { "epoch": 0.9876179968125537, "grad_norm": 1.5499337803537487, "learning_rate": 4.019983733664634e-09, "loss": 0.6674, "step": 32224 }, { "epoch": 0.9876486453352948, "grad_norm": 1.5106964865913894, "learning_rate": 4.000110104599442e-09, "loss": 0.5233, "step": 32225 }, { "epoch": 0.987679293858036, "grad_norm": 0.42607439096495553, "learning_rate": 3.980285702399611e-09, "loss": 0.3921, "step": 32226 }, { "epoch": 0.9877099423807773, "grad_norm": 1.4158992689953833, "learning_rate": 3.960510527259986e-09, "loss": 0.5737, "step": 32227 }, { "epoch": 0.9877405909035184, "grad_norm": 1.2788576530259814, "learning_rate": 3.9407845793759665e-09, "loss": 0.6083, "step": 32228 }, { "epoch": 0.9877712394262597, "grad_norm": 1.3988780334726336, "learning_rate": 3.921107858941287e-09, "loss": 0.6206, "step": 32229 }, { "epoch": 0.9878018879490008, "grad_norm": 1.402724777326338, "learning_rate": 3.90148036615079e-09, "loss": 0.6208, "step": 32230 }, { "epoch": 0.9878325364717421, "grad_norm": 1.4212353639171484, "learning_rate": 3.881902101197099e-09, "loss": 0.5614, "step": 32231 }, { "epoch": 0.9878631849944832, "grad_norm": 1.342614512943778, "learning_rate": 3.862373064273395e-09, "loss": 0.5678, "step": 32232 }, { "epoch": 0.9878938335172245, "grad_norm": 1.6295637664469649, "learning_rate": 3.842893255571745e-09, "loss": 0.6019, "step": 32233 }, { "epoch": 0.9879244820399656, "grad_norm": 1.3332923529289946, "learning_rate": 3.823462675284772e-09, "loss": 0.6154, "step": 32234 }, { "epoch": 0.9879551305627069, "grad_norm": 1.3782340026414304, "learning_rate": 3.804081323603437e-09, "loss": 0.5686, "step": 32235 }, { "epoch": 0.987985779085448, "grad_norm": 1.2972978530571448, "learning_rate": 3.784749200718696e-09, "loss": 0.6172, "step": 32236 }, { "epoch": 0.9880164276081893, "grad_norm": 0.44336612022822836, "learning_rate": 3.765466306820953e-09, "loss": 0.3788, "step": 32237 }, { "epoch": 0.9880470761309305, "grad_norm": 1.1993903944710196, "learning_rate": 3.746232642100611e-09, "loss": 0.5758, "step": 32238 }, { "epoch": 0.9880777246536717, "grad_norm": 1.3786462632831078, "learning_rate": 3.727048206746964e-09, "loss": 0.5738, "step": 32239 }, { "epoch": 0.9881083731764129, "grad_norm": 0.4413522729756782, "learning_rate": 3.7079130009493035e-09, "loss": 0.38, "step": 32240 }, { "epoch": 0.9881390216991541, "grad_norm": 1.339169494431623, "learning_rate": 3.6888270248958136e-09, "loss": 0.5663, "step": 32241 }, { "epoch": 0.9881696702218953, "grad_norm": 1.2925655133564855, "learning_rate": 3.6697902787746763e-09, "loss": 0.6375, "step": 32242 }, { "epoch": 0.9882003187446365, "grad_norm": 1.3243612264465692, "learning_rate": 3.6508027627735198e-09, "loss": 0.5944, "step": 32243 }, { "epoch": 0.9882309672673777, "grad_norm": 0.43131363373523596, "learning_rate": 3.6318644770788613e-09, "loss": 0.3966, "step": 32244 }, { "epoch": 0.988261615790119, "grad_norm": 1.3219981043618014, "learning_rate": 3.6129754218783286e-09, "loss": 0.5638, "step": 32245 }, { "epoch": 0.9882922643128601, "grad_norm": 1.3656267333744174, "learning_rate": 3.5941355973573288e-09, "loss": 0.5544, "step": 32246 }, { "epoch": 0.9883229128356014, "grad_norm": 1.4458467103380879, "learning_rate": 3.5753450037018244e-09, "loss": 0.6249, "step": 32247 }, { "epoch": 0.9883535613583425, "grad_norm": 1.30719239246209, "learning_rate": 3.556603641097223e-09, "loss": 0.6083, "step": 32248 }, { "epoch": 0.9883842098810838, "grad_norm": 1.2754264634090327, "learning_rate": 3.5379115097272655e-09, "loss": 0.565, "step": 32249 }, { "epoch": 0.9884148584038249, "grad_norm": 1.4553174742217292, "learning_rate": 3.5192686097768045e-09, "loss": 0.5306, "step": 32250 }, { "epoch": 0.9884455069265662, "grad_norm": 0.4729070715499731, "learning_rate": 3.5006749414295825e-09, "loss": 0.3835, "step": 32251 }, { "epoch": 0.9884761554493073, "grad_norm": 1.6575796981779747, "learning_rate": 3.482130504868231e-09, "loss": 0.6414, "step": 32252 }, { "epoch": 0.9885068039720486, "grad_norm": 1.4788560415380785, "learning_rate": 3.463635300275936e-09, "loss": 0.5512, "step": 32253 }, { "epoch": 0.9885374524947897, "grad_norm": 1.3216891102678805, "learning_rate": 3.445189327834775e-09, "loss": 0.6001, "step": 32254 }, { "epoch": 0.988568101017531, "grad_norm": 0.4256748255408116, "learning_rate": 3.4267925877268238e-09, "loss": 0.3993, "step": 32255 }, { "epoch": 0.9885987495402722, "grad_norm": 1.6116785787914243, "learning_rate": 3.4084450801330493e-09, "loss": 0.7287, "step": 32256 }, { "epoch": 0.9886293980630133, "grad_norm": 1.320050725845917, "learning_rate": 3.3901468052344177e-09, "loss": 0.5581, "step": 32257 }, { "epoch": 0.9886600465857546, "grad_norm": 1.348357142512993, "learning_rate": 3.3718977632113404e-09, "loss": 0.6323, "step": 32258 }, { "epoch": 0.9886906951084957, "grad_norm": 1.3396772449144858, "learning_rate": 3.353697954243118e-09, "loss": 0.5956, "step": 32259 }, { "epoch": 0.988721343631237, "grad_norm": 1.3764695600383685, "learning_rate": 3.335547378509052e-09, "loss": 0.6357, "step": 32260 }, { "epoch": 0.9887519921539781, "grad_norm": 1.4664270491403424, "learning_rate": 3.3174460361884432e-09, "loss": 0.6302, "step": 32261 }, { "epoch": 0.9887826406767194, "grad_norm": 1.3987639606753057, "learning_rate": 3.2993939274594823e-09, "loss": 0.5647, "step": 32262 }, { "epoch": 0.9888132891994605, "grad_norm": 1.1726288130194045, "learning_rate": 3.28139105250036e-09, "loss": 0.621, "step": 32263 }, { "epoch": 0.9888439377222018, "grad_norm": 1.2634500275385778, "learning_rate": 3.2634374114881574e-09, "loss": 0.6065, "step": 32264 }, { "epoch": 0.988874586244943, "grad_norm": 1.713820031752087, "learning_rate": 3.2455330045993994e-09, "loss": 0.6195, "step": 32265 }, { "epoch": 0.9889052347676842, "grad_norm": 1.379343309285053, "learning_rate": 3.2276778320111666e-09, "loss": 0.5552, "step": 32266 }, { "epoch": 0.9889358832904254, "grad_norm": 1.199968972475477, "learning_rate": 3.209871893898875e-09, "loss": 0.5673, "step": 32267 }, { "epoch": 0.9889665318131666, "grad_norm": 0.4403447151519913, "learning_rate": 3.192115190438494e-09, "loss": 0.4006, "step": 32268 }, { "epoch": 0.9889971803359078, "grad_norm": 1.2426868606683794, "learning_rate": 3.174407721804329e-09, "loss": 0.6379, "step": 32269 }, { "epoch": 0.989027828858649, "grad_norm": 1.4182113465310067, "learning_rate": 3.15674948817124e-09, "loss": 0.5968, "step": 32270 }, { "epoch": 0.9890584773813902, "grad_norm": 1.3821299763308654, "learning_rate": 3.1391404897135323e-09, "loss": 0.6155, "step": 32271 }, { "epoch": 0.9890891259041314, "grad_norm": 1.2859692343329674, "learning_rate": 3.121580726604401e-09, "loss": 0.5014, "step": 32272 }, { "epoch": 0.9891197744268726, "grad_norm": 1.2506385702647909, "learning_rate": 3.1040701990164844e-09, "loss": 0.4745, "step": 32273 }, { "epoch": 0.9891504229496139, "grad_norm": 1.367905799618339, "learning_rate": 3.086608907122979e-09, "loss": 0.6515, "step": 32274 }, { "epoch": 0.989181071472355, "grad_norm": 1.3936547031939401, "learning_rate": 3.069196851095413e-09, "loss": 0.6202, "step": 32275 }, { "epoch": 0.9892117199950963, "grad_norm": 1.2516219614188557, "learning_rate": 3.051834031105316e-09, "loss": 0.5749, "step": 32276 }, { "epoch": 0.9892423685178374, "grad_norm": 1.6337307876305438, "learning_rate": 3.0345204473247735e-09, "loss": 0.5557, "step": 32277 }, { "epoch": 0.9892730170405787, "grad_norm": 1.332470503078584, "learning_rate": 3.0172560999230937e-09, "loss": 0.4904, "step": 32278 }, { "epoch": 0.9893036655633198, "grad_norm": 1.566508781739991, "learning_rate": 3.000040989071251e-09, "loss": 0.4824, "step": 32279 }, { "epoch": 0.9893343140860611, "grad_norm": 0.46341911870722846, "learning_rate": 2.9828751149379997e-09, "loss": 0.4058, "step": 32280 }, { "epoch": 0.9893649626088022, "grad_norm": 1.4252185683975196, "learning_rate": 2.9657584776932035e-09, "loss": 0.5955, "step": 32281 }, { "epoch": 0.9893956111315435, "grad_norm": 1.4061916869437836, "learning_rate": 2.9486910775056165e-09, "loss": 0.6139, "step": 32282 }, { "epoch": 0.9894262596542847, "grad_norm": 1.2380912839499039, "learning_rate": 2.9316729145428825e-09, "loss": 0.5244, "step": 32283 }, { "epoch": 0.9894569081770259, "grad_norm": 1.3282140657639776, "learning_rate": 2.9147039889731997e-09, "loss": 0.5887, "step": 32284 }, { "epoch": 0.9894875566997671, "grad_norm": 0.4575468856222069, "learning_rate": 2.8977843009631025e-09, "loss": 0.3888, "step": 32285 }, { "epoch": 0.9895182052225083, "grad_norm": 1.2404232122562662, "learning_rate": 2.8809138506802338e-09, "loss": 0.5491, "step": 32286 }, { "epoch": 0.9895488537452495, "grad_norm": 1.3834169625993162, "learning_rate": 2.864092638290017e-09, "loss": 0.6055, "step": 32287 }, { "epoch": 0.9895795022679906, "grad_norm": 1.2640422066994679, "learning_rate": 2.8473206639584307e-09, "loss": 0.5963, "step": 32288 }, { "epoch": 0.9896101507907319, "grad_norm": 1.3732116234976106, "learning_rate": 2.8305979278508977e-09, "loss": 0.6388, "step": 32289 }, { "epoch": 0.989640799313473, "grad_norm": 1.5292121198530275, "learning_rate": 2.8139244301317316e-09, "loss": 0.6508, "step": 32290 }, { "epoch": 0.9896714478362143, "grad_norm": 1.225961367372643, "learning_rate": 2.7973001709658e-09, "loss": 0.565, "step": 32291 }, { "epoch": 0.9897020963589555, "grad_norm": 0.43785357994649554, "learning_rate": 2.7807251505168608e-09, "loss": 0.3657, "step": 32292 }, { "epoch": 0.9897327448816967, "grad_norm": 1.5262495836121586, "learning_rate": 2.7641993689475623e-09, "loss": 0.5871, "step": 32293 }, { "epoch": 0.9897633934044379, "grad_norm": 1.3023553154642658, "learning_rate": 2.7477228264216614e-09, "loss": 0.5656, "step": 32294 }, { "epoch": 0.9897940419271791, "grad_norm": 1.2994191978975107, "learning_rate": 2.7312955231006966e-09, "loss": 0.5315, "step": 32295 }, { "epoch": 0.9898246904499203, "grad_norm": 1.2636612792499833, "learning_rate": 2.7149174591467597e-09, "loss": 0.6434, "step": 32296 }, { "epoch": 0.9898553389726615, "grad_norm": 1.2496629557022463, "learning_rate": 2.6985886347219438e-09, "loss": 0.6175, "step": 32297 }, { "epoch": 0.9898859874954027, "grad_norm": 0.46589378614307386, "learning_rate": 2.6823090499861204e-09, "loss": 0.4013, "step": 32298 }, { "epoch": 0.989916636018144, "grad_norm": 0.4490956676104739, "learning_rate": 2.6660787051002724e-09, "loss": 0.4017, "step": 32299 }, { "epoch": 0.9899472845408851, "grad_norm": 1.4879081587937044, "learning_rate": 2.6498976002237166e-09, "loss": 0.6533, "step": 32300 }, { "epoch": 0.9899779330636264, "grad_norm": 1.3580684564364216, "learning_rate": 2.63376573551688e-09, "loss": 0.5718, "step": 32301 }, { "epoch": 0.9900085815863675, "grad_norm": 1.438280840257493, "learning_rate": 2.6176831111379697e-09, "loss": 0.7065, "step": 32302 }, { "epoch": 0.9900392301091088, "grad_norm": 1.3870600887302418, "learning_rate": 2.6016497272457473e-09, "loss": 0.6026, "step": 32303 }, { "epoch": 0.9900698786318499, "grad_norm": 0.42172403916216045, "learning_rate": 2.5856655839984203e-09, "loss": 0.3759, "step": 32304 }, { "epoch": 0.9901005271545912, "grad_norm": 1.317457045548617, "learning_rate": 2.5697306815530842e-09, "loss": 0.6354, "step": 32305 }, { "epoch": 0.9901311756773323, "grad_norm": 1.407169676772575, "learning_rate": 2.553845020066281e-09, "loss": 0.6151, "step": 32306 }, { "epoch": 0.9901618242000736, "grad_norm": 1.30693433047685, "learning_rate": 2.538008599695663e-09, "loss": 0.4921, "step": 32307 }, { "epoch": 0.9901924727228147, "grad_norm": 1.2290146748203896, "learning_rate": 2.522221420596105e-09, "loss": 0.5176, "step": 32308 }, { "epoch": 0.990223121245556, "grad_norm": 1.4233502638444682, "learning_rate": 2.5064834829241492e-09, "loss": 0.5987, "step": 32309 }, { "epoch": 0.9902537697682972, "grad_norm": 1.4474323197043257, "learning_rate": 2.4907947868346717e-09, "loss": 0.6089, "step": 32310 }, { "epoch": 0.9902844182910384, "grad_norm": 1.2141994789221993, "learning_rate": 2.475155332481438e-09, "loss": 0.5656, "step": 32311 }, { "epoch": 0.9903150668137796, "grad_norm": 1.470479201690549, "learning_rate": 2.45956512001988e-09, "loss": 0.5781, "step": 32312 }, { "epoch": 0.9903457153365208, "grad_norm": 1.4671892625938348, "learning_rate": 2.4440241496026527e-09, "loss": 0.5844, "step": 32313 }, { "epoch": 0.990376363859262, "grad_norm": 0.4336926185219924, "learning_rate": 2.4285324213829675e-09, "loss": 0.3848, "step": 32314 }, { "epoch": 0.9904070123820032, "grad_norm": 1.4287715807386134, "learning_rate": 2.4130899355140346e-09, "loss": 0.6261, "step": 32315 }, { "epoch": 0.9904376609047444, "grad_norm": 0.43657829667630366, "learning_rate": 2.3976966921468448e-09, "loss": 0.3845, "step": 32316 }, { "epoch": 0.9904683094274856, "grad_norm": 1.2552190198509292, "learning_rate": 2.3823526914346086e-09, "loss": 0.5565, "step": 32317 }, { "epoch": 0.9904989579502268, "grad_norm": 1.3685897820189734, "learning_rate": 2.367057933527206e-09, "loss": 0.6156, "step": 32318 }, { "epoch": 0.990529606472968, "grad_norm": 1.253998231611226, "learning_rate": 2.3518124185761827e-09, "loss": 0.6164, "step": 32319 }, { "epoch": 0.9905602549957092, "grad_norm": 1.4074816352076094, "learning_rate": 2.3366161467314187e-09, "loss": 0.6169, "step": 32320 }, { "epoch": 0.9905909035184504, "grad_norm": 1.4223686628013628, "learning_rate": 2.321469118142794e-09, "loss": 0.5028, "step": 32321 }, { "epoch": 0.9906215520411916, "grad_norm": 1.3461415159302605, "learning_rate": 2.3063713329590787e-09, "loss": 0.5743, "step": 32322 }, { "epoch": 0.9906522005639328, "grad_norm": 1.3731511366473237, "learning_rate": 2.291322791330153e-09, "loss": 0.645, "step": 32323 }, { "epoch": 0.990682849086674, "grad_norm": 1.3160550815232694, "learning_rate": 2.2763234934025656e-09, "loss": 0.5811, "step": 32324 }, { "epoch": 0.9907134976094152, "grad_norm": 1.3915686788388408, "learning_rate": 2.2613734393256427e-09, "loss": 0.623, "step": 32325 }, { "epoch": 0.9907441461321564, "grad_norm": 1.3335355430347906, "learning_rate": 2.2464726292459326e-09, "loss": 0.5812, "step": 32326 }, { "epoch": 0.9907747946548976, "grad_norm": 1.2741873195560305, "learning_rate": 2.2316210633105406e-09, "loss": 0.5312, "step": 32327 }, { "epoch": 0.9908054431776389, "grad_norm": 1.3636820252048967, "learning_rate": 2.2168187416660158e-09, "loss": 0.7017, "step": 32328 }, { "epoch": 0.99083609170038, "grad_norm": 1.3182552668587761, "learning_rate": 2.2020656644577976e-09, "loss": 0.6402, "step": 32329 }, { "epoch": 0.9908667402231213, "grad_norm": 1.307727211894696, "learning_rate": 2.1873618318307698e-09, "loss": 0.4928, "step": 32330 }, { "epoch": 0.9908973887458624, "grad_norm": 1.4275425119026839, "learning_rate": 2.172707243930927e-09, "loss": 0.5818, "step": 32331 }, { "epoch": 0.9909280372686037, "grad_norm": 1.3318192419774073, "learning_rate": 2.1581019009020434e-09, "loss": 0.6127, "step": 32332 }, { "epoch": 0.9909586857913448, "grad_norm": 0.44531370973469525, "learning_rate": 2.143545802888447e-09, "loss": 0.3887, "step": 32333 }, { "epoch": 0.9909893343140861, "grad_norm": 1.2389551020383174, "learning_rate": 2.1290389500328023e-09, "loss": 0.5646, "step": 32334 }, { "epoch": 0.9910199828368272, "grad_norm": 1.3133613378373288, "learning_rate": 2.114581342478883e-09, "loss": 0.611, "step": 32335 }, { "epoch": 0.9910506313595685, "grad_norm": 1.3316732704899958, "learning_rate": 2.1001729803682424e-09, "loss": 0.6001, "step": 32336 }, { "epoch": 0.9910812798823097, "grad_norm": 1.5390097121279849, "learning_rate": 2.0858138638440995e-09, "loss": 0.5619, "step": 32337 }, { "epoch": 0.9911119284050509, "grad_norm": 1.2055136200438616, "learning_rate": 2.071503993046342e-09, "loss": 0.5769, "step": 32338 }, { "epoch": 0.9911425769277921, "grad_norm": 1.4473186522152914, "learning_rate": 2.057243368117634e-09, "loss": 0.5885, "step": 32339 }, { "epoch": 0.9911732254505333, "grad_norm": 1.2949228014017882, "learning_rate": 2.043031989197308e-09, "loss": 0.5437, "step": 32340 }, { "epoch": 0.9912038739732745, "grad_norm": 1.343340444348723, "learning_rate": 2.028869856425808e-09, "loss": 0.6197, "step": 32341 }, { "epoch": 0.9912345224960157, "grad_norm": 1.4505898434290139, "learning_rate": 2.0147569699424664e-09, "loss": 0.686, "step": 32342 }, { "epoch": 0.9912651710187569, "grad_norm": 1.323200903886707, "learning_rate": 2.000693329886616e-09, "loss": 0.6249, "step": 32343 }, { "epoch": 0.9912958195414981, "grad_norm": 1.3295872288423027, "learning_rate": 1.98667893639648e-09, "loss": 0.563, "step": 32344 }, { "epoch": 0.9913264680642393, "grad_norm": 1.3327074734486082, "learning_rate": 1.972713789610836e-09, "loss": 0.552, "step": 32345 }, { "epoch": 0.9913571165869806, "grad_norm": 2.0761539850414987, "learning_rate": 1.958797889666797e-09, "loss": 0.6026, "step": 32346 }, { "epoch": 0.9913877651097217, "grad_norm": 1.5045430119266305, "learning_rate": 1.944931236701475e-09, "loss": 0.6212, "step": 32347 }, { "epoch": 0.991418413632463, "grad_norm": 1.345002985720237, "learning_rate": 1.9311138308514276e-09, "loss": 0.5983, "step": 32348 }, { "epoch": 0.9914490621552041, "grad_norm": 0.43993947445970577, "learning_rate": 1.9173456722526574e-09, "loss": 0.3992, "step": 32349 }, { "epoch": 0.9914797106779453, "grad_norm": 1.374894233843468, "learning_rate": 1.9036267610417215e-09, "loss": 0.679, "step": 32350 }, { "epoch": 0.9915103592006865, "grad_norm": 1.3294893462894994, "learning_rate": 1.889957097352957e-09, "loss": 0.4989, "step": 32351 }, { "epoch": 0.9915410077234277, "grad_norm": 1.489157603686226, "learning_rate": 1.876336681321256e-09, "loss": 0.6359, "step": 32352 }, { "epoch": 0.9915716562461689, "grad_norm": 0.44624159328088064, "learning_rate": 1.8627655130804e-09, "loss": 0.3879, "step": 32353 }, { "epoch": 0.9916023047689101, "grad_norm": 1.2476078322592854, "learning_rate": 1.849243592765282e-09, "loss": 0.5631, "step": 32354 }, { "epoch": 0.9916329532916514, "grad_norm": 1.4330051912274344, "learning_rate": 1.8357709205080177e-09, "loss": 0.6164, "step": 32355 }, { "epoch": 0.9916636018143925, "grad_norm": 1.7688157988934077, "learning_rate": 1.8223474964418343e-09, "loss": 0.504, "step": 32356 }, { "epoch": 0.9916942503371338, "grad_norm": 1.4306863967655479, "learning_rate": 1.808973320698293e-09, "loss": 0.6133, "step": 32357 }, { "epoch": 0.9917248988598749, "grad_norm": 1.2631477070676556, "learning_rate": 1.7956483934106205e-09, "loss": 0.6181, "step": 32358 }, { "epoch": 0.9917555473826162, "grad_norm": 1.415640779607056, "learning_rate": 1.7823727147087132e-09, "loss": 0.5692, "step": 32359 }, { "epoch": 0.9917861959053573, "grad_norm": 1.424594691272618, "learning_rate": 1.7691462847241325e-09, "loss": 0.5881, "step": 32360 }, { "epoch": 0.9918168444280986, "grad_norm": 1.2764748769878096, "learning_rate": 1.7559691035873295e-09, "loss": 0.5641, "step": 32361 }, { "epoch": 0.9918474929508397, "grad_norm": 1.7579081560307506, "learning_rate": 1.742841171427645e-09, "loss": 0.6556, "step": 32362 }, { "epoch": 0.991878141473581, "grad_norm": 1.4212072733473797, "learning_rate": 1.7297624883744203e-09, "loss": 0.613, "step": 32363 }, { "epoch": 0.9919087899963221, "grad_norm": 0.4453966197118416, "learning_rate": 1.716733054556441e-09, "loss": 0.392, "step": 32364 }, { "epoch": 0.9919394385190634, "grad_norm": 1.2063385071661823, "learning_rate": 1.703752870103048e-09, "loss": 0.5527, "step": 32365 }, { "epoch": 0.9919700870418046, "grad_norm": 0.43442305226236977, "learning_rate": 1.6908219351408072e-09, "loss": 0.3789, "step": 32366 }, { "epoch": 0.9920007355645458, "grad_norm": 1.4369212444067745, "learning_rate": 1.6779402497979491e-09, "loss": 0.5656, "step": 32367 }, { "epoch": 0.992031384087287, "grad_norm": 1.3232900063213286, "learning_rate": 1.6651078142015942e-09, "loss": 0.584, "step": 32368 }, { "epoch": 0.9920620326100282, "grad_norm": 1.482022421478721, "learning_rate": 1.6523246284777528e-09, "loss": 0.5281, "step": 32369 }, { "epoch": 0.9920926811327694, "grad_norm": 1.3590857549071433, "learning_rate": 1.63959069275188e-09, "loss": 0.6128, "step": 32370 }, { "epoch": 0.9921233296555106, "grad_norm": 1.351887521239205, "learning_rate": 1.6269060071505416e-09, "loss": 0.5896, "step": 32371 }, { "epoch": 0.9921539781782518, "grad_norm": 0.438721622848745, "learning_rate": 1.6142705717980823e-09, "loss": 0.393, "step": 32372 }, { "epoch": 0.992184626700993, "grad_norm": 1.5736875119644445, "learning_rate": 1.601684386818847e-09, "loss": 0.6526, "step": 32373 }, { "epoch": 0.9922152752237342, "grad_norm": 1.3193776624150964, "learning_rate": 1.5891474523371809e-09, "loss": 0.5301, "step": 32374 }, { "epoch": 0.9922459237464755, "grad_norm": 1.1987864058565025, "learning_rate": 1.5766597684768737e-09, "loss": 0.5873, "step": 32375 }, { "epoch": 0.9922765722692166, "grad_norm": 1.4524112839085432, "learning_rate": 1.564221335360605e-09, "loss": 0.6303, "step": 32376 }, { "epoch": 0.9923072207919579, "grad_norm": 0.45019615588307993, "learning_rate": 1.5518321531104996e-09, "loss": 0.402, "step": 32377 }, { "epoch": 0.992337869314699, "grad_norm": 1.291145248420137, "learning_rate": 1.539492221849237e-09, "loss": 0.5971, "step": 32378 }, { "epoch": 0.9923685178374403, "grad_norm": 1.491604981768234, "learning_rate": 1.5272015416983866e-09, "loss": 0.5737, "step": 32379 }, { "epoch": 0.9923991663601814, "grad_norm": 1.4509250602317432, "learning_rate": 1.5149601127789627e-09, "loss": 0.5548, "step": 32380 }, { "epoch": 0.9924298148829226, "grad_norm": 1.7215946879988737, "learning_rate": 1.5027679352119795e-09, "loss": 0.6892, "step": 32381 }, { "epoch": 0.9924604634056639, "grad_norm": 1.2966371456247523, "learning_rate": 1.490625009116231e-09, "loss": 0.5719, "step": 32382 }, { "epoch": 0.992491111928405, "grad_norm": 1.3761566719939522, "learning_rate": 1.4785313346132868e-09, "loss": 0.6107, "step": 32383 }, { "epoch": 0.9925217604511463, "grad_norm": 1.3074467338899345, "learning_rate": 1.466486911820275e-09, "loss": 0.6209, "step": 32384 }, { "epoch": 0.9925524089738874, "grad_norm": 1.3915758838476446, "learning_rate": 1.4544917408576553e-09, "loss": 0.5233, "step": 32385 }, { "epoch": 0.9925830574966287, "grad_norm": 1.3511437380704903, "learning_rate": 1.442545821842556e-09, "loss": 0.6274, "step": 32386 }, { "epoch": 0.9926137060193698, "grad_norm": 0.43767870478758114, "learning_rate": 1.4306491548932156e-09, "loss": 0.3912, "step": 32387 }, { "epoch": 0.9926443545421111, "grad_norm": 1.3568114500982331, "learning_rate": 1.4188017401262077e-09, "loss": 0.5671, "step": 32388 }, { "epoch": 0.9926750030648522, "grad_norm": 1.456848133857202, "learning_rate": 1.4070035776592162e-09, "loss": 0.6975, "step": 32389 }, { "epoch": 0.9927056515875935, "grad_norm": 1.3170853530924145, "learning_rate": 1.395254667607704e-09, "loss": 0.5864, "step": 32390 }, { "epoch": 0.9927363001103346, "grad_norm": 1.2147239986913667, "learning_rate": 1.3835550100876892e-09, "loss": 0.5862, "step": 32391 }, { "epoch": 0.9927669486330759, "grad_norm": 1.2452913803836034, "learning_rate": 1.3719046052140805e-09, "loss": 0.5493, "step": 32392 }, { "epoch": 0.9927975971558171, "grad_norm": 0.46862724093431146, "learning_rate": 1.3603034531023407e-09, "loss": 0.3827, "step": 32393 }, { "epoch": 0.9928282456785583, "grad_norm": 1.4936779177813988, "learning_rate": 1.3487515538668229e-09, "loss": 0.6438, "step": 32394 }, { "epoch": 0.9928588942012995, "grad_norm": 1.3742712022825694, "learning_rate": 1.3372489076207695e-09, "loss": 0.6378, "step": 32395 }, { "epoch": 0.9928895427240407, "grad_norm": 1.327077875423987, "learning_rate": 1.3257955144774238e-09, "loss": 0.5727, "step": 32396 }, { "epoch": 0.9929201912467819, "grad_norm": 1.9976737888258798, "learning_rate": 1.3143913745505831e-09, "loss": 0.5955, "step": 32397 }, { "epoch": 0.9929508397695231, "grad_norm": 0.4686049613821072, "learning_rate": 1.3030364879518253e-09, "loss": 0.4015, "step": 32398 }, { "epoch": 0.9929814882922643, "grad_norm": 1.3683037990078895, "learning_rate": 1.2917308547932828e-09, "loss": 0.6675, "step": 32399 }, { "epoch": 0.9930121368150056, "grad_norm": 1.612354987038269, "learning_rate": 1.2804744751859777e-09, "loss": 0.7511, "step": 32400 }, { "epoch": 0.9930427853377467, "grad_norm": 1.241338103559842, "learning_rate": 1.2692673492414875e-09, "loss": 0.5297, "step": 32401 }, { "epoch": 0.993073433860488, "grad_norm": 1.3792987928680283, "learning_rate": 1.2581094770697243e-09, "loss": 0.6438, "step": 32402 }, { "epoch": 0.9931040823832291, "grad_norm": 1.2714570313692934, "learning_rate": 1.2470008587806004e-09, "loss": 0.5692, "step": 32403 }, { "epoch": 0.9931347309059704, "grad_norm": 1.525253849127341, "learning_rate": 1.2359414944840276e-09, "loss": 0.5634, "step": 32404 }, { "epoch": 0.9931653794287115, "grad_norm": 1.180707957247849, "learning_rate": 1.2249313842882527e-09, "loss": 0.5944, "step": 32405 }, { "epoch": 0.9931960279514528, "grad_norm": 1.5143860764629784, "learning_rate": 1.2139705283026326e-09, "loss": 0.6796, "step": 32406 }, { "epoch": 0.9932266764741939, "grad_norm": 0.45415308961528694, "learning_rate": 1.203058926634859e-09, "loss": 0.3854, "step": 32407 }, { "epoch": 0.9932573249969352, "grad_norm": 1.129438031475769, "learning_rate": 1.1921965793920687e-09, "loss": 0.5683, "step": 32408 }, { "epoch": 0.9932879735196763, "grad_norm": 1.696447810710803, "learning_rate": 1.1813834866819529e-09, "loss": 0.5953, "step": 32409 }, { "epoch": 0.9933186220424176, "grad_norm": 1.4348330829335099, "learning_rate": 1.170619648609983e-09, "loss": 0.7123, "step": 32410 }, { "epoch": 0.9933492705651588, "grad_norm": 0.45582941038876745, "learning_rate": 1.1599050652832955e-09, "loss": 0.3995, "step": 32411 }, { "epoch": 0.9933799190878999, "grad_norm": 1.4347454987146375, "learning_rate": 1.1492397368073615e-09, "loss": 0.5458, "step": 32412 }, { "epoch": 0.9934105676106412, "grad_norm": 1.3425805421848267, "learning_rate": 1.1386236632865421e-09, "loss": 0.585, "step": 32413 }, { "epoch": 0.9934412161333823, "grad_norm": 1.209771847823531, "learning_rate": 1.1280568448263084e-09, "loss": 0.5747, "step": 32414 }, { "epoch": 0.9934718646561236, "grad_norm": 1.3148340904256477, "learning_rate": 1.1175392815299112e-09, "loss": 0.6217, "step": 32415 }, { "epoch": 0.9935025131788647, "grad_norm": 1.226370036432322, "learning_rate": 1.1070709735017115e-09, "loss": 0.59, "step": 32416 }, { "epoch": 0.993533161701606, "grad_norm": 1.2493938512234175, "learning_rate": 1.0966519208444048e-09, "loss": 0.6956, "step": 32417 }, { "epoch": 0.9935638102243471, "grad_norm": 1.264276027623964, "learning_rate": 1.0862821236606868e-09, "loss": 0.4745, "step": 32418 }, { "epoch": 0.9935944587470884, "grad_norm": 1.270853075426002, "learning_rate": 1.0759615820532532e-09, "loss": 0.5857, "step": 32419 }, { "epoch": 0.9936251072698296, "grad_norm": 0.4699922514242545, "learning_rate": 1.065690296123134e-09, "loss": 0.3816, "step": 32420 }, { "epoch": 0.9936557557925708, "grad_norm": 0.42481254390404727, "learning_rate": 1.0554682659719152e-09, "loss": 0.3693, "step": 32421 }, { "epoch": 0.993686404315312, "grad_norm": 1.5739105363753205, "learning_rate": 1.0452954917000713e-09, "loss": 0.5405, "step": 32422 }, { "epoch": 0.9937170528380532, "grad_norm": 1.5364311733753202, "learning_rate": 1.035171973408078e-09, "loss": 0.6122, "step": 32423 }, { "epoch": 0.9937477013607944, "grad_norm": 1.4125448016861395, "learning_rate": 1.0250977111952998e-09, "loss": 0.5765, "step": 32424 }, { "epoch": 0.9937783498835356, "grad_norm": 1.450425487851785, "learning_rate": 1.0150727051616572e-09, "loss": 0.7035, "step": 32425 }, { "epoch": 0.9938089984062768, "grad_norm": 1.3223574219346232, "learning_rate": 1.0050969554054047e-09, "loss": 0.5171, "step": 32426 }, { "epoch": 0.993839646929018, "grad_norm": 1.468450568680024, "learning_rate": 9.95170462024797e-10, "loss": 0.6356, "step": 32427 }, { "epoch": 0.9938702954517592, "grad_norm": 1.3285254269306588, "learning_rate": 9.852932251180891e-10, "loss": 0.5853, "step": 32428 }, { "epoch": 0.9939009439745005, "grad_norm": 0.43056189821288793, "learning_rate": 9.754652447818702e-10, "loss": 0.374, "step": 32429 }, { "epoch": 0.9939315924972416, "grad_norm": 1.3649810945564962, "learning_rate": 9.6568652111384e-10, "loss": 0.5205, "step": 32430 }, { "epoch": 0.9939622410199829, "grad_norm": 1.4789857237233686, "learning_rate": 9.559570542100327e-10, "loss": 0.605, "step": 32431 }, { "epoch": 0.993992889542724, "grad_norm": 1.5739298812991946, "learning_rate": 9.462768441659276e-10, "loss": 0.5816, "step": 32432 }, { "epoch": 0.9940235380654653, "grad_norm": 1.3607462630843568, "learning_rate": 9.366458910775588e-10, "loss": 0.6318, "step": 32433 }, { "epoch": 0.9940541865882064, "grad_norm": 1.4389329119191674, "learning_rate": 9.270641950392956e-10, "loss": 0.6499, "step": 32434 }, { "epoch": 0.9940848351109477, "grad_norm": 1.5205588289793048, "learning_rate": 9.175317561460617e-10, "loss": 0.5702, "step": 32435 }, { "epoch": 0.9941154836336888, "grad_norm": 1.3035664758012464, "learning_rate": 9.08048574491116e-10, "loss": 0.4908, "step": 32436 }, { "epoch": 0.9941461321564301, "grad_norm": 0.4417679987461104, "learning_rate": 8.986146501682724e-10, "loss": 0.4006, "step": 32437 }, { "epoch": 0.9941767806791713, "grad_norm": 1.1852719001791328, "learning_rate": 8.892299832707896e-10, "loss": 0.4997, "step": 32438 }, { "epoch": 0.9942074292019125, "grad_norm": 1.4122785262224449, "learning_rate": 8.798945738902609e-10, "loss": 0.6136, "step": 32439 }, { "epoch": 0.9942380777246537, "grad_norm": 0.4557566480442034, "learning_rate": 8.7060842211939e-10, "loss": 0.3954, "step": 32440 }, { "epoch": 0.9942687262473949, "grad_norm": 1.5264813736850105, "learning_rate": 8.613715280497703e-10, "loss": 0.6287, "step": 32441 }, { "epoch": 0.9942993747701361, "grad_norm": 1.2498273449357133, "learning_rate": 8.52183891771885e-10, "loss": 0.5792, "step": 32442 }, { "epoch": 0.9943300232928772, "grad_norm": 1.2502034827828032, "learning_rate": 8.430455133767723e-10, "loss": 0.5258, "step": 32443 }, { "epoch": 0.9943606718156185, "grad_norm": 1.5174259121903686, "learning_rate": 8.339563929538052e-10, "loss": 0.6331, "step": 32444 }, { "epoch": 0.9943913203383596, "grad_norm": 1.336642904571801, "learning_rate": 8.249165305929119e-10, "loss": 0.5628, "step": 32445 }, { "epoch": 0.9944219688611009, "grad_norm": 1.3192133467152718, "learning_rate": 8.159259263834651e-10, "loss": 0.6641, "step": 32446 }, { "epoch": 0.994452617383842, "grad_norm": 1.2750693848681238, "learning_rate": 8.069845804142828e-10, "loss": 0.6085, "step": 32447 }, { "epoch": 0.9944832659065833, "grad_norm": 1.234878209454636, "learning_rate": 7.980924927725175e-10, "loss": 0.6089, "step": 32448 }, { "epoch": 0.9945139144293245, "grad_norm": 1.5564631240255162, "learning_rate": 7.892496635458769e-10, "loss": 0.5859, "step": 32449 }, { "epoch": 0.9945445629520657, "grad_norm": 1.4506047847228145, "learning_rate": 7.804560928226234e-10, "loss": 0.5392, "step": 32450 }, { "epoch": 0.9945752114748069, "grad_norm": 1.2815054242714226, "learning_rate": 7.717117806876895e-10, "loss": 0.5945, "step": 32451 }, { "epoch": 0.9946058599975481, "grad_norm": 1.491131303217219, "learning_rate": 7.630167272287825e-10, "loss": 0.616, "step": 32452 }, { "epoch": 0.9946365085202893, "grad_norm": 1.2443880335386444, "learning_rate": 7.543709325313897e-10, "loss": 0.4858, "step": 32453 }, { "epoch": 0.9946671570430305, "grad_norm": 1.453816772264536, "learning_rate": 7.457743966793329e-10, "loss": 0.6415, "step": 32454 }, { "epoch": 0.9946978055657717, "grad_norm": 1.2768483823711951, "learning_rate": 7.372271197592096e-10, "loss": 0.5747, "step": 32455 }, { "epoch": 0.994728454088513, "grad_norm": 1.2949328870890624, "learning_rate": 7.287291018537312e-10, "loss": 0.5901, "step": 32456 }, { "epoch": 0.9947591026112541, "grad_norm": 1.4000947439693714, "learning_rate": 7.202803430472749e-10, "loss": 0.623, "step": 32457 }, { "epoch": 0.9947897511339954, "grad_norm": 1.3019696723991057, "learning_rate": 7.118808434231073e-10, "loss": 0.636, "step": 32458 }, { "epoch": 0.9948203996567365, "grad_norm": 1.4768521114683408, "learning_rate": 7.0353060306394e-10, "loss": 0.5913, "step": 32459 }, { "epoch": 0.9948510481794778, "grad_norm": 1.2491769389648557, "learning_rate": 6.952296220519294e-10, "loss": 0.5472, "step": 32460 }, { "epoch": 0.9948816967022189, "grad_norm": 0.4268764934755938, "learning_rate": 6.869779004692323e-10, "loss": 0.382, "step": 32461 }, { "epoch": 0.9949123452249602, "grad_norm": 1.3509679439416944, "learning_rate": 6.787754383963396e-10, "loss": 0.6014, "step": 32462 }, { "epoch": 0.9949429937477013, "grad_norm": 1.3529645250683968, "learning_rate": 6.706222359148529e-10, "loss": 0.5538, "step": 32463 }, { "epoch": 0.9949736422704426, "grad_norm": 1.5705073682515673, "learning_rate": 6.62518293104708e-10, "loss": 0.6868, "step": 32464 }, { "epoch": 0.9950042907931838, "grad_norm": 1.324645258668749, "learning_rate": 6.544636100463963e-10, "loss": 0.7273, "step": 32465 }, { "epoch": 0.995034939315925, "grad_norm": 1.3046006932885155, "learning_rate": 6.464581868181885e-10, "loss": 0.4578, "step": 32466 }, { "epoch": 0.9950655878386662, "grad_norm": 1.1743341444800286, "learning_rate": 6.385020235000206e-10, "loss": 0.5146, "step": 32467 }, { "epoch": 0.9950962363614074, "grad_norm": 0.43726632958115536, "learning_rate": 6.305951201696081e-10, "loss": 0.3971, "step": 32468 }, { "epoch": 0.9951268848841486, "grad_norm": 1.4558406350740494, "learning_rate": 6.227374769052219e-10, "loss": 0.5551, "step": 32469 }, { "epoch": 0.9951575334068898, "grad_norm": 1.226773206048454, "learning_rate": 6.149290937840224e-10, "loss": 0.5789, "step": 32470 }, { "epoch": 0.995188181929631, "grad_norm": 1.3949530466232172, "learning_rate": 6.071699708831702e-10, "loss": 0.5743, "step": 32471 }, { "epoch": 0.9952188304523722, "grad_norm": 1.2220633768589473, "learning_rate": 5.994601082787155e-10, "loss": 0.5163, "step": 32472 }, { "epoch": 0.9952494789751134, "grad_norm": 1.2385589462975997, "learning_rate": 5.917995060472636e-10, "loss": 0.5797, "step": 32473 }, { "epoch": 0.9952801274978545, "grad_norm": 1.343451313815146, "learning_rate": 5.841881642637548e-10, "loss": 0.6209, "step": 32474 }, { "epoch": 0.9953107760205958, "grad_norm": 1.4239509614710006, "learning_rate": 5.766260830036841e-10, "loss": 0.6072, "step": 32475 }, { "epoch": 0.995341424543337, "grad_norm": 1.2925395684435812, "learning_rate": 5.691132623414364e-10, "loss": 0.6184, "step": 32476 }, { "epoch": 0.9953720730660782, "grad_norm": 1.5230942631482864, "learning_rate": 5.616497023502865e-10, "loss": 0.6443, "step": 32477 }, { "epoch": 0.9954027215888194, "grad_norm": 1.4534189525328718, "learning_rate": 5.542354031046193e-10, "loss": 0.6344, "step": 32478 }, { "epoch": 0.9954333701115606, "grad_norm": 1.3862074779942468, "learning_rate": 5.468703646771545e-10, "loss": 0.5484, "step": 32479 }, { "epoch": 0.9954640186343018, "grad_norm": 1.367301852684022, "learning_rate": 5.395545871406116e-10, "loss": 0.6281, "step": 32480 }, { "epoch": 0.995494667157043, "grad_norm": 1.393761282150453, "learning_rate": 5.322880705671552e-10, "loss": 0.4977, "step": 32481 }, { "epoch": 0.9955253156797842, "grad_norm": 1.3534625855404514, "learning_rate": 5.250708150283946e-10, "loss": 0.5816, "step": 32482 }, { "epoch": 0.9955559642025255, "grad_norm": 1.291722220107527, "learning_rate": 5.179028205948289e-10, "loss": 0.6062, "step": 32483 }, { "epoch": 0.9955866127252666, "grad_norm": 1.4348944618126502, "learning_rate": 5.107840873375125e-10, "loss": 0.622, "step": 32484 }, { "epoch": 0.9956172612480079, "grad_norm": 1.2470752838293953, "learning_rate": 5.037146153269446e-10, "loss": 0.5618, "step": 32485 }, { "epoch": 0.995647909770749, "grad_norm": 1.1504205007355068, "learning_rate": 4.96694404632514e-10, "loss": 0.6483, "step": 32486 }, { "epoch": 0.9956785582934903, "grad_norm": 1.3273408822032498, "learning_rate": 4.897234553230546e-10, "loss": 0.5939, "step": 32487 }, { "epoch": 0.9957092068162314, "grad_norm": 1.4067560976731486, "learning_rate": 4.828017674674002e-10, "loss": 0.6209, "step": 32488 }, { "epoch": 0.9957398553389727, "grad_norm": 1.2495851228784747, "learning_rate": 4.759293411343846e-10, "loss": 0.6167, "step": 32489 }, { "epoch": 0.9957705038617138, "grad_norm": 1.0538695298178757, "learning_rate": 4.691061763906213e-10, "loss": 0.507, "step": 32490 }, { "epoch": 0.9958011523844551, "grad_norm": 1.3543740736620076, "learning_rate": 4.623322733043889e-10, "loss": 0.6409, "step": 32491 }, { "epoch": 0.9958318009071963, "grad_norm": 0.43640871245889773, "learning_rate": 4.556076319417457e-10, "loss": 0.381, "step": 32492 }, { "epoch": 0.9958624494299375, "grad_norm": 1.1012622404048935, "learning_rate": 4.489322523693052e-10, "loss": 0.4403, "step": 32493 }, { "epoch": 0.9958930979526787, "grad_norm": 1.307074516776296, "learning_rate": 4.4230613465257033e-10, "loss": 0.6285, "step": 32494 }, { "epoch": 0.9959237464754199, "grad_norm": 1.2781503395298979, "learning_rate": 4.3572927885704443e-10, "loss": 0.5033, "step": 32495 }, { "epoch": 0.9959543949981611, "grad_norm": 1.2979884421166248, "learning_rate": 4.2920168504767547e-10, "loss": 0.5864, "step": 32496 }, { "epoch": 0.9959850435209023, "grad_norm": 0.4575617977984515, "learning_rate": 4.2272335328830127e-10, "loss": 0.3951, "step": 32497 }, { "epoch": 0.9960156920436435, "grad_norm": 1.3550906320058473, "learning_rate": 4.162942836433148e-10, "loss": 0.6557, "step": 32498 }, { "epoch": 0.9960463405663847, "grad_norm": 1.2177829565766463, "learning_rate": 4.0991447617599876e-10, "loss": 0.6533, "step": 32499 }, { "epoch": 0.9960769890891259, "grad_norm": 1.4360505539142276, "learning_rate": 4.035839309485257e-10, "loss": 0.6549, "step": 32500 }, { "epoch": 0.9961076376118672, "grad_norm": 1.4010879088153618, "learning_rate": 3.973026480236231e-10, "loss": 0.6067, "step": 32501 }, { "epoch": 0.9961382861346083, "grad_norm": 1.1971526113381004, "learning_rate": 3.9107062746346346e-10, "loss": 0.5858, "step": 32502 }, { "epoch": 0.9961689346573496, "grad_norm": 1.2778569427180253, "learning_rate": 3.848878693296643e-10, "loss": 0.5581, "step": 32503 }, { "epoch": 0.9961995831800907, "grad_norm": 1.4785326086691613, "learning_rate": 3.787543736821775e-10, "loss": 0.6066, "step": 32504 }, { "epoch": 0.9962302317028319, "grad_norm": 1.4113947440810959, "learning_rate": 3.726701405826205e-10, "loss": 0.6535, "step": 32505 }, { "epoch": 0.9962608802255731, "grad_norm": 1.290848281186415, "learning_rate": 3.666351700898352e-10, "loss": 0.567, "step": 32506 }, { "epoch": 0.9962915287483143, "grad_norm": 1.5152997222610276, "learning_rate": 3.606494622643286e-10, "loss": 0.5931, "step": 32507 }, { "epoch": 0.9963221772710555, "grad_norm": 1.1987622663507072, "learning_rate": 3.5471301716383246e-10, "loss": 0.6738, "step": 32508 }, { "epoch": 0.9963528257937967, "grad_norm": 1.2564875380091407, "learning_rate": 3.4882583484829867e-10, "loss": 0.6019, "step": 32509 }, { "epoch": 0.996383474316538, "grad_norm": 1.4267156183989245, "learning_rate": 3.4298791537434874e-10, "loss": 0.6304, "step": 32510 }, { "epoch": 0.9964141228392791, "grad_norm": 1.3078389374544541, "learning_rate": 3.3719925880082437e-10, "loss": 0.6193, "step": 32511 }, { "epoch": 0.9964447713620204, "grad_norm": 0.43698655117060387, "learning_rate": 3.314598651837919e-10, "loss": 0.3836, "step": 32512 }, { "epoch": 0.9964754198847615, "grad_norm": 1.2201294115222596, "learning_rate": 3.257697345798727e-10, "loss": 0.5994, "step": 32513 }, { "epoch": 0.9965060684075028, "grad_norm": 1.2908095409107823, "learning_rate": 3.2012886704568814e-10, "loss": 0.5978, "step": 32514 }, { "epoch": 0.9965367169302439, "grad_norm": 1.2615492952319776, "learning_rate": 3.1453726263619424e-10, "loss": 0.5741, "step": 32515 }, { "epoch": 0.9965673654529852, "grad_norm": 1.270626987414223, "learning_rate": 3.089949214069021e-10, "loss": 0.5206, "step": 32516 }, { "epoch": 0.9965980139757263, "grad_norm": 1.280531133151038, "learning_rate": 3.035018434127679e-10, "loss": 0.5539, "step": 32517 }, { "epoch": 0.9966286624984676, "grad_norm": 1.4662186047372987, "learning_rate": 2.9805802870708224e-10, "loss": 0.5743, "step": 32518 }, { "epoch": 0.9966593110212087, "grad_norm": 1.3039987427751976, "learning_rate": 2.926634773436909e-10, "loss": 0.5312, "step": 32519 }, { "epoch": 0.99668995954395, "grad_norm": 0.43831203926298656, "learning_rate": 2.8731818937588473e-10, "loss": 0.3996, "step": 32520 }, { "epoch": 0.9967206080666912, "grad_norm": 1.4858262800697282, "learning_rate": 2.820221648569543e-10, "loss": 0.61, "step": 32521 }, { "epoch": 0.9967512565894324, "grad_norm": 1.2225070503108213, "learning_rate": 2.7677540383796996e-10, "loss": 0.5711, "step": 32522 }, { "epoch": 0.9967819051121736, "grad_norm": 1.2648134686643207, "learning_rate": 2.7157790637111213e-10, "loss": 0.6359, "step": 32523 }, { "epoch": 0.9968125536349148, "grad_norm": 1.2507274571527336, "learning_rate": 2.664296725080062e-10, "loss": 0.5218, "step": 32524 }, { "epoch": 0.996843202157656, "grad_norm": 0.4456385597850242, "learning_rate": 2.613307022986122e-10, "loss": 0.3913, "step": 32525 }, { "epoch": 0.9968738506803972, "grad_norm": 1.422238440356898, "learning_rate": 2.5628099579344535e-10, "loss": 0.65, "step": 32526 }, { "epoch": 0.9969044992031384, "grad_norm": 2.0390824161975085, "learning_rate": 2.5128055304302067e-10, "loss": 0.6306, "step": 32527 }, { "epoch": 0.9969351477258797, "grad_norm": 1.4673454125126544, "learning_rate": 2.4632937409563297e-10, "loss": 0.5878, "step": 32528 }, { "epoch": 0.9969657962486208, "grad_norm": 1.3556669288652128, "learning_rate": 2.4142745900013196e-10, "loss": 0.6224, "step": 32529 }, { "epoch": 0.9969964447713621, "grad_norm": 1.3747760960675388, "learning_rate": 2.365748078053676e-10, "loss": 0.6021, "step": 32530 }, { "epoch": 0.9970270932941032, "grad_norm": 1.4413036607189935, "learning_rate": 2.3177142055907931e-10, "loss": 0.6183, "step": 32531 }, { "epoch": 0.9970577418168445, "grad_norm": 1.351448228155245, "learning_rate": 2.2701729730789657e-10, "loss": 0.5571, "step": 32532 }, { "epoch": 0.9970883903395856, "grad_norm": 1.5710391400636332, "learning_rate": 2.2231243809955895e-10, "loss": 0.574, "step": 32533 }, { "epoch": 0.9971190388623269, "grad_norm": 1.4292258007581002, "learning_rate": 2.1765684298014068e-10, "loss": 0.6706, "step": 32534 }, { "epoch": 0.997149687385068, "grad_norm": 1.4191032454288144, "learning_rate": 2.130505119951609e-10, "loss": 0.5816, "step": 32535 }, { "epoch": 0.9971803359078092, "grad_norm": 0.4424287711269813, "learning_rate": 2.084934451901388e-10, "loss": 0.3761, "step": 32536 }, { "epoch": 0.9972109844305505, "grad_norm": 1.2791135746921094, "learning_rate": 2.039856426100384e-10, "loss": 0.653, "step": 32537 }, { "epoch": 0.9972416329532916, "grad_norm": 1.2791000395600816, "learning_rate": 1.995271042998237e-10, "loss": 0.5834, "step": 32538 }, { "epoch": 0.9972722814760329, "grad_norm": 1.1780500529756943, "learning_rate": 1.951178303022383e-10, "loss": 0.5783, "step": 32539 }, { "epoch": 0.997302929998774, "grad_norm": 1.3511958542055358, "learning_rate": 1.9075782066169114e-10, "loss": 0.629, "step": 32540 }, { "epoch": 0.9973335785215153, "grad_norm": 0.4411028652337443, "learning_rate": 1.8644707542092578e-10, "loss": 0.3841, "step": 32541 }, { "epoch": 0.9973642270442564, "grad_norm": 0.4674503824161698, "learning_rate": 1.8218559462268582e-10, "loss": 0.3608, "step": 32542 }, { "epoch": 0.9973948755669977, "grad_norm": 1.3886016378435113, "learning_rate": 1.779733783080495e-10, "loss": 0.6904, "step": 32543 }, { "epoch": 0.9974255240897388, "grad_norm": 1.3003795627967767, "learning_rate": 1.7381042651920531e-10, "loss": 0.5255, "step": 32544 }, { "epoch": 0.9974561726124801, "grad_norm": 1.255688612074314, "learning_rate": 1.696967392972315e-10, "loss": 0.4747, "step": 32545 }, { "epoch": 0.9974868211352212, "grad_norm": 1.2571594194282, "learning_rate": 1.6563231668265124e-10, "loss": 0.5154, "step": 32546 }, { "epoch": 0.9975174696579625, "grad_norm": 1.4184345640782665, "learning_rate": 1.616171587154325e-10, "loss": 0.5587, "step": 32547 }, { "epoch": 0.9975481181807037, "grad_norm": 1.5761281775462828, "learning_rate": 1.576512654344331e-10, "loss": 0.5333, "step": 32548 }, { "epoch": 0.9975787667034449, "grad_norm": 1.3780514496571488, "learning_rate": 1.5373463687962108e-10, "loss": 0.5892, "step": 32549 }, { "epoch": 0.9976094152261861, "grad_norm": 0.42009086648694643, "learning_rate": 1.4986727308985427e-10, "loss": 0.3974, "step": 32550 }, { "epoch": 0.9976400637489273, "grad_norm": 1.2263790840383497, "learning_rate": 1.4604917410232511e-10, "loss": 0.4887, "step": 32551 }, { "epoch": 0.9976707122716685, "grad_norm": 1.362478741777102, "learning_rate": 1.4228033995478118e-10, "loss": 0.5539, "step": 32552 }, { "epoch": 0.9977013607944097, "grad_norm": 1.143384353012688, "learning_rate": 1.385607706849701e-10, "loss": 0.5016, "step": 32553 }, { "epoch": 0.9977320093171509, "grad_norm": 1.3801974318364105, "learning_rate": 1.3489046632897406e-10, "loss": 0.6467, "step": 32554 }, { "epoch": 0.9977626578398922, "grad_norm": 1.3230972432578896, "learning_rate": 1.3126942692343046e-10, "loss": 0.6182, "step": 32555 }, { "epoch": 0.9977933063626333, "grad_norm": 1.2881224614427669, "learning_rate": 1.2769765250331135e-10, "loss": 0.6738, "step": 32556 }, { "epoch": 0.9978239548853746, "grad_norm": 1.3176653868647021, "learning_rate": 1.241751431046989e-10, "loss": 0.6859, "step": 32557 }, { "epoch": 0.9978546034081157, "grad_norm": 1.4598209589306583, "learning_rate": 1.20701898761455e-10, "loss": 0.6305, "step": 32558 }, { "epoch": 0.997885251930857, "grad_norm": 1.2347883858521833, "learning_rate": 1.172779195085516e-10, "loss": 0.5159, "step": 32559 }, { "epoch": 0.9979159004535981, "grad_norm": 1.178068768363857, "learning_rate": 1.1390320537929545e-10, "loss": 0.5826, "step": 32560 }, { "epoch": 0.9979465489763394, "grad_norm": 0.454739922280962, "learning_rate": 1.105777564069932e-10, "loss": 0.3914, "step": 32561 }, { "epoch": 0.9979771974990805, "grad_norm": 1.3000728781958042, "learning_rate": 1.0730157262495156e-10, "loss": 0.558, "step": 32562 }, { "epoch": 0.9980078460218218, "grad_norm": 1.3551311134980415, "learning_rate": 1.0407465406425677e-10, "loss": 0.5926, "step": 32563 }, { "epoch": 0.998038494544563, "grad_norm": 1.4152287800894812, "learning_rate": 1.0089700075766041e-10, "loss": 0.6414, "step": 32564 }, { "epoch": 0.9980691430673042, "grad_norm": 0.4532830379046418, "learning_rate": 9.776861273624871e-11, "loss": 0.4133, "step": 32565 }, { "epoch": 0.9980997915900454, "grad_norm": 1.2582362120858337, "learning_rate": 9.468949003055283e-11, "loss": 0.5535, "step": 32566 }, { "epoch": 0.9981304401127865, "grad_norm": 1.3241604918807577, "learning_rate": 9.165963267110389e-11, "loss": 0.5588, "step": 32567 }, { "epoch": 0.9981610886355278, "grad_norm": 1.7720045659988017, "learning_rate": 8.867904068843303e-11, "loss": 0.5721, "step": 32568 }, { "epoch": 0.9981917371582689, "grad_norm": 1.4416514231729207, "learning_rate": 8.574771411085093e-11, "loss": 0.6574, "step": 32569 }, { "epoch": 0.9982223856810102, "grad_norm": 1.16073160467542, "learning_rate": 8.286565296777848e-11, "loss": 0.579, "step": 32570 }, { "epoch": 0.9982530342037513, "grad_norm": 1.119564606324833, "learning_rate": 8.00328572869713e-11, "loss": 0.461, "step": 32571 }, { "epoch": 0.9982836827264926, "grad_norm": 1.1956680322207343, "learning_rate": 7.724932709785027e-11, "loss": 0.53, "step": 32572 }, { "epoch": 0.9983143312492337, "grad_norm": 0.46419751918396485, "learning_rate": 7.451506242595053e-11, "loss": 0.3964, "step": 32573 }, { "epoch": 0.998344979771975, "grad_norm": 0.43869357380701013, "learning_rate": 7.183006329958276e-11, "loss": 0.3918, "step": 32574 }, { "epoch": 0.9983756282947162, "grad_norm": 1.475012866599754, "learning_rate": 6.919432974483719e-11, "loss": 0.6093, "step": 32575 }, { "epoch": 0.9984062768174574, "grad_norm": 0.42292647489780616, "learning_rate": 6.660786178780409e-11, "loss": 0.37, "step": 32576 }, { "epoch": 0.9984369253401986, "grad_norm": 0.44570086304132306, "learning_rate": 6.407065945346347e-11, "loss": 0.3723, "step": 32577 }, { "epoch": 0.9984675738629398, "grad_norm": 1.392302969614949, "learning_rate": 6.158272276679533e-11, "loss": 0.5662, "step": 32578 }, { "epoch": 0.998498222385681, "grad_norm": 1.293253399831353, "learning_rate": 5.914405175333482e-11, "loss": 0.5618, "step": 32579 }, { "epoch": 0.9985288709084222, "grad_norm": 0.43938022934617565, "learning_rate": 5.67546464358415e-11, "loss": 0.3877, "step": 32580 }, { "epoch": 0.9985595194311634, "grad_norm": 1.3276806788898645, "learning_rate": 5.441450683874028e-11, "loss": 0.5618, "step": 32581 }, { "epoch": 0.9985901679539047, "grad_norm": 1.3191668296831793, "learning_rate": 5.212363298479073e-11, "loss": 0.629, "step": 32582 }, { "epoch": 0.9986208164766458, "grad_norm": 1.2724622933344147, "learning_rate": 4.9882024896752427e-11, "loss": 0.6323, "step": 32583 }, { "epoch": 0.9986514649993871, "grad_norm": 1.3534517732409626, "learning_rate": 4.768968259627471e-11, "loss": 0.5078, "step": 32584 }, { "epoch": 0.9986821135221282, "grad_norm": 0.43991667569394954, "learning_rate": 4.554660610500694e-11, "loss": 0.3846, "step": 32585 }, { "epoch": 0.9987127620448695, "grad_norm": 1.3335140951093298, "learning_rate": 4.3452795444598464e-11, "loss": 0.5635, "step": 32586 }, { "epoch": 0.9987434105676106, "grad_norm": 0.45073547298129285, "learning_rate": 4.1408250635033284e-11, "loss": 0.3911, "step": 32587 }, { "epoch": 0.9987740590903519, "grad_norm": 0.4404862232701743, "learning_rate": 3.9412971696850545e-11, "loss": 0.3746, "step": 32588 }, { "epoch": 0.998804707613093, "grad_norm": 1.3373718487521624, "learning_rate": 3.7466958649479136e-11, "loss": 0.6605, "step": 32589 }, { "epoch": 0.9988353561358343, "grad_norm": 1.358854351170466, "learning_rate": 3.5570211512903076e-11, "loss": 0.6365, "step": 32590 }, { "epoch": 0.9988660046585754, "grad_norm": 0.45775948045711345, "learning_rate": 3.372273030433082e-11, "loss": 0.3908, "step": 32591 }, { "epoch": 0.9988966531813167, "grad_norm": 1.4757714864581541, "learning_rate": 3.1924515043191275e-11, "loss": 0.6523, "step": 32592 }, { "epoch": 0.9989273017040579, "grad_norm": 1.3555716578404777, "learning_rate": 3.0175565746692894e-11, "loss": 0.5597, "step": 32593 }, { "epoch": 0.9989579502267991, "grad_norm": 1.3635726126022283, "learning_rate": 2.8475882432599245e-11, "loss": 0.5544, "step": 32594 }, { "epoch": 0.9989885987495403, "grad_norm": 1.2768750032327945, "learning_rate": 2.6825465117008564e-11, "loss": 0.5623, "step": 32595 }, { "epoch": 0.9990192472722815, "grad_norm": 1.344913230502573, "learning_rate": 2.5224313816019086e-11, "loss": 0.6016, "step": 32596 }, { "epoch": 0.9990498957950227, "grad_norm": 1.3075928884616166, "learning_rate": 2.3672428546284155e-11, "loss": 0.6054, "step": 32597 }, { "epoch": 0.9990805443177638, "grad_norm": 1.3864040924687357, "learning_rate": 2.216980932223667e-11, "loss": 0.6369, "step": 32598 }, { "epoch": 0.9991111928405051, "grad_norm": 1.4406052503557074, "learning_rate": 2.071645615886464e-11, "loss": 0.534, "step": 32599 }, { "epoch": 0.9991418413632462, "grad_norm": 1.3541021193644311, "learning_rate": 1.931236907115608e-11, "loss": 0.5564, "step": 32600 }, { "epoch": 0.9991724898859875, "grad_norm": 1.2540968014863771, "learning_rate": 1.7957548072433662e-11, "loss": 0.5618, "step": 32601 }, { "epoch": 0.9992031384087287, "grad_norm": 1.3422521929364795, "learning_rate": 1.6651993176020065e-11, "loss": 0.5367, "step": 32602 }, { "epoch": 0.9992337869314699, "grad_norm": 1.2248061158072352, "learning_rate": 1.5395704394682852e-11, "loss": 0.6131, "step": 32603 }, { "epoch": 0.9992644354542111, "grad_norm": 1.5532857862207679, "learning_rate": 1.418868174063448e-11, "loss": 0.5966, "step": 32604 }, { "epoch": 0.9992950839769523, "grad_norm": 1.2691096700141633, "learning_rate": 1.303092522664251e-11, "loss": 0.4803, "step": 32605 }, { "epoch": 0.9993257324996935, "grad_norm": 1.4120624864560825, "learning_rate": 1.1922434863254063e-11, "loss": 0.6827, "step": 32606 }, { "epoch": 0.9993563810224347, "grad_norm": 1.318643370323099, "learning_rate": 1.0863210662126478e-11, "loss": 0.6019, "step": 32607 }, { "epoch": 0.9993870295451759, "grad_norm": 3.184921036360604, "learning_rate": 9.853252632696652e-12, "loss": 0.556, "step": 32608 }, { "epoch": 0.9994176780679171, "grad_norm": 0.45737527777669285, "learning_rate": 8.892560786066817e-12, "loss": 0.408, "step": 32609 }, { "epoch": 0.9994483265906583, "grad_norm": 1.5162469544426955, "learning_rate": 7.981135130563644e-12, "loss": 0.6594, "step": 32610 }, { "epoch": 0.9994789751133996, "grad_norm": 1.4373977107498144, "learning_rate": 7.118975676179141e-12, "loss": 0.5653, "step": 32611 }, { "epoch": 0.9995096236361407, "grad_norm": 1.2951128898628197, "learning_rate": 6.306082430684868e-12, "loss": 0.6552, "step": 32612 }, { "epoch": 0.999540272158882, "grad_norm": 1.1927593686476083, "learning_rate": 5.542455402407499e-12, "loss": 0.5355, "step": 32613 }, { "epoch": 0.9995709206816231, "grad_norm": 0.43349399511828474, "learning_rate": 4.8280945991185935e-12, "loss": 0.3891, "step": 32614 }, { "epoch": 0.9996015692043644, "grad_norm": 0.46089762486600305, "learning_rate": 4.16300002692438e-12, "loss": 0.3865, "step": 32615 }, { "epoch": 0.9996322177271055, "grad_norm": 1.5699227301234246, "learning_rate": 3.5471716935964183e-12, "loss": 0.61, "step": 32616 }, { "epoch": 0.9996628662498468, "grad_norm": 1.3191790692593781, "learning_rate": 2.980609604130713e-12, "loss": 0.6093, "step": 32617 }, { "epoch": 0.9996935147725879, "grad_norm": 1.313076469232179, "learning_rate": 2.463313765188602e-12, "loss": 0.512, "step": 32618 }, { "epoch": 0.9997241632953292, "grad_norm": 1.3175097388226933, "learning_rate": 1.9952841806558656e-12, "loss": 0.6472, "step": 32619 }, { "epoch": 0.9997548118180704, "grad_norm": 1.2663707443467287, "learning_rate": 1.5765208560836187e-12, "loss": 0.5912, "step": 32620 }, { "epoch": 0.9997854603408116, "grad_norm": 1.319135730340392, "learning_rate": 1.2070237953576425e-12, "loss": 0.6313, "step": 32621 }, { "epoch": 0.9998161088635528, "grad_norm": 1.5691694563624534, "learning_rate": 8.867930018086057e-13, "loss": 0.6111, "step": 32622 }, { "epoch": 0.999846757386294, "grad_norm": 1.2642186590675881, "learning_rate": 6.15828479322289e-13, "loss": 0.4883, "step": 32623 }, { "epoch": 0.9998774059090352, "grad_norm": 1.231565348726342, "learning_rate": 3.9413022956402703e-13, "loss": 0.5368, "step": 32624 }, { "epoch": 0.9999080544317764, "grad_norm": 1.1379027737435499, "learning_rate": 2.2169825530937716e-13, "loss": 0.506, "step": 32625 }, { "epoch": 0.9999387029545176, "grad_norm": 1.2774739276054312, "learning_rate": 9.853255822367403e-14, "loss": 0.5938, "step": 32626 }, { "epoch": 0.9999693514772588, "grad_norm": 0.5924810041858012, "learning_rate": 2.4633139417140626e-14, "loss": 0.4232, "step": 32627 }, { "epoch": 1.0, "grad_norm": 1.4441548083134657, "learning_rate": 0.0, "loss": 0.5701, "step": 32628 }, { "epoch": 1.0, "step": 32628, "total_flos": 1.4557311280955392e+16, "train_loss": 0.6296673105878691, "train_runtime": 358526.1827, "train_samples_per_second": 11.648, "train_steps_per_second": 0.091 } ], "logging_steps": 1.0, "max_steps": 32628, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4557311280955392e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }